From 8d528190a95e1d2539e7d2865874f20ded596f3d Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 19 May 2023 13:23:44 +0800 Subject: [PATCH] rt --- multi_language.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/multi_language.py b/multi_language.py index 04ebcaf..12b3760 100644 --- a/multi_language.py +++ b/multi_language.py @@ -1,6 +1,6 @@ import os import functools -import os +import re import pickle import time @@ -79,22 +79,26 @@ def lru_file_cache(maxsize=128, ttl=None, filename=None): return decorator_function +def contains_chinese(string): + """ + Returns True if the given string contains Chinese characters, False otherwise. + """ + chinese_regex = re.compile(u'[\u4e00-\u9fff]+') + return chinese_regex.search(string) is not None def extract_chinese_characters(file_path): + syntax = [] with open(file_path, 'r', encoding='utf-8') as f: content = f.read() - chinese_characters = [] - sentence = {'file':file_path, 'begin':-1, 'end':-1, 'word': ""} - for index, char in enumerate(content): - if 0x4e00 <= ord(char) <= 0x9fff: - sentence['word'] += char - if sentence['begin'] == -1: sentence['begin'] = index - sentence['end'] = index - else: - if len(sentence['word'])>0: - chinese_characters.append(sentence) - sentence = {'file':file_path, 'begin':-1, 'end':-1, 'word': ""} - return chinese_characters + import ast + root = ast.parse(content) + for node in ast.walk(root): + if isinstance(node, ast.Name): + if contains_chinese(node.id): + print(node.id) + syntax.append(node) + + return syntax def extract_chinese_characters_from_directory(directory_path): chinese_characters = []