translate not fin
This commit is contained in:
parent
8d528190a9
commit
c376e46f4d
544
docs/translate_english.json
Normal file
File diff suppressed because one or more lines are too long
@@ -1,10 +1,13 @@
 import os
+import json
 import functools
 import re
 import pickle
 import time
 
 CACHE_FOLDER = "gpt_log"
+blacklist = ['multi-language', 'gpt_log', '.git', 'private_upload']
+LANG = "English"
 
 if not os.path.exists(CACHE_FOLDER):
    os.makedirs(CACHE_FOLDER)
@@ -78,7 +81,6 @@ def lru_file_cache(maxsize=128, ttl=None, filename=None):
 
     return decorator_function
 
-
 def contains_chinese(string):
     """
     Returns True if the given string contains Chinese characters, False otherwise.
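The hunk above closes lru_file_cache, whose body this diff never shows. For orientation only, here is a minimal sketch of what a file-backed LRU cache decorator with this signature could look like, assuming pickle persistence under CACHE_FOLDER (the internals are an assumption, not the repository's implementation):

    import functools, os, pickle, time

    def lru_file_cache(maxsize=128, ttl=None, filename=None):
        # Sketch: keep results in a dict and persist it to CACHE_FOLDER/<filename>.pkl.
        def decorator_function(func):
            cache_path = os.path.join(CACHE_FOLDER, f"{filename}.pkl")
            cache = {}
            if os.path.exists(cache_path):
                with open(cache_path, 'rb') as f:
                    cache = pickle.load(f)
            @functools.wraps(func)
            def wrapper(*args):
                key, now = repr(args), time.time()
                if key in cache:
                    value, stamp = cache[key]
                    if ttl is None or now - stamp < ttl:
                        return value
                value = func(*args)
                cache[key] = (value, now)
                while len(cache) > maxsize:
                    cache.pop(next(iter(cache)))  # evict oldest insertion (approximate LRU)
                with open(cache_path, 'wb') as f:
                    pickle.dump(cache, f)
                return value
            return wrapper
        return decorator_function

With ttl=1e40, as the decorator is applied below, cached entries effectively never expire.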
@@ -86,122 +88,259 @@ def contains_chinese(string):
     chinese_regex = re.compile(u'[\u4e00-\u9fff]+')
     return chinese_regex.search(string) is not None
 
-def extract_chinese_characters(file_path):
-    syntax = []
-    with open(file_path, 'r', encoding='utf-8') as f:
-        content = f.read()
-        import ast
-        root = ast.parse(content)
-        for node in ast.walk(root):
-            if isinstance(node, ast.Name):
-                if contains_chinese(node.id):
-                    print(node.id)
-                    syntax.append(node)
-        return syntax
+def split_list(lst, n_each_req):
+    """
+    Split a list into smaller lists, each with a maximum number of elements.
+    :param lst: the list to split
+    :param n_each_req: the maximum number of elements in each sub-list
+    :return: a list of sub-lists
+    """
+    result = []
+    for i in range(0, len(lst), n_each_req):
+        result.append(lst[i:i + n_each_req])
+    return result
 
+def map_to_json(map, language):
+    dict_ = read_map_from_json(language)
+    dict_.update(map)
+    with open(f'docs/translate_{language.lower()}.json', 'w', encoding='utf8') as f:
+        json.dump(dict_, f, indent=4, ensure_ascii=False)
+
-def extract_chinese_characters_from_directory(directory_path):
-    chinese_characters = []
+def read_map_from_json(language):
+    if os.path.exists(f'docs/translate_{language.lower()}.json'):
+        with open(f'docs/translate_{language.lower()}.json', 'r', encoding='utf8') as f:
+            return json.load(f)
+    return {}
 
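split_list and the two JSON helpers above form the cache round-trip used by the rest of the script. A quick usage sketch (the sample words are hypothetical; the file path follows map_to_json above):

    split_list(['一', '二', '三'], n_each_req=2)         # -> [['一', '二'], ['三']]
    map_to_json({'效果': 'Effect'}, language="English")   # merge into docs/translate_english.json
    read_map_from_json(language="English")                # -> {'效果': 'Effect', ...}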
+cached_translation = {}
+cached_translation = read_map_from_json(language=LANG)
+
+@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
+def trans(word_to_translate, language, special=False):
+    if len(word_to_translate) == 0: return {}
+    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from toolbox import get_conf, ChatBotWithCookies
+    proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
+        get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
+    llm_kwargs = {
+        'api_key': API_KEY,
+        'llm_model': LLM_MODEL,
+        'top_p':1.0,
+        'max_length': None,
+        'temperature':0.0,
+    }
+    N_EACH_REQ = 16
+    word_to_translate_split = split_list(word_to_translate, N_EACH_REQ)
+    inputs_array = [str(s) for s in word_to_translate_split]
+    inputs_show_user_array = inputs_array
+    history_array = [[] for _ in inputs_array]
+    if special:   # to English using CamelCase Naming Convention
+        sys_prompt_array = [f"Translate following names to English with CamelCase naming convention. Keep original format" for _ in inputs_array]
+    else:
+        sys_prompt_array = [f"Translate following sentences to {LANG}. Keep original format." for _ in inputs_array]
+    chatbot = ChatBotWithCookies(llm_kwargs)
+    gpt_say_generator = request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+        inputs_array,
+        inputs_show_user_array,
+        llm_kwargs,
+        chatbot,
+        history_array,
+        sys_prompt_array,
+    )
+    while True:
+        try:
+            gpt_say = next(gpt_say_generator)
+            print(gpt_say[1][0][1])
+        except StopIteration as e:
+            result = e.value
+            break
+    translated_result = {}
+    for i, r in enumerate(result):
+        if i%2 == 1:
+            try:
+                res_before_trans = eval(result[i-1])
+                res_after_trans = eval(result[i])
+                for a,b in zip(res_before_trans, res_after_trans):
+                    translated_result[a] = b
+            except:
+                try:
+                    res_before_trans = eval(result[i-1])
+                    result[i] = result[i].strip('[\']')
+                    res_after_trans = [s for s in result[i].split("', '")]
+                    for a,b in zip(res_before_trans, res_after_trans):
+                        translated_result[a] = b
+                except:
+                    res_before_trans = eval(result[i-1])
+                    for a in res_before_trans:
+                        translated_result[a] = None
+    return translated_result
+
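trans() recovers the translation by eval-ing the model's echoed Python-list text, with two string-splitting fallbacks when the echo is malformed. If one wanted to harden that step, ast.literal_eval only accepts literals and cannot execute code; a sketch of that variant (not what this commit does):

    import ast

    def parse_list_reply(text):
        # Return the list literal contained in `text`, or None if it does not parse.
        try:
            value = ast.literal_eval(text.strip())
            return value if isinstance(value, list) else None
        except (ValueError, SyntaxError):
            return None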
+def step_1_core_key_translate():
+    def extract_chinese_characters(file_path):
+        syntax = []
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+            import ast
+            root = ast.parse(content)
+            for node in ast.walk(root):
+                if isinstance(node, ast.Name):
+                    if contains_chinese(node.id): syntax.append(node.id)
+                if isinstance(node, ast.Import):
+                    for n in node.names:
+                        if contains_chinese(n.name): syntax.append(n.name)
+                elif isinstance(node, ast.ImportFrom):
+                    for n in node.names:
+                        if contains_chinese(n.name): syntax.append(n.name)
+                    for k in node.module.split('.'):
+                        if contains_chinese(k): syntax.append(k)
+            return syntax
+
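The nested extract_chinese_characters now collects identifier names, imported names, and module-path segments containing Chinese, rather than AST nodes. A minimal standalone sketch of the same traversal on a hypothetical module:

    import ast

    source = "from 检查代理 import 安装方法\n变量名 = 1\n"
    names = []
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.Name):
            names.append(node.id)                      # picks up 变量名
        elif isinstance(node, ast.ImportFrom):
            names += [n.name for n in node.names]      # picks up 安装方法
            names += node.module.split('.')            # picks up 检查代理
    print(names)  # ['安装方法', '检查代理', '变量名']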
+    def extract_chinese_characters_from_directory(directory_path):
+        chinese_characters = []
+        for root, dirs, files in os.walk(directory_path):
+            if any([b in root for b in blacklist]):
+                continue
+            for file in files:
+                if file.endswith('.py'):
+                    file_path = os.path.join(root, file)
+                    chinese_characters.extend(extract_chinese_characters(file_path))
+        return chinese_characters
+
+    directory_path = './'
+    chinese_core_names = extract_chinese_characters_from_directory(directory_path)
+    chinese_core_keys = [name for name in chinese_core_names]
+    chinese_core_keys_norepeat = []
+    for d in chinese_core_keys:
+        if d not in chinese_core_keys_norepeat: chinese_core_keys_norepeat.append(d)
+    need_translate = []
+    cached_translation = read_map_from_json(language=LANG)
+    cached_translation_keys = list(cached_translation.keys())
+    for d in chinese_core_keys_norepeat:
+        if d not in cached_translation_keys:
+            need_translate.append(d)
+
+    need_translate_mapping = trans(need_translate, language=LANG, special=True)
+    map_to_json(need_translate_mapping, language=LANG)
+    cached_translation = read_map_from_json(language=LANG)
+    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))
+
+    chinese_core_keys_norepeat_mapping = {}
+    for k in chinese_core_keys_norepeat:
+        chinese_core_keys_norepeat_mapping.update({k:cached_translation[k]})
+
+    # ===============================================
+    # copy
+    # ===============================================
+    def copy_source_code():
+
+        from toolbox import get_conf
+        import shutil
+        import os
+        try: shutil.rmtree(f'./multi-language/{LANG}/')
+        except: pass
+        os.makedirs(f'./multi-language', exist_ok=True)
+        backup_dir = f'./multi-language/{LANG}/'
+        shutil.copytree('./', backup_dir, ignore=lambda x, y: blacklist)
+    copy_source_code()
+
+    # ===============================================
+    # primary key replace
+    # ===============================================
+    directory_path = f'./multi-language/{LANG}/'
+    for root, dirs, files in os.walk(directory_path):
+        for file in files:
+            if file.endswith('.py'):
+                file_path = os.path.join(root, file)
+                syntax = []
+                # read again
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+
+                for k, v in chinese_core_keys_norepeat_mapping.items():
+                    content = content.replace(k, v)
+
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    f.write(content)
+
-    for root, dirs, files in os.walk(directory_path):
-        for file in files:
-            if file.endswith('.py'):
-                file_path = os.path.join(root, file)
-                chinese_characters.extend(extract_chinese_characters(file_path))
-    return chinese_characters
-
-directory_path = './'
-chinese_characters = extract_chinese_characters_from_directory(directory_path)
-word_to_translate = {}
-for d in chinese_characters:
-    word_to_translate[d['word']] = "TRANS"
-
-def break_dictionary(d, n):
-    items = list(d.items())
-    num_dicts = (len(items) + n - 1) // n
-    return [{k: v for k, v in items[i*n:(i+1)*n]} for i in range(num_dicts)]
-
-N_EACH_REQ = 50
-word_to_translate_split = break_dictionary(word_to_translate, N_EACH_REQ)
-LANG = "English"
-
-@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
-def trans(words):
-    # from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-    # from toolbox import get_conf, ChatBotWithCookies
-    # proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
-    #     get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
-    # llm_kwargs = {
-    #     'api_key': API_KEY,
-    #     'llm_model': LLM_MODEL,
-    #     'top_p':1.0,
-    #     'max_length': None,
-    #     'temperature':0.0,
-    # }
-    # plugin_kwargs = {}
-    # chatbot = ChatBotWithCookies(llm_kwargs)
-    # history = []
-    # for gpt_say in request_gpt_model_in_new_thread_with_ui_alive(
-    #     inputs=words, inputs_show_user=words,
-    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
-    #     sys_prompt=f"Translate following words to {LANG}, replace `TRANS` with translated result."
-    # ):
-    #     gpt_say = gpt_say[1][0][1]
-    # return gpt_say
-    return '{}'
-
-translated_result = {}
-for d in word_to_translate_split:
-    res = trans(str(d))
-    try:
-        # convert translated result back to python dictionary
-        res_dict = eval(res)
-    except:
-        print('Unexpected output.')
-    translated_result.update(res_dict)
-
-print('All Chinese characters:', chinese_characters)
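Note the sort by descending key length before the replacement pass in step_1: if one key is a substring of another, replacing the shorter key first would corrupt the longer one. A worked example with hypothetical keys:

    mapping = {'代理': 'Proxy', '代理设置': 'ProxySettings'}
    ordered = dict(sorted(mapping.items(), key=lambda x: -len(x[0])))
    text = '代理设置'
    for k, v in ordered.items():
        text = text.replace(k, v)
    print(text)  # ProxySettings -- replacing '代理' first would yield 'Proxy设置'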
-# =================== create copy =====================
-def copy_source_code():
-    """
-    One-click update protocol: backup and download
-    """
-    from toolbox import get_conf
-    import shutil
-    import os
-    import requests
-    import zipfile
-    try: shutil.rmtree(f'./multi-language/{LANG}/')
-    except: pass
-    os.makedirs(f'./multi-language', exist_ok=True)
-    backup_dir = f'./multi-language/{LANG}/'
-    shutil.copytree('./', backup_dir, ignore=lambda x, y: ['multi-language', 'gpt_log', '.git', 'private_upload'])
-copy_source_code()
+def step_2_core_key_translate():
+
+    # =================================================================================================
+    # step2
+    # =================================================================================================
+    def get_strings(node):
+        strings = []
+
+        # recursively traverse the AST
+        for child in ast.iter_child_nodes(node):
+            if isinstance(child, ast.Str):
+                if contains_chinese(child.s):
+                    string_ = child.s.strip().strip(',').strip().strip('.').strip()
+                    if string_.startswith('[Local Message]'):
+                        string_ = string_.replace('[Local Message]', '')
+                        string_ = string_.strip().strip(',').strip().strip('.').strip()
+                    strings.append([
+                        string_,
+                        child.lineno*10000+child.col_offset
+                    ])
+            elif isinstance(child, ast.AST):
+                strings.extend(get_strings(child))
+
+        return strings
+
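get_strings tags each literal with child.lineno*10000 + child.col_offset, packing (line, column) into one integer so that sorting the keys orders strings by line first and column second, assuming no line reaches 10000 columns. A quick check:

    positions = [(2, 40), (3, 8), (2, 4)]
    keys = sorted(line*10000 + col for line, col in positions)
    print(keys)  # [20004, 20040, 30008] -- i.e. (2,4), (2,40), (3,8)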
+    string_literals = []
+    directory_path = f'./multi-language/{LANG}/'
+    for root, dirs, files in os.walk(directory_path):
+        for file in files:
+            if file.endswith('.py'):
+                file_path = os.path.join(root, file)
+                syntax = []
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+                import ast
+                tree = ast.parse(content)
+                res = get_strings(tree)
+                string_literals.extend(res)
+
+    chinese_literal_names = []
+    chinese_literal_names_norepeat = []
+    for string, offset in string_literals:
+        chinese_literal_names.append(string)
+    chinese_literal_names_norepeat = []
+    for d in chinese_literal_names:
+        if d not in chinese_literal_names_norepeat: chinese_literal_names_norepeat.append(d)
+    need_translate = []
+    cached_translation = read_map_from_json(language=LANG)
+    cached_translation_keys = list(cached_translation.keys())
+    for d in chinese_literal_names_norepeat:
+        if d not in cached_translation_keys:
+            need_translate.append(d)
+
-for d in chinese_characters:
-    d['file'] = f'./multi-language/{LANG}/' + d['file']
-    if d['word'] in translated_result:
-        d['trans'] = translated_result[d['word']]
-    else:
-        d['trans'] = None
+    up = trans(need_translate, language=LANG, special=False)
+    map_to_json(up, language=LANG)
+    cached_translation = read_map_from_json(language=LANG)
+    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))
 
-chinese_characters = sorted(chinese_characters, key=lambda x: len(x['word']), reverse=True)
-for d in chinese_characters:
-    if d['trans'] is None:
-        continue
-    with open(d['file'], 'r', encoding='utf-8') as f:
-        content = f.read()
-    content.replace(d['word'], d['trans'])
-    substring = d['trans']
-    substring_start_index = content.find(substring)
-    substring_end_index = substring_start_index + len(substring) - 1
-    if content[substring_start_index].isalpha() or content[substring_start_index].isdigit():
-        content = content[:substring_start_index+1]
+    # ===============================================
+    # literal key replace
+    # ===============================================
+    directory_path = f'./multi-language/{LANG}/'
+    for root, dirs, files in os.walk(directory_path):
+        for file in files:
+            if file.endswith('.py'):
+                file_path = os.path.join(root, file)
+                syntax = []
+                # read again
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+
+                for k, v in cached_translation.items():
+                    content = content.replace(k, v)
+
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    f.write(content)
+
+step_1_core_key_translate()
+step_2_core_key_translate()
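One portability note on the code above: ast.Str is deprecated since Python 3.8 in favor of ast.Constant, so on newer interpreters the same check in get_strings is usually written as follows (a sketch using this file's contains_chinese):

    import ast

    def is_chinese_string_node(child):
        # modern equivalent of: isinstance(child, ast.Str) and contains_chinese(child.s)
        return (isinstance(child, ast.Constant)
                and isinstance(child.value, str)
                and contains_chinese(child.value))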
11
theme.py
@@ -103,35 +103,30 @@ def adjust_theme():
 
 
 advanced_css = """
-/* Set the table's outer margin to 1em, collapse borders between inner cells, and show empty cells. */
 .markdown-body table {
     margin: 1em 0;
     border-collapse: collapse;
     empty-cells: show;
 }
 
-/* Set table cell padding to 5px, border width to 1.2px, border color to --border-color-primary. */
 .markdown-body th, .markdown-body td {
     border: 1.2px solid var(--border-color-primary);
     padding: 5px;
 }
 
-/* Set the table header background color to rgba(175,184,193,0.2), i.e. opacity 0.2. */
 .markdown-body thead {
     background-color: rgba(175,184,193,0.2);
 }
 
-/* Set table header cell padding to 0.5em and 0.2em. */
 .markdown-body thead th {
     padding: .5em .2em;
 }
 
-/* Remove the default spacing before list markers so they align with the text. */
 .markdown-body ol, .markdown-body ul {
     padding-inline-start: 2em !important;
 }
 
-/* Style the chat bubbles: rounded corners, max width, shadow, etc. */
+/* chat box. */
 [class *= "message"] {
     border-radius: var(--radius-xl) !important;
     /* padding: var(--spacing-xl) !important; */
@@ -151,7 +146,7 @@ advanced_css = """
     border-bottom-right-radius: 0 !important;
 }
 
-/* Inline code: light grey background, rounded corners and spacing. */
+/* linein code block. */
 .markdown-body code {
     display: inline;
     white-space: break-spaces;
@@ -171,7 +166,7 @@ advanced_css = """
     background-color: rgba(175,184,193,0.2);
 }
 
-/* Code block style: background color, padding, margins, rounded corners. */
+/* code block css */
 .markdown-body pre code {
     display: block;
     overflow: auto;