translate not fin
This commit is contained in:
parent
8d528190a9
commit
c376e46f4d
544
docs/translate_english.json
Normal file
File diff suppressed because one or more lines are too long
@@ -1,10 +1,13 @@
 import os
+import json
 import functools
 import re
 import pickle
 import time
 
 CACHE_FOLDER = "gpt_log"
+blacklist = ['multi-language', 'gpt_log', '.git', 'private_upload']
+LANG = "English"
 
 if not os.path.exists(CACHE_FOLDER):
    os.makedirs(CACHE_FOLDER)
@@ -78,7 +81,6 @@ def lru_file_cache(maxsize=128, ttl=None, filename=None):
 
     return decorator_function
 
-
 def contains_chinese(string):
     """
     Returns True if the given string contains Chinese characters, False otherwise.
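The hunk above closes lru_file_cache, whose body this diff never shows. For orientation only, here is a minimal sketch of what a file-backed LRU cache decorator with this signature could look like, assuming pickle persistence under CACHE_FOLDER (the internals are an assumption, not the repository's implementation):

    import functools, os, pickle, time

    def lru_file_cache(maxsize=128, ttl=None, filename=None):
        # Sketch: keep results in a dict and persist it to CACHE_FOLDER/<filename>.pkl.
        def decorator_function(func):
            cache_path = os.path.join(CACHE_FOLDER, f"{filename}.pkl")
            cache = {}
            if os.path.exists(cache_path):
                with open(cache_path, 'rb') as f:
                    cache = pickle.load(f)
            @functools.wraps(func)
            def wrapper(*args):
                key, now = repr(args), time.time()
                if key in cache:
                    value, stamp = cache[key]
                    if ttl is None or now - stamp < ttl:
                        return value
                value = func(*args)
                cache[key] = (value, now)
                while len(cache) > maxsize:
                    cache.pop(next(iter(cache)))  # evict oldest insertion (approximate LRU)
                with open(cache_path, 'wb') as f:
                    pickle.dump(cache, f)
                return value
            return wrapper
        return decorator_function

With ttl=1e40, as the decorator is applied below, cached entries effectively never expire.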
@@ -86,122 +88,259 @@ def contains_chinese(string):
     chinese_regex = re.compile(u'[\u4e00-\u9fff]+')
     return chinese_regex.search(string) is not None
 
-def extract_chinese_characters(file_path):
-    syntax = []
-    with open(file_path, 'r', encoding='utf-8') as f:
-        content = f.read()
-        import ast
-        root = ast.parse(content)
-        for node in ast.walk(root):
-            if isinstance(node, ast.Name):
-                if contains_chinese(node.id):
-                    print(node.id)
-                    syntax.append(node)
-        return syntax
+def split_list(lst, n_each_req):
+    """
+    Split a list into smaller lists, each with a maximum number of elements.
+    :param lst: the list to split
+    :param n_each_req: the maximum number of elements in each sub-list
+    :return: a list of sub-lists
+    """
+    result = []
+    for i in range(0, len(lst), n_each_req):
+        result.append(lst[i:i + n_each_req])
+    return result
 
+def map_to_json(map, language):
+    dict_ = read_map_from_json(language)
+    dict_.update(map)
+    with open(f'docs/translate_{language.lower()}.json', 'w', encoding='utf8') as f:
+        json.dump(dict_, f, indent=4, ensure_ascii=False)
+
-def extract_chinese_characters_from_directory(directory_path):
-    chinese_characters = []
+def read_map_from_json(language):
+    if os.path.exists(f'docs/translate_{language.lower()}.json'):
+        with open(f'docs/translate_{language.lower()}.json', 'r', encoding='utf8') as f:
+            return json.load(f)
+    return {}
 
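split_list and the two JSON helpers above form the cache round-trip used by the rest of the script. A quick usage sketch (the sample words are hypothetical; the file path follows map_to_json above):

    split_list(['一', '二', '三'], n_each_req=2)         # -> [['一', '二'], ['三']]
    map_to_json({'效果': 'Effect'}, language="English")   # merge into docs/translate_english.json
    read_map_from_json(language="English")                # -> {'效果': 'Effect', ...}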
+cached_translation = {}
+cached_translation = read_map_from_json(language=LANG)
+
+@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
+def trans(word_to_translate, language, special=False):
+    if len(word_to_translate) == 0: return {}
+    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from toolbox import get_conf, ChatBotWithCookies
+    proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
+        get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
+    llm_kwargs = {
+        'api_key': API_KEY,
+        'llm_model': LLM_MODEL,
+        'top_p':1.0,
+        'max_length': None,
+        'temperature':0.0,
+    }
+    N_EACH_REQ = 16
+    word_to_translate_split = split_list(word_to_translate, N_EACH_REQ)
+    inputs_array = [str(s) for s in word_to_translate_split]
+    inputs_show_user_array = inputs_array
+    history_array = [[] for _ in inputs_array]
+    if special:   # to English using CamelCase Naming Convention
+        sys_prompt_array = [f"Translate following names to English with CamelCase naming convention. Keep original format" for _ in inputs_array]
+    else:
+        sys_prompt_array = [f"Translate following sentences to {LANG}. Keep original format." for _ in inputs_array]
+    chatbot = ChatBotWithCookies(llm_kwargs)
+    gpt_say_generator = request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+        inputs_array,
+        inputs_show_user_array,
+        llm_kwargs,
+        chatbot,
+        history_array,
+        sys_prompt_array,
+    )
+    while True:
+        try:
+            gpt_say = next(gpt_say_generator)
+            print(gpt_say[1][0][1])
+        except StopIteration as e:
+            result = e.value
+            break
+    translated_result = {}
+    for i, r in enumerate(result):
+        if i%2 == 1:
+            try:
+                res_before_trans = eval(result[i-1])
+                res_after_trans = eval(result[i])
+                for a,b in zip(res_before_trans, res_after_trans):
+                    translated_result[a] = b
+            except:
+                try:
+                    res_before_trans = eval(result[i-1])
+                    result[i] = result[i].strip('[\']')
+                    res_after_trans = [s for s in result[i].split("', '")]
+                    for a,b in zip(res_before_trans, res_after_trans):
+                        translated_result[a] = b
+                except:
+                    res_before_trans = eval(result[i-1])
+                    for a in res_before_trans:
+                        translated_result[a] = None
+    return translated_result
+
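trans() recovers the translation by eval-ing the model's echoed Python-list text, with two string-splitting fallbacks when the echo is malformed. If one wanted to harden that step, ast.literal_eval only accepts literals and cannot execute code; a sketch of that variant (not what this commit does):

    import ast

    def parse_list_reply(text):
        # Return the list literal contained in `text`, or None if it does not parse.
        try:
            value = ast.literal_eval(text.strip())
            return value if isinstance(value, list) else None
        except (ValueError, SyntaxError):
            return None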
+def step_1_core_key_translate():
+    def extract_chinese_characters(file_path):
+        syntax = []
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+            import ast
+            root = ast.parse(content)
+            for node in ast.walk(root):
+                if isinstance(node, ast.Name):
+                    if contains_chinese(node.id): syntax.append(node.id)
+                if isinstance(node, ast.Import):
+                    for n in node.names:
+                        if contains_chinese(n.name): syntax.append(n.name)
+                elif isinstance(node, ast.ImportFrom):
+                    for n in node.names:
+                        if contains_chinese(n.name): syntax.append(n.name)
+                    for k in node.module.split('.'):
+                        if contains_chinese(k): syntax.append(k)
+            return syntax
+
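The nested extract_chinese_characters now collects identifier names, imported names, and module-path segments containing Chinese, rather than AST nodes. A minimal standalone sketch of the same traversal on a hypothetical module:

    import ast

    source = "from 检查代理 import 安装方法\n变量名 = 1\n"
    names = []
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.Name):
            names.append(node.id)                      # picks up 变量名
        elif isinstance(node, ast.ImportFrom):
            names += [n.name for n in node.names]      # picks up 安装方法
            names += node.module.split('.')            # picks up 检查代理
    print(names)  # ['安装方法', '检查代理', '变量名']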
+    def extract_chinese_characters_from_directory(directory_path):
+        chinese_characters = []
+        for root, dirs, files in os.walk(directory_path):
+            if any([b in root for b in blacklist]):
+                continue
+            for file in files:
+                if file.endswith('.py'):
+                    file_path = os.path.join(root, file)
+                    chinese_characters.extend(extract_chinese_characters(file_path))
+        return chinese_characters
+
+    directory_path = './'
+    chinese_core_names = extract_chinese_characters_from_directory(directory_path)
+    chinese_core_keys = [name for name in chinese_core_names]
+    chinese_core_keys_norepeat = []
+    for d in chinese_core_keys:
+        if d not in chinese_core_keys_norepeat: chinese_core_keys_norepeat.append(d)
+    need_translate = []
+    cached_translation = read_map_from_json(language=LANG)
+    cached_translation_keys = list(cached_translation.keys())
+    for d in chinese_core_keys_norepeat:
+        if d not in cached_translation_keys:
+            need_translate.append(d)
+
+    need_translate_mapping = trans(need_translate, language=LANG, special=True)
+    map_to_json(need_translate_mapping, language=LANG)
+    cached_translation = read_map_from_json(language=LANG)
+    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))
+
+    chinese_core_keys_norepeat_mapping = {}
+    for k in chinese_core_keys_norepeat:
+        chinese_core_keys_norepeat_mapping.update({k:cached_translation[k]})
+
+    # ===============================================
+    # copy
+    # ===============================================
+    def copy_source_code():
+
+        from toolbox import get_conf
+        import shutil
+        import os
+        try: shutil.rmtree(f'./multi-language/{LANG}/')
+        except: pass
+        os.makedirs(f'./multi-language', exist_ok=True)
+        backup_dir = f'./multi-language/{LANG}/'
+        shutil.copytree('./', backup_dir, ignore=lambda x, y: blacklist)
+    copy_source_code()
+
+    # ===============================================
+    # primary key replace
+    # ===============================================
+    directory_path = f'./multi-language/{LANG}/'
+    for root, dirs, files in os.walk(directory_path):
+        for file in files:
+            if file.endswith('.py'):
+                file_path = os.path.join(root, file)
+                syntax = []
+                # read again
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+
+                for k, v in chinese_core_keys_norepeat_mapping.items():
+                    content = content.replace(k, v)
+
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    f.write(content)
+
-    for root, dirs, files in os.walk(directory_path):
-        for file in files:
-            if file.endswith('.py'):
-                file_path = os.path.join(root, file)
-                chinese_characters.extend(extract_chinese_characters(file_path))
-    return chinese_characters
-
-directory_path = './'
-chinese_characters = extract_chinese_characters_from_directory(directory_path)
-word_to_translate = {}
-for d in chinese_characters:
-    word_to_translate[d['word']] = "TRANS"
-
-def break_dictionary(d, n):
-    items = list(d.items())
-    num_dicts = (len(items) + n - 1) // n
-    return [{k: v for k, v in items[i*n:(i+1)*n]} for i in range(num_dicts)]
-
-N_EACH_REQ = 50
-word_to_translate_split = break_dictionary(word_to_translate, N_EACH_REQ)
-LANG = "English"
-
-@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
-def trans(words):
-    # from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-    # from toolbox import get_conf, ChatBotWithCookies
-    # proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
-    #     get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
-    # llm_kwargs = {
-    #     'api_key': API_KEY,
-    #     'llm_model': LLM_MODEL,
-    #     'top_p':1.0,
-    #     'max_length': None,
-    #     'temperature':0.0,
-    # }
-    # plugin_kwargs = {}
-    # chatbot = ChatBotWithCookies(llm_kwargs)
-    # history = []
-    # for gpt_say in request_gpt_model_in_new_thread_with_ui_alive(
-    #     inputs=words, inputs_show_user=words,
-    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
-    #     sys_prompt=f"Translate following words to {LANG}, replace `TRANS` with translated result."
-    # ):
-    #     gpt_say = gpt_say[1][0][1]
-    # return gpt_say
-    return '{}'
-
-translated_result = {}
-for d in word_to_translate_split:
-    res = trans(str(d))
-    try:
-        # convert translated result back to python dictionary
-        res_dict = eval(res)
-    except:
-        print('Unexpected output.')
-    translated_result.update(res_dict)
-
-print('All Chinese characters:', chinese_characters)
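Note the sort by descending key length before the replacement pass in step_1: if one key is a substring of another, replacing the shorter key first would corrupt the longer one. A worked example with hypothetical keys:

    mapping = {'代理': 'Proxy', '代理设置': 'ProxySettings'}
    ordered = dict(sorted(mapping.items(), key=lambda x: -len(x[0])))
    text = '代理设置'
    for k, v in ordered.items():
        text = text.replace(k, v)
    print(text)  # ProxySettings -- replacing '代理' first would yield 'Proxy设置'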
-# =================== create copy =====================
-def copy_source_code():
-    """
-    One-click update protocol: backup and download
-    """
-    from toolbox import get_conf
-    import shutil
-    import os
-    import requests
-    import zipfile
-    try: shutil.rmtree(f'./multi-language/{LANG}/')
-    except: pass
-    os.makedirs(f'./multi-language', exist_ok=True)
-    backup_dir = f'./multi-language/{LANG}/'
-    shutil.copytree('./', backup_dir, ignore=lambda x, y: ['multi-language', 'gpt_log', '.git', 'private_upload'])
-copy_source_code()
+def step_2_core_key_translate():
+
+    # =================================================================================================
+    # step2
+    # =================================================================================================
+    def get_strings(node):
+        strings = []
+
+        # recursively traverse the AST
+        for child in ast.iter_child_nodes(node):
+            if isinstance(child, ast.Str):
+                if contains_chinese(child.s):
+                    string_ = child.s.strip().strip(',').strip().strip('.').strip()
+                    if string_.startswith('[Local Message]'):
+                        string_ = string_.replace('[Local Message]', '')
+                        string_ = string_.strip().strip(',').strip().strip('.').strip()
+                    strings.append([
+                        string_,
+                        child.lineno*10000+child.col_offset
+                    ])
+            elif isinstance(child, ast.AST):
+                strings.extend(get_strings(child))
+
+        return strings
+
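get_strings tags each literal with child.lineno*10000 + child.col_offset, packing (line, column) into one integer so that sorting the keys orders strings by line first and column second, assuming no line reaches 10000 columns. A quick check:

    positions = [(2, 40), (3, 8), (2, 4)]
    keys = sorted(line*10000 + col for line, col in positions)
    print(keys)  # [20004, 20040, 30008] -- i.e. (2,4), (2,40), (3,8)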
+    string_literals = []
+    directory_path = f'./multi-language/{LANG}/'
+    for root, dirs, files in os.walk(directory_path):
+        for file in files:
+            if file.endswith('.py'):
+                file_path = os.path.join(root, file)
+                syntax = []
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+                import ast
+                tree = ast.parse(content)
+                res = get_strings(tree)
+                string_literals.extend(res)
+
+    chinese_literal_names = []
+    chinese_literal_names_norepeat = []
+    for string, offset in string_literals:
+        chinese_literal_names.append(string)
+    chinese_literal_names_norepeat = []
+    for d in chinese_literal_names:
+        if d not in chinese_literal_names_norepeat: chinese_literal_names_norepeat.append(d)
+    need_translate = []
+    cached_translation = read_map_from_json(language=LANG)
+    cached_translation_keys = list(cached_translation.keys())
+    for d in chinese_literal_names_norepeat:
+        if d not in cached_translation_keys:
+            need_translate.append(d)
+
-for d in chinese_characters:
-    d['file'] = f'./multi-language/{LANG}/' + d['file']
-    if d['word'] in translated_result:
-        d['trans'] = translated_result[d['word']]
-    else:
-        d['trans'] = None
+    up = trans(need_translate, language=LANG, special=False)
+    map_to_json(up, language=LANG)
+    cached_translation = read_map_from_json(language=LANG)
+    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))
 
-chinese_characters = sorted(chinese_characters, key=lambda x: len(x['word']), reverse=True)
-for d in chinese_characters:
-    if d['trans'] is None:
-        continue
-    with open(d['file'], 'r', encoding='utf-8') as f:
-        content = f.read()
-    content.replace(d['word'], d['trans'])
-    substring = d['trans']
-    substring_start_index = content.find(substring)
-    substring_end_index = substring_start_index + len(substring) - 1
-    if content[substring_start_index].isalpha() or content[substring_start_index].isdigit():
-        content = content[:substring_start_index+1]
+    # ===============================================
+    # literal key replace
+    # ===============================================
+    directory_path = f'./multi-language/{LANG}/'
+    for root, dirs, files in os.walk(directory_path):
+        for file in files:
+            if file.endswith('.py'):
+                file_path = os.path.join(root, file)
+                syntax = []
+                # read again
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+
+                for k, v in cached_translation.items():
+                    content = content.replace(k, v)
+
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    f.write(content)
+
+step_1_core_key_translate()
+step_2_core_key_translate()
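One portability note on the code above: ast.Str is deprecated since Python 3.8 in favor of ast.Constant, so on newer interpreters the same check in get_strings is usually written as follows (a sketch using this file's contains_chinese):

    import ast

    def is_chinese_string_node(child):
        # modern equivalent of: isinstance(child, ast.Str) and contains_chinese(child.s)
        return (isinstance(child, ast.Constant)
                and isinstance(child.value, str)
                and contains_chinese(child.value))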
11
theme.py
@@ -103,35 +103,30 @@ def adjust_theme():
 
 
 advanced_css = """
-/* Set the table's outer margin to 1em, collapse borders between inner cells, and show empty cells. */
 .markdown-body table {
     margin: 1em 0;
     border-collapse: collapse;
     empty-cells: show;
 }
 
-/* Set table cell padding to 5px, border width to 1.2px, border color to --border-color-primary. */
 .markdown-body th, .markdown-body td {
     border: 1.2px solid var(--border-color-primary);
     padding: 5px;
 }
 
-/* Set the table header background color to rgba(175,184,193,0.2), i.e. opacity 0.2. */
 .markdown-body thead {
     background-color: rgba(175,184,193,0.2);
 }
 
-/* Set table header cell padding to 0.5em and 0.2em. */
 .markdown-body thead th {
     padding: .5em .2em;
 }
 
-/* Remove the default spacing before list markers so they align with the text. */
 .markdown-body ol, .markdown-body ul {
     padding-inline-start: 2em !important;
 }
 
-/* Style the chat bubbles: rounded corners, max width, shadow, etc. */
+/* chat box. */
 [class *= "message"] {
     border-radius: var(--radius-xl) !important;
     /* padding: var(--spacing-xl) !important; */
@@ -151,7 +146,7 @@ advanced_css = """
     border-bottom-right-radius: 0 !important;
 }
 
-/* Inline code: light grey background, rounded corners and spacing. */
+/* linein code block. */
 .markdown-body code {
     display: inline;
     white-space: break-spaces;
@@ -171,7 +166,7 @@ advanced_css = """
     background-color: rgba(175,184,193,0.2);
 }
 
-/* Code block style: background color, padding, margins, rounded corners. */
+/* code block css */
 .markdown-body pre code {
     display: block;
     overflow: auto;