translate not fin
parent 8d528190a9
commit c376e46f4d

docs/translate_english.json (new file, 544 lines)
File diff suppressed because one or more lines are too long
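The suppressed file is the translation map that the script below maintains. Since map_to_json writes it with json.dump(dict_, f, indent=4, ensure_ascii=False), it is presumably a flat Chinese-to-English dictionary; a hypothetical excerpt (these entries are invented for illustration, not taken from the commit):

{
    "你好": "Hello",
    "查询代理的地理位置": "CheckProxyGeolocation"
}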
@@ -1,10 +1,13 @@
import os
import json
import functools
import re
import pickle
import time

CACHE_FOLDER = "gpt_log"
blacklist = ['multi-language', 'gpt_log', '.git', 'private_upload']
LANG = "English"

if not os.path.exists(CACHE_FOLDER):
    os.makedirs(CACHE_FOLDER)
@@ -78,7 +81,6 @@ def lru_file_cache(maxsize=128, ttl=None, filename=None):

    return decorator_function

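The body of lru_file_cache sits outside this hunk. For orientation, a minimal sketch of what a pickle-backed cache decorator with this signature could look like, built only from the module's imports (functools, pickle, time) and the call sites below; this is an assumption, not the repository's actual implementation:

def lru_file_cache_sketch(maxsize=128, ttl=None, filename=None):
    cache_path = os.path.join(CACHE_FOLDER, f"{filename}.pkl")
    def decorator_function(func):
        # load a previously pickled cache if one exists
        if os.path.exists(cache_path):
            with open(cache_path, 'rb') as f:
                cache = pickle.load(f)
        else:
            cache = {}  # key -> (timestamp, value)
        @functools.wraps(func)
        def wrapper(*args):
            key = repr(args)
            if key in cache:
                ts, value = cache[key]
                if ttl is None or time.time() - ts < ttl:
                    return value
            value = func(*args)
            cache[key] = (time.time(), value)
            if len(cache) > maxsize:  # evict the oldest entry
                cache.pop(min(cache, key=lambda k: cache[k][0]))
            with open(cache_path, 'wb') as f:
                pickle.dump(cache, f)
            return value
        return wrapper
    return decorator_function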
def contains_chinese(string):
    """
    Returns True if the given string contains Chinese characters, False otherwise.
@@ -86,122 +88,259 @@ def contains_chinese(string):
    """
    chinese_regex = re.compile(u'[\u4e00-\u9fff]+')
    return chinese_regex.search(string) is not None

def extract_chinese_characters(file_path):
    syntax = []
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
        import ast
        root = ast.parse(content)
        for node in ast.walk(root):
            if isinstance(node, ast.Name):
                if contains_chinese(node.id):
                    print(node.id)
                    syntax.append(node)
def split_list(lst, n_each_req):
    """
    Split a list into smaller lists, each with a maximum number of elements.
    :param lst: the list to split
    :param n_each_req: the maximum number of elements in each sub-list
    :return: a list of sub-lists
    """
    result = []
    for i in range(0, len(lst), n_each_req):
        result.append(lst[i:i + n_each_req])
    return result
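For example, the last chunk simply carries whatever remains:

assert split_list(list(range(5)), 2) == [[0, 1], [2, 3], [4]]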
    return syntax

def map_to_json(map, language):
    dict_ = read_map_from_json(language)
    dict_.update(map)
    with open(f'docs/translate_{language.lower()}.json', 'w', encoding='utf8') as f:
        json.dump(dict_, f, indent=4, ensure_ascii=False)

def extract_chinese_characters_from_directory(directory_path):
    chinese_characters = []

def read_map_from_json(language):
    if os.path.exists(f'docs/translate_{language.lower()}.json'):
        with open(f'docs/translate_{language.lower()}.json', 'r', encoding='utf8') as f:
            return json.load(f)
    return {}
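Together, map_to_json and read_map_from_json form a read-merge-write cycle, so repeated runs only ever grow the cache file. A minimal illustration (assuming the docs/ directory exists; the entry is hypothetical):

map_to_json({"你好": "Hello"}, language="English")  # merges into docs/translate_english.json
assert read_map_from_json("English").get("你好") == "Hello"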

cached_translation = {}
cached_translation = read_map_from_json(language=LANG)

@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
def trans(word_to_translate, language, special=False):
    if len(word_to_translate) == 0: return {}
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from toolbox import get_conf, ChatBotWithCookies
    proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
        get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
    llm_kwargs = {
        'api_key': API_KEY,
        'llm_model': LLM_MODEL,
        'top_p': 1.0,
        'max_length': None,
        'temperature': 0.0,
    }
    N_EACH_REQ = 16
    word_to_translate_split = split_list(word_to_translate, N_EACH_REQ)
    inputs_array = [str(s) for s in word_to_translate_split]
    inputs_show_user_array = inputs_array
    history_array = [[] for _ in inputs_array]
    if special:  # to English using CamelCase Naming Convention
        sys_prompt_array = [f"Translate following names to English with CamelCase naming convention. Keep original format" for _ in inputs_array]
    else:
        sys_prompt_array = [f"Translate following sentences to {LANG}. Keep original format." for _ in inputs_array]
    chatbot = ChatBotWithCookies(llm_kwargs)
    gpt_say_generator = request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array,
        inputs_show_user_array,
        llm_kwargs,
        chatbot,
        history_array,
        sys_prompt_array,
    )
    while True:
        try:
            gpt_say = next(gpt_say_generator)
            print(gpt_say[1][0][1])
        except StopIteration as e:
            result = e.value
            break
    translated_result = {}
    for i, r in enumerate(result):
        if i % 2 == 1:
            try:
                res_before_trans = eval(result[i-1])
                res_after_trans = eval(result[i])
                for a, b in zip(res_before_trans, res_after_trans):
                    translated_result[a] = b
            except:
                try:
                    res_before_trans = eval(result[i-1])
                    result[i] = result[i].strip('[\']')
                    res_after_trans = [s for s in result[i].split("', '")]
                    for a, b in zip(res_before_trans, res_after_trans):
                        translated_result[a] = b
                except:
                    res_before_trans = eval(result[i-1])
                    for a in res_before_trans:
                        translated_result[a] = None
    return translated_result
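The parsing loop above relies on result alternating between the prompt that was sent (a str() of a Python list) and the model's reply; eval recovers both, with a string-splitting fallback when the reply is not valid Python, and None as the last resort. A self-contained rendition of that fallback chain for a single prompt/reply pair (function name and demo values are illustrative only):

def parse_pair(sent: str, reply: str) -> dict:
    keys = eval(sent)                  # the prompt was str(list_of_phrases)
    try:
        values = eval(reply)           # ideal case: the reply is a valid Python list
    except Exception:
        try:
            values = reply.strip("[']").split("', '")  # salvage a near-list reply
        except Exception:
            values = [None] * len(keys)  # give up: mark every phrase untranslated
    return dict(zip(keys, values))     # zip truncates on length mismatch, as above

parse_pair("['你好', '世界']", "['Hello', 'World']")  # {'你好': 'Hello', '世界': 'World'}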


def step_1_core_key_translate():
    def extract_chinese_characters(file_path):
        syntax = []
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
            import ast
            root = ast.parse(content)
            for node in ast.walk(root):
                if isinstance(node, ast.Name):
                    if contains_chinese(node.id): syntax.append(node.id)
                if isinstance(node, ast.Import):
                    for n in node.names:
                        if contains_chinese(n.name): syntax.append(n.name)
                elif isinstance(node, ast.ImportFrom):
                    for n in node.names:
                        if contains_chinese(n.name): syntax.append(n.name)
                    for k in node.module.split('.'):
                        if contains_chinese(k): syntax.append(k)
        return syntax
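Applied to a small snippet, this walk picks up Chinese identifiers in expressions but not string literals (those are handled separately in step 2). A short illustration (the demo snippet is invented):

import ast
demo = "def 查询天气(城市):\n    return 城市"
names = [n.id for n in ast.walk(ast.parse(demo))
         if isinstance(n, ast.Name) and contains_chinese(n.id)]
# names == ['城市']: the function name and parameter arrive as
# ast.FunctionDef / ast.arg nodes, not ast.Name, so they are not collected here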
    def extract_chinese_characters_from_directory(directory_path):
        chinese_characters = []
        for root, dirs, files in os.walk(directory_path):
            if any([b in root for b in blacklist]):
                continue
            for file in files:
                if file.endswith('.py'):
                    file_path = os.path.join(root, file)
                    chinese_characters.extend(extract_chinese_characters(file_path))
        return chinese_characters

    directory_path = './'
    chinese_core_names = extract_chinese_characters_from_directory(directory_path)
    chinese_core_keys = [name for name in chinese_core_names]
    chinese_core_keys_norepeat = []
    for d in chinese_core_keys:
        if d not in chinese_core_keys_norepeat: chinese_core_keys_norepeat.append(d)
    need_translate = []
    cached_translation = read_map_from_json(language=LANG)
    cached_translation_keys = list(cached_translation.keys())
    for d in chinese_core_keys_norepeat:
        if d not in cached_translation_keys:
            need_translate.append(d)

    need_translate_mapping = trans(need_translate, language=LANG, special=True)
    map_to_json(need_translate_mapping, language=LANG)
    cached_translation = read_map_from_json(language=LANG)
    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))

    chinese_core_keys_norepeat_mapping = {}
    for k in chinese_core_keys_norepeat:
        chinese_core_keys_norepeat_mapping.update({k: cached_translation[k]})
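Sorting the merged map by descending key length (the sorted(..., key=lambda x: -len(x[0])) line above) matters because the replacement below is plain substring substitution: longer keys must be rewritten before any shorter key contained in them. A small illustration (hypothetical keys):

mapping = {"代理": "Proxy", "代理设置": "ProxySettings"}
ordered = dict(sorted(mapping.items(), key=lambda x: -len(x[0])))
text = "代理设置"
for k, v in ordered.items():
    text = text.replace(k, v)
assert text == "ProxySettings"  # replacing "代理" first would yield "Proxy设置"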

    # ===============================================
    # copy
    # ===============================================
    def copy_source_code():
        from toolbox import get_conf
        import shutil
        import os
        try: shutil.rmtree(f'./multi-language/{LANG}/')
        except: pass
        os.makedirs(f'./multi-language', exist_ok=True)
        backup_dir = f'./multi-language/{LANG}/'
        shutil.copytree('./', backup_dir, ignore=lambda x, y: blacklist)
    copy_source_code()
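A note on the ignore callable: shutil.copytree calls it once per visited directory with (dir, names) and then skips whichever returned names are actually present, so unconditionally returning the whole blacklist works; in CPython's implementation, returned names that do not occur in a directory are simply harmless. The stdlib helper expresses the same intent:

import shutil
ignore = shutil.ignore_patterns('multi-language', 'gpt_log', '.git', 'private_upload')
# shutil.copytree('./', backup_dir, ignore=ignore)  # equivalent spelling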

    # ===============================================
    # primary key replace
    # ===============================================
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                chinese_characters.extend(extract_chinese_characters(file_path))
    return chinese_characters
                syntax = []
                # read again
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                for k, v in chinese_core_keys_norepeat_mapping.items():
                    content = content.replace(k, v)

directory_path = './'
chinese_characters = extract_chinese_characters_from_directory(directory_path)
word_to_translate = {}
for d in chinese_characters:
    word_to_translate[d['word']] = "TRANS"

def break_dictionary(d, n):
    items = list(d.items())
    num_dicts = (len(items) + n - 1) // n
    return [{k: v for k, v in items[i*n:(i+1)*n]} for i in range(num_dicts)]
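For example, a three-entry dictionary split into chunks of two:

assert break_dictionary({'a': 1, 'b': 2, 'c': 3}, 2) == [{'a': 1, 'b': 2}, {'c': 3}]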

N_EACH_REQ = 50
word_to_translate_split = break_dictionary(word_to_translate, N_EACH_REQ)
LANG = "English"

@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
def trans(words):
    # from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    # from toolbox import get_conf, ChatBotWithCookies
    # proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
    #     get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
    # llm_kwargs = {
    #     'api_key': API_KEY,
    #     'llm_model': LLM_MODEL,
    #     'top_p': 1.0,
    #     'max_length': None,
    #     'temperature': 0.0,
    # }
    # plugin_kwargs = {}
    # chatbot = ChatBotWithCookies(llm_kwargs)
    # history = []
    # for gpt_say in request_gpt_model_in_new_thread_with_ui_alive(
    #     inputs=words, inputs_show_user=words,
    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
    #     sys_prompt=f"Translate following words to {LANG}, replace `TRANS` with translated result."
    # ):
    #     gpt_say = gpt_say[1][0][1]
    # return gpt_say
    return '{}'

translated_result = {}
for d in word_to_translate_split:
    res = trans(str(d))
    try:
        # convert translated result back to python dictionary
        res_dict = eval(res)
    except:
        print('Unexpected output.')
    translated_result.update(res_dict)

print('All Chinese characters:', chinese_characters)
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)
# =================== create copy =====================
|
||||
def copy_source_code():
|
||||
"""
|
||||
一键更新协议:备份和下载
|
||||
"""
|
||||
from toolbox import get_conf
|
||||
import shutil
|
||||
import os
|
||||
import requests
|
||||
import zipfile
|
||||
try: shutil.rmtree(f'./multi-language/{LANG}/')
|
||||
except: pass
|
||||
os.makedirs(f'./multi-language', exist_ok=True)
|
||||
backup_dir = f'./multi-language/{LANG}/'
|
||||
shutil.copytree('./', backup_dir, ignore=lambda x, y: ['multi-language', 'gpt_log', '.git', 'private_upload'])
|
||||
copy_source_code()

def step_2_core_key_translate():

    # =================================================================================================
    # step2
    # =================================================================================================
    def get_strings(node):
        strings = []

        # recursively traverse the AST
        for child in ast.iter_child_nodes(node):
            if isinstance(child, ast.Str):
                if contains_chinese(child.s):
                    string_ = child.s.strip().strip(',').strip().strip('.').strip()
                    if string_.startswith('[Local Message]'):
                        string_ = string_.replace('[Local Message]', '')
                        string_ = string_.strip().strip(',').strip().strip('.').strip()
                    strings.append([
                        string_,
                        child.lineno*10000 + child.col_offset
                    ])
            elif isinstance(child, ast.AST):
                strings.extend(get_strings(child))

        return strings
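One caveat worth flagging: ast.Str is deprecated since Python 3.8, where string constants arrive as ast.Constant (the isinstance check above still matches through a compatibility shim). A modern equivalent of the type check would be:

if isinstance(child, ast.Constant) and isinstance(child.value, str):
    s = child.value  # same payload that ast.Str exposed as child.s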

    string_literals = []
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                syntax = []
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                import ast
                tree = ast.parse(content)
                res = get_strings(tree)
                string_literals.extend(res)

    chinese_literal_names = []
    chinese_literal_names_norepeat = []
    for string, offset in string_literals:
        chinese_literal_names.append(string)
    chinese_literal_names_norepeat = []
    for d in chinese_literal_names:
        if d not in chinese_literal_names_norepeat: chinese_literal_names_norepeat.append(d)
    need_translate = []
    cached_translation = read_map_from_json(language=LANG)
    cached_translation_keys = list(cached_translation.keys())
    for d in chinese_literal_names_norepeat:
        if d not in cached_translation_keys:
            need_translate.append(d)

for d in chinese_characters:
    d['file'] = f'./multi-language/{LANG}/' + d['file']
    if d['word'] in translated_result:
        d['trans'] = translated_result[d['word']]
    else:
        d['trans'] = None
    up = trans(need_translate, language=LANG, special=False)
    map_to_json(up, language=LANG)
    cached_translation = read_map_from_json(language=LANG)
    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))

chinese_characters = sorted(chinese_characters, key=lambda x: len(x['word']), reverse=True)
for d in chinese_characters:
    if d['trans'] is None:
        continue

    with open(d['file'], 'r', encoding='utf-8') as f:
        content = f.read()

    content.replace(d['word'], d['trans'])
    substring = d['trans']
    substring_start_index = content.find(substring)
    substring_end_index = substring_start_index + len(substring) - 1
    if content[substring_start_index].isalpha() or content[substring_start_index].isdigit():
        content = content[:substring_start_index+1]

    # ===============================================
    # literal key replace
    # ===============================================
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                syntax = []
                # read again
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                for k, v in cached_translation.items():
                    content = content.replace(k, v)

                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)


step_1_core_key_translate()
step_2_core_key_translate()
|
11
theme.py
11
theme.py
@ -103,35 +103,30 @@ def adjust_theme():
|
||||
|
||||
|
||||
advanced_css = """
/* Set the table's outer margin to 1em, collapse borders between cells, and show empty cells. */
.markdown-body table {
    margin: 1em 0;
    border-collapse: collapse;
    empty-cells: show;
}

/* Set table cell padding to 5px, border width to 1.2px, border color to --border-color-primary. */
.markdown-body th, .markdown-body td {
    border: 1.2px solid var(--border-color-primary);
    padding: 5px;
}

/* Set the table header background to rgba(175,184,193,0.2), i.e. 0.2 opacity. */
.markdown-body thead {
    background-color: rgba(175,184,193,0.2);
}

/* Set table header cell padding to 0.5em and 0.2em. */
.markdown-body thead th {
    padding: .5em .2em;
}

/* Remove the default spacing before list markers so lists align with the text. */
.markdown-body ol, .markdown-body ul {
    padding-inline-start: 2em !important;
}

/* Style the chat bubbles: rounded corners, max width, shadow, etc. */
/* chat box. */
[class *= "message"] {
    border-radius: var(--radius-xl) !important;
    /* padding: var(--spacing-xl) !important; */
@@ -151,7 +146,7 @@ advanced_css = """
    border-bottom-right-radius: 0 !important;
}

/* Give inline code a light gray background, with rounded corners and spacing. */
/* inline code block. */
.markdown-body code {
    display: inline;
    white-space: break-spaces;
@@ -171,7 +166,7 @@ advanced_css = """
    background-color: rgba(175,184,193,0.2);
}

/* Style code blocks: background color, padding, margins, rounded corners. */
/* code block css */
.markdown-body pre code {
    display: block;
    overflow: auto;