minor pdf translation adjustment
This commit is contained in:
parent
744759704d
commit
bd5280df1b
@ -1,7 +1,7 @@
|
|||||||
from toolbox import CatchException, report_exception, get_log_folder, gen_time_str, check_packages
|
from toolbox import CatchException, report_exception, get_log_folder, gen_time_str, check_packages
|
||||||
from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion
|
from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion
|
||||||
from toolbox import write_history_to_file, promote_file_to_downloadzone, get_conf, extract_archive
|
from toolbox import write_history_to_file, promote_file_to_downloadzone, get_conf, extract_archive
|
||||||
from toolbox import get_upload_folder, zip_folder
|
from toolbox import generate_file_link, zip_folder, trimmed_format_exc, trimmed_format_exc_markdown
|
||||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||||
from .crazy_utils import read_and_clean_pdf_text
|
from .crazy_utils import read_and_clean_pdf_text
|
||||||
@ -52,7 +52,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
|||||||
yield from 解析PDF_DOC2X(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request)
|
yield from 解析PDF_DOC2X(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request)
|
||||||
return
|
return
|
||||||
except:
|
except:
|
||||||
chatbot.append([None, "DOC2X服务不可用,现在将执行效果稍差的旧版代码。"])
|
chatbot.append([None, f"DOC2X服务不可用,现在将执行效果稍差的旧版代码。{trimmed_format_exc_markdown()}"])
|
||||||
yield from update_ui(chatbot=chatbot, history=history)
|
yield from update_ui(chatbot=chatbot, history=history)
|
||||||
|
|
||||||
# ------- 第二种方法,效果次优 -------
|
# ------- 第二种方法,效果次优 -------
|
||||||
@ -146,6 +146,21 @@ def 解析PDF_DOC2X_单文件(fp, project_folder, llm_kwargs, plugin_kwargs, cha
|
|||||||
promote_file_to_downloadzone(generated_fp, chatbot=chatbot)
|
promote_file_to_downloadzone(generated_fp, chatbot=chatbot)
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||||
|
|
||||||
|
# 生成在线预览html
|
||||||
|
file_name = '在线预览翻译(原文)' + gen_time_str() + '.html'
|
||||||
|
preview_fp = os.path.join(ex_folder, file_name)
|
||||||
|
from shared_utils.advanced_markdown_format import markdown_convertion_for_file
|
||||||
|
with open(generated_fp, "r", encoding="utf-8") as f:
|
||||||
|
md = f.read()
|
||||||
|
# Markdown中使用不标准的表格,需要在表格前加上一个emoji,以便公式渲染
|
||||||
|
md = re.sub(r'^<table>', r'😃<table>', md, flags=re.MULTILINE)
|
||||||
|
html = markdown_convertion_for_file(md)
|
||||||
|
with open(preview_fp, "w", encoding="utf-8") as f: f.write(html)
|
||||||
|
chatbot.append([None, f"生成在线预览:{generate_file_link([preview_fp])}"])
|
||||||
|
promote_file_to_downloadzone(preview_fp, chatbot=chatbot)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
chatbot.append((None, f"调用Markdown插件 {ex_folder} ..."))
|
chatbot.append((None, f"调用Markdown插件 {ex_folder} ..."))
|
||||||
plugin_kwargs['markdown_expected_output_dir'] = ex_folder
|
plugin_kwargs['markdown_expected_output_dir'] = ex_folder
|
||||||
|
|
||||||
@ -157,21 +172,16 @@ def 解析PDF_DOC2X_单文件(fp, project_folder, llm_kwargs, plugin_kwargs, cha
|
|||||||
# 修正一些公式问题
|
# 修正一些公式问题
|
||||||
with open(generated_fp, 'r', encoding='utf8') as f: content = f.read()
|
with open(generated_fp, 'r', encoding='utf8') as f: content = f.read()
|
||||||
content = content.replace('```markdown', '\n').replace('```', '\n')
|
content = content.replace('```markdown', '\n').replace('```', '\n')
|
||||||
|
# Markdown中使用不标准的表格,需要在表格前加上一个emoji,以便公式渲染
|
||||||
|
content = re.sub(r'^<table>', r'😃<table>', content, flags=re.MULTILINE)
|
||||||
with open(generated_fp, 'w', encoding='utf8') as f: f.write(content)
|
with open(generated_fp, 'w', encoding='utf8') as f: f.write(content)
|
||||||
# 生成在线预览html
|
# 生成在线预览html
|
||||||
file_name = '在线预览翻译' + gen_time_str() + '.html'
|
file_name = '在线预览翻译' + gen_time_str() + '.html'
|
||||||
# with open('crazy_functions/pdf_fns/report_template_v2.html', 'r', encoding='utf8') as f:
|
|
||||||
# html_template = f.read()
|
|
||||||
# html_template = html_template.replace("{MARKDOWN_FILE_PATH}", translated_f_name)
|
|
||||||
preview_fp = os.path.join(ex_folder, file_name)
|
preview_fp = os.path.join(ex_folder, file_name)
|
||||||
# with open(preview_fp, 'w', encoding='utf8') as f:
|
|
||||||
# f.write(html_template)
|
|
||||||
# 生成在线预览html
|
|
||||||
from shared_utils.advanced_markdown_format import markdown_convertion_for_file
|
from shared_utils.advanced_markdown_format import markdown_convertion_for_file
|
||||||
with open(generated_fp, "r", encoding="utf-8") as f:
|
with open(generated_fp, "r", encoding="utf-8") as f:
|
||||||
md = f.read()
|
md = f.read()
|
||||||
html = markdown_convertion_for_file(md)
|
html = markdown_convertion_for_file(md)
|
||||||
# print(html)
|
|
||||||
with open(preview_fp, "w", encoding="utf-8") as f: f.write(html)
|
with open(preview_fp, "w", encoding="utf-8") as f: f.write(html)
|
||||||
promote_file_to_downloadzone(preview_fp, chatbot=chatbot)
|
promote_file_to_downloadzone(preview_fp, chatbot=chatbot)
|
||||||
# 生成包含图片的压缩包
|
# 生成包含图片的压缩包
|
||||||
|
@ -211,8 +211,21 @@ def markdown_convertion_for_file(txt):
|
|||||||
"""
|
"""
|
||||||
将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。
|
将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。
|
||||||
"""
|
"""
|
||||||
pre = '<div class="markdown-body">'
|
from themes.theme import advanced_css
|
||||||
suf = "</div>"
|
pre = f"""
|
||||||
|
<!DOCTYPE html><head><meta charset="utf-8"><title>对话历史</title><style>{advanced_css}</style></head>
|
||||||
|
<body>
|
||||||
|
<div class="test_temp1" style="width:10%; height: 500px; float:left;"></div>
|
||||||
|
<div class="test_temp2" style="width:80%;padding: 40px;float:left;padding-left: 20px;padding-right: 20px;box-shadow: rgba(0, 0, 0, 0.2) 0px 0px 8px 8px;border-radius: 10px;">
|
||||||
|
<div class="markdown-body">
|
||||||
|
"""
|
||||||
|
suf = """
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="test_temp3" style="width:10%; height: 500px; float:left;"></div>
|
||||||
|
</body>
|
||||||
|
"""
|
||||||
|
|
||||||
if txt.startswith(pre) and txt.endswith(suf):
|
if txt.startswith(pre) and txt.endswith(suf):
|
||||||
# print('警告,输入了已经经过转化的字符串,二次转化可能出问题')
|
# print('警告,输入了已经经过转化的字符串,二次转化可能出问题')
|
||||||
return txt # 已经被转化过,不需要再次转化
|
return txt # 已经被转化过,不需要再次转化
|
||||||
|
13
toolbox.py
13
toolbox.py
@ -192,6 +192,8 @@ def trimmed_format_exc():
|
|||||||
replace_path = "."
|
replace_path = "."
|
||||||
return str.replace(current_path, replace_path)
|
return str.replace(current_path, replace_path)
|
||||||
|
|
||||||
|
def trimmed_format_exc_markdown():
    """Return the trimmed traceback text wrapped in a Markdown code fence.

    The text produced by ``trimmed_format_exc()`` is preceded by two newlines
    and an opening fence line, and followed directly by a closing fence, so
    it can be appended to a chat message and shown as a code block.
    """
    fence = '```'
    # Leading blank line separates the fenced block from any preceding text.
    return f'\n\n{fence}\n{trimmed_format_exc()}{fence}'
|
||||||
|
|
||||||
def CatchException(f):
|
def CatchException(f):
|
||||||
"""
|
"""
|
||||||
@ -536,6 +538,17 @@ def on_file_uploaded(
|
|||||||
return chatbot, txt, txt2, cookies
|
return chatbot, txt, txt2, cookies
|
||||||
|
|
||||||
|
|
||||||
|
def generate_file_link(report_files:List[str]) -> str:
    """Build an HTML snippet containing one download link per file.

    Args:
        report_files: Paths of the files to link to. Each path is shown
            verbatim as the link text, while the ``href`` uses its absolute
            form prefixed with ``file=``.

    Returns:
        The concatenation of one ``<br/><a ...>...</a>`` fragment per entry,
        in input order; an empty string when ``report_files`` is empty.
    """
    # str.join assembles the result in a single pass instead of repeatedly
    # re-allocating a growing string with ``+=`` inside a loop.
    return "".join(
        f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
        for f in report_files
    )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def on_report_generated(cookies:dict, files:List[str], chatbot:ChatBotWithCookies):
|
def on_report_generated(cookies:dict, files:List[str], chatbot:ChatBotWithCookies):
|
||||||
if "files_to_promote" in cookies:
|
if "files_to_promote" in cookies:
|
||||||
report_files = cookies["files_to_promote"]
|
report_files = cookies["files_to_promote"]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user