From 4ad432e1da21f1481c5be615338b55b75e5aff9b Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Mon, 16 Oct 2023 22:13:59 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E7=89=88HTML=E6=8A=A5=E5=91=8A?= =?UTF-8?q?=E9=A1=B5=E9=9D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/crazy_utils.py | 48 --------- crazy_functions/latex_fns/latex_actions.py | 2 +- crazy_functions/pdf_fns/parse_pdf.py | 2 +- crazy_functions/pdf_fns/report_gen_html.py | 48 +++++++++ crazy_functions/pdf_fns/report_template.html | 104 +++++++++++++++++++ crazy_functions/批量翻译PDF文档_NOUGAT.py | 3 +- crazy_functions/批量翻译PDF文档_多线程.py | 4 +- 7 files changed, 158 insertions(+), 53 deletions(-) create mode 100644 crazy_functions/pdf_fns/report_gen_html.py create mode 100644 crazy_functions/pdf_fns/report_template.html diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 3f2ee76..c6a4155 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -767,54 +767,6 @@ def try_install_deps(deps, reload_m=[]): importlib.reload(__import__(m)) -HTML_CSS = """ -.row { - display: flex; - flex-wrap: wrap; -} -.column { - flex: 1; - padding: 10px; -} -.table-header { - font-weight: bold; - border-bottom: 1px solid black; -} -.table-row { - border-bottom: 1px solid lightgray; -} -.table-cell { - padding: 5px; -} -""" - -TABLE_CSS = """ -
-
REPLACE_A
-
REPLACE_B
-
-""" - -class construct_html(): - def __init__(self) -> None: - self.css = HTML_CSS - self.html_string = f'翻译结果' - - - def add_row(self, a, b): - tmp = TABLE_CSS - from toolbox import markdown_convertion - tmp = tmp.replace('REPLACE_A', markdown_convertion(a)) - tmp = tmp.replace('REPLACE_B', markdown_convertion(b)) - self.html_string += tmp - - - def save_file(self, file_name): - with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f: - f.write(self.html_string.encode('utf-8', 'ignore').decode()) - return os.path.join(get_log_folder(), file_name) - - def get_plugin_arg(plugin_kwargs, key, default): # 如果参数是空的 if (key in plugin_kwargs) and (plugin_kwargs[key] == ""): plugin_kwargs.pop(key) diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py index dcde0e9..7e561df 100644 --- a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -423,7 +423,7 @@ def write_html(sp_file_contents, sp_file_result, chatbot, project_folder): # write html try: import shutil - from ..crazy_utils import construct_html + from crazy_functions.pdf_fns.report_gen_html import construct_html from toolbox import gen_time_str ch = construct_html() orig = "" diff --git a/crazy_functions/pdf_fns/parse_pdf.py b/crazy_functions/pdf_fns/parse_pdf.py index a047efc..9853fd5 100644 --- a/crazy_functions/pdf_fns/parse_pdf.py +++ b/crazy_functions/pdf_fns/parse_pdf.py @@ -73,7 +73,7 @@ def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chat return res_path def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG): - from crazy_functions.crazy_utils import construct_html + from crazy_functions.pdf_fns.report_gen_html import construct_html from crazy_functions.crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency diff --git a/crazy_functions/pdf_fns/report_gen_html.py b/crazy_functions/pdf_fns/report_gen_html.py new file mode 100644 index 0000000..2fc1f2d --- /dev/null +++ b/crazy_functions/pdf_fns/report_gen_html.py @@ -0,0 +1,48 @@ +from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder +import os + + + + +class construct_html(): + def __init__(self) -> None: + self.html_string = "" + + def add_row(self, a, b): + from toolbox import markdown_convertion + template = """ + { + primary_col: { + header: `__PRIMARY_HEADER__`, + msg: `__PRIMARY_MSG__`, + }, + secondary_rol: { + header: `__SECONDARY_HEADER__`, + msg: `__SECONDARY_MSG__`, + } + }, + """ + template_ = template + if len(a.split('\n')) == 1: + template_ = template_.replace("__PRIMARY_HEADER__", markdown_convertion(a[:10]) + ' ...') + template_ = template_.replace("__PRIMARY_MSG__", markdown_convertion(a)) + else: + template_ = template_.replace("__PRIMARY_HEADER__", markdown_convertion(a.split('\n')[0])) + template_ = template_.replace("__PRIMARY_MSG__", markdown_convertion('\n'.join(a.split('\n')[1:]))) + + if len(b.split('\n')) == 1: + template_ = template_.replace("__SECONDARY_HEADER__", markdown_convertion(b[:10]) + ' ...') + template_ = template_.replace("__SECONDARY_MSG__", markdown_convertion(b)) + else: + template_ = template_.replace("__SECONDARY_HEADER__", markdown_convertion(b.split('\n')[0])) + template_ = template_.replace("__SECONDARY_MSG__", markdown_convertion('\n'.join(b.split('\n')[1:]))) + self.html_string += template_ + + def save_file(self, file_name): + from toolbox import get_log_folder + with open('crazy_functions/pdf_fns/report_template.html', 'r', encoding='utf8') as f: + html_template = f.read() + html_template = html_template.replace("__TF_ARR__", self.html_string) + with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f: + f.write(html_template.encode('utf-8', 'ignore').decode()) + return os.path.join(get_log_folder(), file_name) diff --git a/crazy_functions/pdf_fns/report_template.html b/crazy_functions/pdf_fns/report_template.html new file mode 100644 index 0000000..39a1e7c --- /dev/null +++ b/crazy_functions/pdf_fns/report_template.html @@ -0,0 +1,104 @@ + + + + + + __TITLE__ + + + + + +
+

文章目录

+ +
+ + + diff --git a/crazy_functions/批量翻译PDF文档_NOUGAT.py b/crazy_functions/批量翻译PDF文档_NOUGAT.py index c0961c1..65a7c01 100644 --- a/crazy_functions/批量翻译PDF文档_NOUGAT.py +++ b/crazy_functions/批量翻译PDF文档_NOUGAT.py @@ -97,7 +97,8 @@ def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwa generated_conclusion_files = [] generated_html_files = [] DST_LANG = "中文" - from crazy_functions.crazy_utils import nougat_interface, construct_html + from crazy_functions.crazy_utils import nougat_interface + from crazy_functions.pdf_fns.report_gen_html import construct_html nougat_handle = nougat_interface() for index, fp in enumerate(file_manifest): chatbot.append(["当前进度:", f"正在解析论文,请稍候。(第一次运行时,需要花费较长时间下载NOUGAT参数)"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py index 0f60a90..ca581d1 100644 --- a/crazy_functions/批量翻译PDF文档_多线程.py +++ b/crazy_functions/批量翻译PDF文档_多线程.py @@ -63,7 +63,7 @@ def 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwa generated_conclusion_files = [] generated_html_files = [] DST_LANG = "中文" - from crazy_functions.crazy_utils import construct_html + from crazy_functions.pdf_fns.report_gen_html import construct_html for index, fp in enumerate(file_manifest): chatbot.append(["当前进度:", f"正在连接GROBID服务,请稍候: {grobid_url}\n如果等待时间过长,请修改config中的GROBID_URL,可修改成本地GROBID服务。"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 article_dict = parse_pdf(fp, grobid_url) @@ -86,7 +86,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, TOKEN_LIMIT_PER_FRAGMENT = 1024 generated_conclusion_files = [] generated_html_files = [] - from crazy_functions.crazy_utils import construct_html + from crazy_functions.pdf_fns.report_gen_html import construct_html for index, fp in enumerate(file_manifest): # 读取PDF文件 file_content, page_one = read_and_clean_pdf_text(fp)