From a711db0b5b357b0ac886bd0531d1ffc06bc23403 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Wed, 25 Oct 2023 11:32:32 +0800 Subject: [PATCH] stashed commit --- config.py | 5 ++++ crazy_functions/crazy_utils.py | 9 +++--- crazy_functions/latex_fns/latex_actions.py | 22 ++++++++++++-- crazy_functions/latex_fns/latex_toolbox.py | 34 ++++++++++++++++++++++ tests/test_plugins.py | 4 ++- 5 files changed, 66 insertions(+), 8 deletions(-) diff --git a/config.py b/config.py index 387fab6..b76997f 100644 --- a/config.py +++ b/config.py @@ -212,6 +212,11 @@ WHEN_TO_USE_PROXY = ["Download_LLM", "Download_Gradio_Theme", "Connect_Grobid", # 自定义按钮的最大数量限制 NUM_CUSTOM_BASIC_BTN = 4 + +# LATEX实验性功能 +LATEX_EXPERIMENTAL = False + + """ 在线大模型配置关联关系示意图 │ diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 8533d08..8cc4042 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -205,13 +205,12 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( retry_op = retry_times_at_unknown_error exceeded_cnt = 0 mutable[index][2] = "执行中" + detect_timeout = lambda: len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience while True: # watchdog error - if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience: - raise RuntimeError("检测到程序终止。") + if detect_timeout(): raise RuntimeError("检测到程序终止。") try: # 【第一种情况】:顺利完成 - # time.sleep(10); raise RuntimeError("测试") gpt_say = predict_no_ui_long_connection( inputs=inputs, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True @@ -219,7 +218,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( mutable[index][2] = "已成功" return gpt_say except ConnectionAbortedError as token_exceeded_error: - # 【第二种情况】:Token溢出, + # 【第二种情况】:Token溢出 if handle_token_exceed: exceeded_cnt += 1 # 【选择处理】 尝试计算比例,尽可能多地保留文本 @@ -240,6 +239,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( return gpt_say # 放弃 except: # 【第三种情况】:其他错误 + if detect_timeout(): raise RuntimeError("检测到程序终止。") tb_str = '```\n' + trimmed_format_exc() + '```' print(tb_str) gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n" @@ -256,6 +256,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( for i in range(wait): mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1) # 开始重试 + if detect_timeout(): raise RuntimeError("检测到程序终止。") mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}" continue # 返回重试 else: diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py index 7e561df..6232b19 100644 --- a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -1,9 +1,10 @@ from toolbox import update_ui, update_ui_lastest_msg, get_log_folder -from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone +from toolbox import get_conf, objdump, objload, promote_file_to_downloadzone from .latex_toolbox import PRESERVE, TRANSFORM from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout +from .latex_toolbox import find_title_and_abs import os, shutil import re @@ -90,7 +91,15 @@ class LatexPaperSplit(): "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者) self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" + self.title = "unknown" + self.abstract = "unknown" + def read_title_and_abstract(self, txt): + title, abstract = find_title_and_abs(txt) + if title is not None: + self.title = title.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '') + if abstract is not None: + self.abstract = abstract.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '') def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10): """ @@ -234,8 +243,8 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。')) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 lps = LatexPaperSplit() + lps.read_title_and_abstract(merged_content) res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数 - # <-------- 拆分过长的latex片段 ----------> pfg = LatexPaperFileGroup() for index, r in enumerate(res): @@ -256,12 +265,19 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin else: # <-------- gpt 多线程请求 ----------> + LATEX_EXPERIMENTAL, = get_conf('LATEX_EXPERIMENTAL') + history_array = [[""] for _ in range(n_split)] + if LATEX_EXPERIMENTAL: + paper_meta = f"The paper you processing is `{lps.title}`, a part of the abstraction is `{lps.abstract}`" + paper_meta_max_len = 888 + history_array = [[ paper_meta[:paper_meta_max_len] + '...', "Understand, what should I do?"] for _ in range(n_split)] + gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( inputs_array=inputs_array, inputs_show_user_array=inputs_show_user_array, llm_kwargs=llm_kwargs, chatbot=chatbot, - history_array=[[""] for _ in range(n_split)], + history_array=history_array, sys_prompt_array=sys_prompt_array, # max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待 scroller_max_len = 40 diff --git a/crazy_functions/latex_fns/latex_toolbox.py b/crazy_functions/latex_fns/latex_toolbox.py index 330cb65..b56825a 100644 --- a/crazy_functions/latex_fns/latex_toolbox.py +++ b/crazy_functions/latex_fns/latex_toolbox.py @@ -318,6 +318,40 @@ def merge_tex_files_(project_foler, main_file, mode): main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:] return main_file + +def find_title_and_abs(main_file): + + def extract_abstract_1(text): + pattern = r"\\abstract\{(.*?)\}" + match = re.search(pattern, text, re.DOTALL) + if match: + return match.group(1) + else: + return None + + def extract_abstract_2(text): + pattern = r"\\begin\{abstract\}(.*?)\\end\{abstract\}" + match = re.search(pattern, text, re.DOTALL) + if match: + return match.group(1) + else: + return None + + def extract_title(string): + pattern = r"\\title\{(.*?)\}" + match = re.search(pattern, string, re.DOTALL) + + if match: + return match.group(1) + else: + return None + + abstract = extract_abstract_1(main_file) + if abstract is None: + abstract = extract_abstract_2(main_file) + title = extract_title(main_file) + return title, abstract + def merge_tex_files(project_foler, main_file, mode): """ Merge Tex project recrusively diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 5998bc4..8470895 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -11,7 +11,9 @@ if __name__ == "__main__": from tests.test_utils import plugin_test # plugin_test(plugin='crazy_functions.函数动态生成->函数动态生成', main_input='交换图像的蓝色通道和红色通道', advanced_arg={"file_path_arg": "./build/ants.jpg"}) - plugin_test(plugin='crazy_functions.Latex输出PDF结果->Latex翻译中文并重新编译PDF', main_input="2307.07522") + # plugin_test(plugin='crazy_functions.Latex输出PDF结果->Latex翻译中文并重新编译PDF', main_input="2307.07522") + + plugin_test(plugin='crazy_functions.Latex输出PDF结果->Latex翻译中文并重新编译PDF', main_input="G:/SEAFILE_LOCAL/50503047/我的资料库/学位/paperlatex/aaai/Fu_8368_with_appendix") # plugin_test(plugin='crazy_functions.虚空终端->虚空终端', main_input='修改api-key为sk-jhoejriotherjep')