stashed commit

This commit is contained in:
qingxu fu 2023-10-25 11:32:32 +08:00
parent 27c4e3ef4f
commit a711db0b5b
5 changed files with 66 additions and 8 deletions

View File

@ -212,6 +212,11 @@ WHEN_TO_USE_PROXY = ["Download_LLM", "Download_Gradio_Theme", "Connect_Grobid",
# 自定义按钮的最大数量限制 # 自定义按钮的最大数量限制
NUM_CUSTOM_BASIC_BTN = 4 NUM_CUSTOM_BASIC_BTN = 4
# LATEX实验性功能
LATEX_EXPERIMENTAL = False
""" """
在线大模型配置关联关系示意图 在线大模型配置关联关系示意图

View File

@ -205,13 +205,12 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
retry_op = retry_times_at_unknown_error retry_op = retry_times_at_unknown_error
exceeded_cnt = 0 exceeded_cnt = 0
mutable[index][2] = "执行中" mutable[index][2] = "执行中"
detect_timeout = lambda: len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience
while True: while True:
# watchdog error # watchdog error
if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience: if detect_timeout(): raise RuntimeError("检测到程序终止。")
raise RuntimeError("检测到程序终止。")
try: try:
# 【第一种情况】:顺利完成 # 【第一种情况】:顺利完成
# time.sleep(10); raise RuntimeError("测试")
gpt_say = predict_no_ui_long_connection( gpt_say = predict_no_ui_long_connection(
inputs=inputs, llm_kwargs=llm_kwargs, history=history, inputs=inputs, llm_kwargs=llm_kwargs, history=history,
sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
@ -219,7 +218,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
mutable[index][2] = "已成功" mutable[index][2] = "已成功"
return gpt_say return gpt_say
except ConnectionAbortedError as token_exceeded_error: except ConnectionAbortedError as token_exceeded_error:
# 【第二种情况】Token溢出 # 【第二种情况】Token溢出
if handle_token_exceed: if handle_token_exceed:
exceeded_cnt += 1 exceeded_cnt += 1
# 【选择处理】 尝试计算比例,尽可能多地保留文本 # 【选择处理】 尝试计算比例,尽可能多地保留文本
@ -240,6 +239,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
return gpt_say # 放弃 return gpt_say # 放弃
except: except:
# 【第三种情况】:其他错误 # 【第三种情况】:其他错误
if detect_timeout(): raise RuntimeError("检测到程序终止。")
tb_str = '```\n' + trimmed_format_exc() + '```' tb_str = '```\n' + trimmed_format_exc() + '```'
print(tb_str) print(tb_str)
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n" gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
@ -256,6 +256,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
for i in range(wait): for i in range(wait):
mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1) mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
# 开始重试 # 开始重试
if detect_timeout(): raise RuntimeError("检测到程序终止。")
mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}" mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
continue # 返回重试 continue # 返回重试
else: else:

View File

@ -1,9 +1,10 @@
from toolbox import update_ui, update_ui_lastest_msg, get_log_folder from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone from toolbox import get_conf, objdump, objload, promote_file_to_downloadzone
from .latex_toolbox import PRESERVE, TRANSFORM from .latex_toolbox import PRESERVE, TRANSFORM
from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
from .latex_toolbox import find_title_and_abs
import os, shutil import os, shutil
import re import re
@ -90,7 +91,15 @@ class LatexPaperSplit():
"项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
# 请您不要删除或修改这行警告除非您是论文的原作者如果您是论文原作者欢迎加REAME中的QQ联系开发者 # 请您不要删除或修改这行警告除非您是论文的原作者如果您是论文原作者欢迎加REAME中的QQ联系开发者
self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
self.title = "unknown"
self.abstract = "unknown"
def read_title_and_abstract(self, txt):
title, abstract = find_title_and_abs(txt)
if title is not None:
self.title = title.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
if abstract is not None:
self.abstract = abstract.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10): def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
""" """
@ -234,8 +243,8 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件这需要一段时间计算文档越长耗时越长请耐心等待。')) chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件这需要一段时间计算文档越长耗时越长请耐心等待。'))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
lps = LatexPaperSplit() lps = LatexPaperSplit()
lps.read_title_and_abstract(merged_content)
res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数 res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
# <-------- 拆分过长的latex片段 ----------> # <-------- 拆分过长的latex片段 ---------->
pfg = LatexPaperFileGroup() pfg = LatexPaperFileGroup()
for index, r in enumerate(res): for index, r in enumerate(res):
@ -256,12 +265,19 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
else: else:
# <-------- gpt 多线程请求 ----------> # <-------- gpt 多线程请求 ---------->
LATEX_EXPERIMENTAL, = get_conf('LATEX_EXPERIMENTAL')
history_array = [[""] for _ in range(n_split)]
if LATEX_EXPERIMENTAL:
paper_meta = f"The paper you processing is `{lps.title}`, a part of the abstraction is `{lps.abstract}`"
paper_meta_max_len = 888
history_array = [[ paper_meta[:paper_meta_max_len] + '...', "Understand, what should I do?"] for _ in range(n_split)]
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array=inputs_array, inputs_array=inputs_array,
inputs_show_user_array=inputs_show_user_array, inputs_show_user_array=inputs_show_user_array,
llm_kwargs=llm_kwargs, llm_kwargs=llm_kwargs,
chatbot=chatbot, chatbot=chatbot,
history_array=[[""] for _ in range(n_split)], history_array=history_array,
sys_prompt_array=sys_prompt_array, sys_prompt_array=sys_prompt_array,
# max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待 # max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待
scroller_max_len = 40 scroller_max_len = 40

View File

@ -318,6 +318,40 @@ def merge_tex_files_(project_foler, main_file, mode):
main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:] main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
return main_file return main_file
def find_title_and_abs(main_file):
def extract_abstract_1(text):
pattern = r"\\abstract\{(.*?)\}"
match = re.search(pattern, text, re.DOTALL)
if match:
return match.group(1)
else:
return None
def extract_abstract_2(text):
pattern = r"\\begin\{abstract\}(.*?)\\end\{abstract\}"
match = re.search(pattern, text, re.DOTALL)
if match:
return match.group(1)
else:
return None
def extract_title(string):
pattern = r"\\title\{(.*?)\}"
match = re.search(pattern, string, re.DOTALL)
if match:
return match.group(1)
else:
return None
abstract = extract_abstract_1(main_file)
if abstract is None:
abstract = extract_abstract_2(main_file)
title = extract_title(main_file)
return title, abstract
def merge_tex_files(project_foler, main_file, mode): def merge_tex_files(project_foler, main_file, mode):
""" """
Merge Tex project recrusively Merge Tex project recrusively

View File

@ -11,7 +11,9 @@ if __name__ == "__main__":
from tests.test_utils import plugin_test from tests.test_utils import plugin_test
# plugin_test(plugin='crazy_functions.函数动态生成->函数动态生成', main_input='交换图像的蓝色通道和红色通道', advanced_arg={"file_path_arg": "./build/ants.jpg"}) # plugin_test(plugin='crazy_functions.函数动态生成->函数动态生成', main_input='交换图像的蓝色通道和红色通道', advanced_arg={"file_path_arg": "./build/ants.jpg"})
plugin_test(plugin='crazy_functions.Latex输出PDF结果->Latex翻译中文并重新编译PDF', main_input="2307.07522") # plugin_test(plugin='crazy_functions.Latex输出PDF结果->Latex翻译中文并重新编译PDF', main_input="2307.07522")
plugin_test(plugin='crazy_functions.Latex输出PDF结果->Latex翻译中文并重新编译PDF', main_input="G:/SEAFILE_LOCAL/50503047/我的资料库/学位/paperlatex/aaai/Fu_8368_with_appendix")
# plugin_test(plugin='crazy_functions.虚空终端->虚空终端', main_input='修改api-key为sk-jhoejriotherjep') # plugin_test(plugin='crazy_functions.虚空终端->虚空终端', main_input='修改api-key为sk-jhoejriotherjep')