diff --git a/crazy_functions/批量总结PDF文档.py b/crazy_functions/批量总结PDF文档.py
new file mode 100644
index 0000000..12f4910
--- /dev/null
+++ b/crazy_functions/批量总结PDF文档.py
@@ -0,0 +1,89 @@
+from predict import predict_no_ui
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
+fast_debug = False
+
+
+def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
+    """Summarize every PDF in ``file_manifest``, then request a bilingual abstract.
+
+    Generator: yields ``(chatbot, history, status)`` after each UI-visible step
+    and appends to ``chatbot`` and ``history`` in place. ``project_folder`` only
+    shortens the file names quoted in prompts; ``systemPromptTxt`` is unused here.
+    """
+    import time, os, fitz
+    print('begin analysis on:', file_manifest)
+    msg = '正常'
+    for index, fp in enumerate(file_manifest):
+        with fitz.open(fp) as doc:
+            # PyMuPDF renamed getText() to get_text(); accept whichever exists.
+            file_content = "".join(
+                (getattr(page, "get_text", None) or page.getText)() for page in doc
+            )
+        print(file_content)
+
+        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
+        i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
+        # The progress counter shown to the user is 1-based.
+        i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
+        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
+        print('[1] yield chatbot, history')
+        yield chatbot, history, '正常'
+
+        if fast_debug:
+            # No request is sent in debug mode, so there is no reply to record.
+            continue
+
+        # ** gpt request ** (with a timeout countdown)
+        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[])
+
+        print('[2] end gpt req')
+        chatbot[-1] = (i_say_show_user, gpt_say)
+        history.extend((i_say_show_user, gpt_say))
+        print('[3] yield chatbot, history')
+        yield chatbot, history, msg
+        print('[4] next')
+        time.sleep(2)
+
+    all_file = ', '.join(os.path.relpath(fp, project_folder) for fp in file_manifest)
+    i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
+    chatbot.append((i_say, "[Local Message] waiting gpt response."))
+    yield chatbot, history, '正常'
+
+    if fast_debug:
+        return
+
+    # ** gpt request ** (with a timeout countdown)
+    gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history)
+
+    chatbot[-1] = (i_say, gpt_say)
+    history.extend((i_say, gpt_say))
+    yield chatbot, history, msg
+    res = write_results_to_file(history)
+    chatbot.append(("完成了吗?", res))
+    yield chatbot, history, msg
+
+
+@CatchException
+def 批量总结PDF文档(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
+    """Plugin entry point: summarize every PDF found under the path ``txt``.
+
+    Generator yielding ``(chatbot, history, status)``. If ``txt`` does not exist
+    or holds no PDF, the problem is passed to ``report_execption`` and it stops.
+    """
+    import glob, os
+    history = []    # clear the history so earlier turns do not overflow the input
+    if os.path.exists(txt):
+        project_folder = txt
+    else:
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield chatbot, history, '正常'
+        return
+    # Collect PDFs only: 解析PDF opens every manifest entry with fitz.
+    file_manifest = glob.glob(f'{project_folder}/**/*.pdf', recursive=True)
+    if not file_manifest:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.pdf文件: {txt}")
+        yield chatbot, history, '正常'
+        return
+
+    yield from 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
diff --git a/crazy_functions/读文章写摘要.py b/crazy_functions/读文章写摘要.py
index c85204f..9ad3434 100644
--- a/crazy_functions/读文章写摘要.py
+++ b/crazy_functions/读文章写摘要.py
@@ -48,54 +48,8 @@ def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, hist
         yield chatbot, history, msg
 
 
-def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
-    import time, glob, os, codecs, fitz
-    print('begin analysis on:', file_manifest)
-    for index, fp in enumerate(file_manifest):
-        with fitz.open(fp) as doc:
-            file_content = ""
-            for page in doc:
-                file_content += page.getText()
-            print(file_content)
-
-        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
-        i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
-        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
-        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
-        print('[1] yield chatbot, history')
-        yield chatbot, history, '正常'
-
-        if not fast_debug:
-            msg = '正常'
-            # ** gpt request **
-            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[]) # 带超时倒计时
-
-            print('[2] end gpt req')
-            chatbot[-1] = (i_say_show_user, gpt_say)
-            history.append(i_say_show_user); history.append(gpt_say)
-            print('[3] yield chatbot, history')
-            yield chatbot, history, msg
-            print('[4] next')
-            if not fast_debug: time.sleep(2)
-
-    all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
-    i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
-    chatbot.append((i_say, "[Local Message] waiting gpt response."))
-    yield chatbot, history, '正常'
-
-    if not fast_debug:
-        msg = '正常'
-        # ** gpt request **
-        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history) # 带超时倒计时
-
-        chatbot[-1] = (i_say, gpt_say)
-        history.append(i_say); history.append(gpt_say)
-        yield chatbot, history, msg
-        res = write_results_to_file(history)
-        chatbot.append(("完成了吗?", res))
-        yield chatbot, history, msg
-
 
+@CatchException
 def 读文章写摘要(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
     history = [] # 清空历史,以免输入溢出
     import glob, os
@@ -106,15 +60,11 @@ def 读文章写摘要(txt, top_p, temperature, chatbot, history, systemPromptTx
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
         yield chatbot, history, '正常'
         return
-    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + \
-                    [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)] # + \
+    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
                     # [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
                     # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
     if len(file_manifest) == 0:
-        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex或.pdf文件: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield chatbot, history, '正常'
         return
-    if '.pdf' in file_manifest[0]:
-        yield from 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
-    else:
-        yield from 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
+    yield from 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
\ No newline at end of file
diff --git a/functional_crazy.py b/functional_crazy.py
index d537573..eaa07b7 100644
--- a/functional_crazy.py
+++ b/functional_crazy.py
@@ -1,6 +1,7 @@
 
 def get_crazy_functionals():
     from crazy_functions.读文章写摘要 import 读文章写摘要
+    from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
     from crazy_functions.生成函数注释 import 批量生成函数注释
     from crazy_functions.解析项目源代码 import 解析项目本身
     from crazy_functions.解析项目源代码 import 解析一个Python项目
@@ -28,6 +29,10 @@ def get_crazy_functionals():
             "Color": "stop", # 按钮颜色
             "Function": 读文章写摘要
         },
+        "[实验] 批量总结pdf文档并生成双语摘要(配合input输入框)": {
+            "Color": "stop", # 按钮颜色
+            "Function": 批量总结PDF文档
+        },
         "[实验] 批量生成函数注释(配合input输入框)": {
             "Color": "stop", # 按钮颜色
             "Function": 批量生成函数注释