From 98724cd39562d9dbe2c6f0c65ddf3d94bd30170b Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 13 Apr 2023 11:18:44 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=B3=A8=E9=87=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functional.py | 2 +-
 crazy_functions/理解PDF文档内容.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/crazy_functional.py b/crazy_functional.py
index fbd238f..5a76757 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -107,7 +107,7 @@ def get_crazy_functions():
             "Color": "stop",
             "Function": HotReload(总结word文档)
         },
-        "[测试功能] 理解PDF文档内容(通用接口,读取文件输入区)": {
+        "理解PDF文档内容 (模仿ChatPDF)": {
             # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
             "Color": "stop",
             "AsButton": False, # 加入下拉菜单中
diff --git a/crazy_functions/理解PDF文档内容.py b/crazy_functions/理解PDF文档内容.py
index fada6b0..f4ba1a6 100644
--- a/crazy_functions/理解PDF文档内容.py
+++ b/crazy_functions/理解PDF文档内容.py
@@ -10,6 +10,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     print('begin analysis on:', file_name)
     file_content, page_one = read_and_clean_pdf_text(file_name)
 
+    ############################## <第零步,从摘要中提取高价值信息,放到history中> ##################################
     # 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割)
     # 的长度必须小于 2500 个 Token
     TOKEN_LIMIT_PER_FRAGMENT = 2500