diff --git a/README.md b/README.md index e0d6fd8..96b0435 100644 --- a/README.md +++ b/README.md @@ -68,11 +68,13 @@ huggingface免科学上网[在线体验](https://huggingface.co/spaces/qingxu98/ -- 多种大语言模型混合调用([v3.1分支](https://github.com/binary-husky/chatgpt_academic/tree/v3.1)测试中) +- 多种大语言模型混合调用(ChatGLM + OpenAI-GPT3.5 + [API2D](https://api2d.com/)-GPT4, [v3.1分支](https://github.com/binary-husky/chatgpt_academic/tree/v3.1)测试中)
- +
+v3.1的[huggingface测试版](https://huggingface.co/spaces/qingxu98/academic-chatgpt-beta)(huggingface版不支持chatglm) + ## 直接运行 (Windows, Linux or MacOS) diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 189d948..ba5f18c 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -444,6 +444,7 @@ def read_and_clean_pdf_text(fp): pf = 998 for l in t['lines']: txt_line = "".join([wtf['text'] for wtf in l['spans']]) + if len(txt_line) == 0: continue pf = primary_ffsize(l) meta_line.append([txt_line, pf, l['bbox'], l]) for wtf in l['spans']: # for l in t['lines']: @@ -554,8 +555,8 @@ def read_and_clean_pdf_text(fp): meta_txt = meta_txt.replace('\n', '\n\n') ############################## <第 5 步,展示分割效果> ################################## - for f in finals: - print亮黄(f) - print亮绿('***************************') + # for f in finals: + # print亮黄(f) + # print亮绿('***************************') return meta_txt, page_one_meta diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py index 79c1253..42e75b4 100644 --- a/crazy_functions/批量翻译PDF文档_多线程.py +++ b/crazy_functions/批量翻译PDF文档_多线程.py @@ -13,7 +13,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_ # 基本信息:功能、贡献者 chatbot.append([ "函数插件功能?", - "批量总结PDF文档。函数插件贡献者: Binary-Husky"]) + "批量翻译PDF文档。函数插件贡献者: Binary-Husky"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 尝试导入依赖,如果缺少依赖,则给出安装建议 @@ -59,7 +59,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt): import os import tiktoken - TOKEN_LIMIT_PER_FRAGMENT = 1600 + TOKEN_LIMIT_PER_FRAGMENT = 1280 generated_conclusion_files = [] for index, fp in enumerate(file_manifest): @@ -91,13 +91,13 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, # 多线,翻译 gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( inputs_array=[ - f"以下是你需要翻译的论文片段:\n{frag}" for frag in paper_fragments], + f"你需要翻译以下内容:\n{frag}" for frag in paper_fragments], inputs_show_user_array=[f"\n---\n 原文: \n\n {frag.replace('#', '')} \n---\n 翻译:\n " for frag in paper_fragments], llm_kwargs=llm_kwargs, chatbot=chatbot, history_array=[[paper_meta] for _ in paper_fragments], sys_prompt_array=[ - "请你作为一个学术翻译,负责把学术论文的片段准确翻译成中文。" for _ in paper_fragments], + "请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in paper_fragments], # max_workers=5 # OpenAI所允许的最大并行过载 ) diff --git a/toolbox.py b/toolbox.py index e3e75ce..0a31755 100644 --- a/toolbox.py +++ b/toolbox.py @@ -456,7 +456,7 @@ def on_file_uploaded(files, chatbot, txt): chatbot.append(['我上传了文件,请查收', f'[Local Message] 收到以下文件: \n\n{moved_files_str}' + f'\n\n调用路径参数已自动修正到: \n\n{txt}' + - f'\n\n现在您点击任意实验功能时,以上文件将被作为输入参数'+err_msg]) + f'\n\n现在您点击任意“红颜色”标识的函数插件时,以上文件将被作为输入参数'+err_msg]) return chatbot, txt diff --git a/version b/version index e1a3466..962aee5 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 2.68, + "version": 2.7, "show_feature": true, - "new_feature": "改善理解pdf(chatpdf)功能 <-> 修复读取罕见字符的BUG <-> 如果一键更新失败,可前往github手动更新" + "new_feature": "修复BUG <-> 改善理解pdf(chatpdf)功能 <-> 如果一键更新失败,可前往github手动更新" }