diff --git a/.github/workflows/build-with-latex.yml b/.github/workflows/build-with-latex.yml new file mode 100644 index 0000000..fb16d2c --- /dev/null +++ b/.github/workflows/build-with-latex.yml @@ -0,0 +1,44 @@ +# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages +name: Create and publish a Docker image for Latex support + +on: + push: + branches: + - 'master' + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }}_with_latex + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . + push: true + file: docs/GithubAction+NoLocal+Latex + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/README.md b/README.md index d4d6858..7976076 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ To translate this project to arbitary language with GPT, read and run [`multi_la > > 1.请注意只有**红颜色**标识的函数插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR! > -> 2.本项目中每个文件的功能都在自译解[`self_analysis.md`](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。[安装方法](#installation)。 +> 2.本项目中每个文件的功能都在自译解[`self_analysis.md`](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/gpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。[安装方法](#installation)。 > > 3.本项目兼容并鼓励尝试国产大语言模型chatglm和RWKV, 盘古等等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,api2d-key3"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 @@ -31,13 +31,13 @@ To translate this project to arbitary language with GPT, read and run [`multi_la 一键中英互译 | 一键中英互译 一键代码解释 | 显示代码、解释代码、生成代码、给代码加注释 [自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键 -模块化设计 | 支持自定义强大的[函数插件](https://github.com/binary-husky/chatgpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) -[自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] [一键读懂](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)本项目的源代码 +模块化设计 | 支持自定义强大的[函数插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) +[自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] [一键读懂](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)本项目的源代码 [程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] 一键可以剖析其他Python/C/C++/Java/Lua/...项目树 读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [函数插件] 一键解读latex/pdf论文全文并生成摘要 Latex全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [函数插件] 一键翻译或润色latex论文 批量注释生成 | [函数插件] 一键批量生成函数注释 -Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [函数插件] 看到上面5种语言的[README](https://github.com/binary-husky/chatgpt_academic/blob/master/docs/README_EN.md)了吗? +Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [函数插件] 看到上面5种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗? chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 [PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [函数插件] PDF论文提取题目&摘要+翻译全文(多线程) [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF @@ -46,8 +46,8 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 ⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),迄今为止最好的论文翻译工具⭐ 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序 -启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 -[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持,[API2D](https://api2d.com/)接口支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? +启动暗色gradio[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 +[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? 更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama),[RWKV](https://github.com/BlinkDL/ChatRWKV)和[盘古α](https://openi.org.cn/pangu/) 更多新功能展示(图像生成等) …… | 见本文档结尾处 …… @@ -91,8 +91,8 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 1. 下载项目 ```sh -git clone https://github.com/binary-husky/chatgpt_academic.git -cd chatgpt_academic +git clone https://github.com/binary-husky/gpt_academic.git +cd gpt_academic ``` 2. 配置API_KEY @@ -113,6 +113,7 @@ conda activate gptac_venv # 激活anaconda环境 python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤 ``` +
如果需要支持清华ChatGLM/复旦MOSS作为后端,请点击展开此处

@@ -150,8 +151,8 @@ python main.py 1. 仅ChatGPT(推荐大多数人选择) ``` sh -git clone https://github.com/binary-husky/chatgpt_academic.git # 下载项目 -cd chatgpt_academic # 进入路径 +git clone https://github.com/binary-husky/gpt_academic.git # 下载项目 +cd gpt_academic # 进入路径 nano config.py # 用任意文本编辑器编辑config.py, 配置 “Proxy”, “API_KEY” 以及 “WEB_PORT” (例如50923) 等 docker build -t gpt-academic . # 安装 @@ -160,6 +161,7 @@ docker run --rm -it --net=host gpt-academic #(最后一步-选择2)在macOS/windows环境下,只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口 docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic ``` +P.S. 如果需要依赖Latex的插件功能,请见Wiki 2. ChatGPT + ChatGLM + MOSS(需要熟悉Docker) @@ -188,10 +190,10 @@ docker-compose up 按照`config.py`中的说明配置API_URL_REDIRECT即可。 4. 远程云服务器部署(需要云服务器知识与经验)。 -请访问[部署wiki-1](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) +请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) 5. 使用WSL2(Windows Subsystem for Linux 子系统)。 -请访问[部署wiki-2](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) +请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) 6. 如何在二级网址(如`http://localhost/subpath`)下运行。 请访问[FastAPI运行说明](docs/WithFastapi.md) @@ -220,7 +222,7 @@ docker-compose up 编写强大的函数插件来执行任何你想得到的和想不到的任务。 本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。 -详情请参考[函数插件指南](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。 +详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。 --- # Latest Update @@ -228,7 +230,7 @@ docker-compose up 1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的html文件, 另外在函数插件区(下拉菜单)调用 `载入对话历史存档` ,即可还原之前的会话。 -Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存,点击 `删除所有本地对话历史记录` 可以删除所有html存档缓存。 +Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存。

@@ -251,38 +253,33 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h -5. 这是一个能够“自我译解”的开源项目 -
- -
- -6. 译解其他开源项目,不在话下 +5. 译解其他开源项目
-7. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`) +6. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`)
-8. 新增MOSS大语言模型支持 +7. 新增MOSS大语言模型支持
-9. OpenAI图像生成 +8. OpenAI图像生成
-10. OpenAI音频解析与总结 +9. OpenAI音频解析与总结
-11. Latex全文校对纠错 +10. Latex全文校对纠错
===> @@ -310,30 +307,32 @@ gpt_academic开发者QQ群-2:610599535 - 已知问题 - 某些浏览器翻译插件干扰此软件前端的运行 - - 官方Gradio目前有很多兼容性Bug,请务必使用requirement.txt安装Gradio + - 官方Gradio目前有很多兼容性Bug,请务必使用`requirement.txt`安装Gradio ## 参考与学习 ``` -代码中参考了很多其他优秀项目中的设计,主要包括: +代码中参考了很多其他优秀项目中的设计,顺序不分先后: -# 项目1:清华ChatGLM-6B: +# 清华ChatGLM-6B: https://github.com/THUDM/ChatGLM-6B -# 项目2:清华JittorLLMs: +# 清华JittorLLMs: https://github.com/Jittor/JittorLLMs -# 项目3:Edge-GPT: -https://github.com/acheong08/EdgeGPT - -# 项目4:ChuanhuChatGPT: -https://github.com/GaiZhenbiao/ChuanhuChatGPT - -# 项目5:ChatPaper: +# ChatPaper: https://github.com/kaixindelele/ChatPaper -# 更多: +# Edge-GPT: +https://github.com/acheong08/EdgeGPT + +# ChuanhuChatGPT: +https://github.com/GaiZhenbiao/ChuanhuChatGPT + +# Oobabooga one-click installer: +https://github.com/oobabooga/one-click-installers + +# More: https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo -https://github.com/oobabooga/one-click-installers ``` diff --git a/config.py b/config.py index 14b089e..87e0ec9 100644 --- a/config.py +++ b/config.py @@ -46,7 +46,7 @@ MAX_RETRY = 2 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 ) LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓ -AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] +AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] # P.S. 其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] # 本地LLM模型如ChatGLM的执行方式 CPU/GPU diff --git a/crazy_functional.py b/crazy_functional.py index abd44d7..a724b97 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -112,14 +112,13 @@ def get_crazy_functions(): "AsButton": False, # 加入下拉菜单中 "Function": HotReload(解析项目本身) }, - "[老旧的Demo] 把本项目源代码切换成全英文": { - # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 - "AsButton": False, # 加入下拉菜单中 - "Function": HotReload(全项目切换英文) - }, + # "[老旧的Demo] 把本项目源代码切换成全英文": { + # # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 + # "AsButton": False, # 加入下拉菜单中 + # "Function": HotReload(全项目切换英文) + # }, "[插件demo] 历史上的今天": { # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 - "AsButton": False, # 加入下拉菜单中 "Function": HotReload(高阶功能模板函数) }, @@ -359,15 +358,42 @@ def get_crazy_functions(): }) from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF function_plugins.update({ - "Arixv论文精细翻译": { + "Arixv翻译(输入arxivID) [需Latex]": { "Color": "stop", - "AsButton": True, - # "AdvancedArgs": True, - # "ArgsReminder": "", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": + "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ + "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ', "Function": HotReload(Latex翻译中文并重新编译PDF) } }) + # function_plugins.update({ + # "本地论文翻译(上传Latex压缩包) [需Latex]": { + # "Color": "stop", + # "AsButton": False, + # "AdvancedArgs": True, + # "ArgsReminder": + # "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ + # "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ', + # "Function": HotReload(Latex翻译中文并重新编译PDF) + # } + # }) except: print('Load function plugin failed') - ###################### 第n组插件 ########################### + + # try: + # from crazy_functions.虚空终端 import 终端 + # function_plugins.update({ + # "超级终端": { + # "Color": "stop", + # "AsButton": False, + # # "AdvancedArgs": True, + # # "ArgsReminder": "", + # "Function": HotReload(终端) + # } + # }) + # except: + # print('Load function plugin failed') + return function_plugins diff --git a/crazy_functions/Langchain知识库.py b/crazy_functions/Langchain知识库.py index 5b09d3b..31c459a 100644 --- a/crazy_functions/Langchain知识库.py +++ b/crazy_functions/Langchain知识库.py @@ -30,7 +30,7 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro ) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 from .crazy_utils import try_install_deps - try_install_deps(['zh_langchain==0.2.0']) + try_install_deps(['zh_langchain==0.2.1']) # < --------------------读取参数--------------- > if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") @@ -84,7 +84,7 @@ def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 from .crazy_utils import try_install_deps - try_install_deps(['zh_langchain==0.2.0']) + try_install_deps(['zh_langchain==0.2.1']) # < ------------------- --------------- > kai = knowledge_archive_interface() diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 1d5e103..1886375 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -1,12 +1,13 @@ from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str +from functools import partial import glob, os, requests, time pj = os.path.join ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/") # =================================== 工具函数 =============================================== 专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". ' -def switch_prompt(pfg, mode): +def switch_prompt(pfg, mode, more_requirement): """ Generate prompts and system prompts based on the mode for proofreading or translating. Args: @@ -25,7 +26,7 @@ def switch_prompt(pfg, mode): f"\n\n{frag}" for frag in pfg.sp_file_contents] sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)] elif mode == 'translate_zh': - inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + 专业词汇声明 + + inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + r"Answer me only with the translated text:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] @@ -79,7 +80,7 @@ def arxiv_download(chatbot, history, txt): os.makedirs(translation_dir) target_file = pj(translation_dir, 'translate_zh.pdf') if os.path.exists(target_file): - promote_file_to_downloadzone(target_file) + promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot) return target_file return False def is_float(s): @@ -88,8 +89,10 @@ def arxiv_download(chatbot, history, txt): return True except ValueError: return False - if ('.' in txt) and ('/' not in txt) and is_float(txt): + if ('.' in txt) and ('/' not in txt) and is_float(txt): # is arxiv ID txt = 'https://arxiv.org/abs/' + txt.strip() + if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]): # is arxiv ID + txt = 'https://arxiv.org/abs/' + txt[:10] if not txt.startswith('https://arxiv.org'): return txt, None @@ -105,6 +108,7 @@ def arxiv_download(chatbot, history, txt): return msg, None # <-------------- set format -------------> arxiv_id = url_.split('/abs/')[-1] + if 'v' in arxiv_id: arxiv_id = arxiv_id[:10] cached_translation_pdf = check_cached_translation_pdf(arxiv_id) if cached_translation_pdf: return cached_translation_pdf, arxiv_id @@ -177,7 +181,8 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> if not os.path.exists(project_folder + '/merge_proofread.tex'): - yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt) + yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, + chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt) # <-------------- compile PDF -------------> @@ -186,13 +191,14 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo # <-------------- zip PDF -------------> - zip_result(project_folder) + zip_res = zip_result(project_folder) if success: chatbot.append((f"成功啦", '请查收结果(压缩包)...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 else: chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) # <-------------- we are done -------------> return success @@ -205,9 +211,13 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, # <-------------- information about this plugin -------------> chatbot.append([ "函数插件功能?", - "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"]) + "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + # <-------------- more requirements -------------> + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + more_req = plugin_kwargs.get("advanced_arg", "") + _switch_prompt_ = partial(switch_prompt, more_requirement=more_req) # <-------------- check deps -------------> try: @@ -255,21 +265,23 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> if not os.path.exists(project_folder + '/merge_translate_zh.tex'): - yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='translate_zh', switch_prompt=switch_prompt) + yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, + chatbot, history, system_prompt, mode='translate_zh', switch_prompt=_switch_prompt_) # <-------------- compile PDF -------------> - success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', + success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh', work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) # <-------------- zip PDF -------------> - zip_result(project_folder) + zip_res = zip_result(project_folder) if success: chatbot.append((f"成功啦", '请查收结果(压缩包)...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 else: chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) # <-------------- we are done -------------> diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 96301ff..a1b1493 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -698,3 +698,51 @@ def try_install_deps(deps): for dep in deps: import subprocess, sys subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep]) + + +class construct_html(): + def __init__(self) -> None: + self.css = """ +.row { + display: flex; + flex-wrap: wrap; +} + +.column { + flex: 1; + padding: 10px; +} + +.table-header { + font-weight: bold; + border-bottom: 1px solid black; +} + +.table-row { + border-bottom: 1px solid lightgray; +} + +.table-cell { + padding: 5px; +} + """ + self.html_string = f'翻译结果' + + + def add_row(self, a, b): + tmp = """ +
+
REPLACE_A
+
REPLACE_B
+
+ """ + from toolbox import markdown_convertion + tmp = tmp.replace('REPLACE_A', markdown_convertion(a)) + tmp = tmp.replace('REPLACE_B', markdown_convertion(b)) + self.html_string += tmp + + + def save_file(self, file_name): + with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f: + f.write(self.html_string.encode('utf-8', 'ignore').decode()) + diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 89ca7a5..a1e7758 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -127,7 +127,7 @@ def rm_comments(main_file): new_file_remove_comment_lines = [] for l in main_file.splitlines(): # 删除整行的空注释 - if l.startswith("%") or (l.startswith(" ") and l.lstrip().startswith("%")): + if l.lstrip().startswith("%"): pass else: new_file_remove_comment_lines.append(l) @@ -165,17 +165,22 @@ def merge_tex_files(project_foler, main_file, mode): main_file = rm_comments(main_file) if mode == 'translate_zh': + # find paper documentclass pattern = re.compile(r'\\documentclass.*\n') match = pattern.search(main_file) + assert match is not None, "Cannot find documentclass statement!" position = match.end() add_ctex = '\\usepackage{ctex}\n' add_url = '\\usepackage{url}\n' if '{url}' not in main_file else '' main_file = main_file[:position] + add_ctex + add_url + main_file[position:] - # 2 fontset=windows + # fontset=windows import platform - if platform.system() != 'Windows': - main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows]{\2}",main_file) - main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows]{\1}",main_file) + main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file) + main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file) + # find paper abstract + pattern = re.compile(r'\\begin\{abstract\}.*\n') + match = pattern.search(main_file) + assert match is not None, "Cannot find paper abstract section!" return main_file @@ -398,7 +403,7 @@ class LatexPaperSplit(): def __init__(self) -> None: self.nodes = None self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \ - "版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \ + "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \ "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者) self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" @@ -418,6 +423,7 @@ class LatexPaperSplit(): if mode == 'translate_zh': pattern = re.compile(r'\\begin\{abstract\}.*\n') match = pattern.search(result_string) + assert match is not None, "Cannot find paper abstract section!" position = match.end() result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:] return result_string @@ -491,7 +497,32 @@ class LatexPaperFileGroup(): f.write(res) return manifest +def write_html(sp_file_contents, sp_file_result, chatbot): + # write html + try: + import copy + from .crazy_utils import construct_html + from toolbox import gen_time_str + ch = construct_html() + orig = "" + trans = "" + final = [] + for c,r in zip(sp_file_contents, sp_file_result): + final.append(c) + final.append(r) + for i, k in enumerate(final): + if i%2==0: + orig = k + if i%2==1: + trans = k + ch.add_row(a=orig, b=trans) + create_report_file_name = f"{gen_time_str()}.trans.html" + ch.save_file(create_report_file_name) + promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot) + except: + from toolbox import trimmed_format_exc + print('writing html result failed:', trimmed_format_exc()) def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]): import time, os, re @@ -568,6 +599,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin pfg.get_token_num = None objdump(pfg, file=pj(project_folder,'temp.pkl')) + write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot) # <-------- 写出文件 ----------> msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。" @@ -617,7 +649,7 @@ def compile_latex_with_timeout(command, timeout=60): return False return True -def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder): +def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'): import os, time current_dir = os.getcwd() n_fix = 1 @@ -628,6 +660,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f while True: import os + # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面 os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) @@ -649,15 +682,16 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) - yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面 - print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') - ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') + if mode!='translate_zh': + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面 + print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') + ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') - yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面 - os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) - os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir) - os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) - os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) # <---------------------> os.chdir(current_dir) @@ -678,7 +712,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') if os.path.exists(pj(work_folder, '..', 'translation')): shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf')) - promote_file_to_downloadzone(result_pdf) + promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) return True # 成功啦 else: if n_fix>=max_try: break diff --git a/crazy_functions/对话历史存档.py b/crazy_functions/对话历史存档.py index c638d1b..fed0f8f 100644 --- a/crazy_functions/对话历史存档.py +++ b/crazy_functions/对话历史存档.py @@ -1,4 +1,4 @@ -from toolbox import CatchException, update_ui +from toolbox import CatchException, update_ui, promote_file_to_downloadzone from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive import re @@ -29,9 +29,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None): for h in history: f.write("\n>>>" + h) f.write('') - res = '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}') - print(res) - return res + promote_file_to_downloadzone(f'./gpt_log/{file_name}', rename_file=file_name, chatbot=chatbot) + return '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}') def gen_file_preview(file_name): try: diff --git a/crazy_functions/虚空终端.py b/crazy_functions/虚空终端.py new file mode 100644 index 0000000..fe71a46 --- /dev/null +++ b/crazy_functions/虚空终端.py @@ -0,0 +1,131 @@ +from toolbox import CatchException, update_ui, gen_time_str +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from .crazy_utils import input_clipping + + +prompt = """ +I have to achieve some functionalities by calling one of the functions below. +Your job is to find the correct funtion to use to satisfy my requirement, +and then write python code to call this function with correct parameters. + +These are functions you are allowed to choose from: +1. + 功能描述: 总结音视频内容 + 调用函数: ConcludeAudioContent(txt, llm_kwargs) + 参数说明: + txt: 音频文件的路径 + llm_kwargs: 模型参数, 永远给定None +2. + 功能描述: 将每次对话记录写入Markdown格式的文件中 + 调用函数: WriteMarkdown() +3. + 功能描述: 将指定目录下的PDF文件从英文翻译成中文 + 调用函数: BatchTranslatePDFDocuments_MultiThreaded(txt, llm_kwargs) + 参数说明: + txt: PDF文件所在的路径 + llm_kwargs: 模型参数, 永远给定None +4. + 功能描述: 根据文本使用GPT模型生成相应的图像 + 调用函数: ImageGeneration(txt, llm_kwargs) + 参数说明: + txt: 图像生成所用到的提示文本 + llm_kwargs: 模型参数, 永远给定None +5. + 功能描述: 对输入的word文档进行摘要生成 + 调用函数: SummarizingWordDocuments(input_path, output_path) + 参数说明: + input_path: 待处理的word文档路径 + output_path: 摘要生成后的文档路径 + + +You should always anwser with following format: +---------------- +Code: +``` +class AutoAcademic(object): + def __init__(self): + self.selected_function = "FILL_CORRECT_FUNCTION_HERE" # e.g., "GenerateImage" + self.txt = "FILL_MAIN_PARAMETER_HERE" # e.g., "荷叶上的蜻蜓" + self.llm_kwargs = None +``` +Explanation: +只有GenerateImage和生成图像相关, 因此选择GenerateImage函数。 +---------------- + +Now, this is my requirement: + +""" +def get_fn_lib(): + return { + "BatchTranslatePDFDocuments_MultiThreaded": ("crazy_functions.批量翻译PDF文档_多线程", "批量翻译PDF文档"), + "SummarizingWordDocuments": ("crazy_functions.总结word文档", "总结word文档"), + "ImageGeneration": ("crazy_functions.图片生成", "图片生成"), + "TranslateMarkdownFromEnglishToChinese": ("crazy_functions.批量Markdown翻译", "Markdown中译英"), + "SummaryAudioVideo": ("crazy_functions.总结音视频", "总结音视频"), + } + +def inspect_dependency(chatbot, history): + return True + +def eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + import subprocess, sys, os, shutil, importlib + + with open('gpt_log/void_terminal_runtime.py', 'w', encoding='utf8') as f: + f.write(code) + + try: + AutoAcademic = getattr(importlib.import_module('gpt_log.void_terminal_runtime', 'AutoAcademic'), 'AutoAcademic') + # importlib.reload(AutoAcademic) + auto_dict = AutoAcademic() + selected_function = auto_dict.selected_function + txt = auto_dict.txt + fp, fn = get_fn_lib()[selected_function] + fn_plugin = getattr(importlib.import_module(fp, fn), fn) + yield from fn_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port) + except: + from toolbox import trimmed_format_exc + chatbot.append(["执行错误", f"\n```\n{trimmed_format_exc()}\n```\n"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + +def get_code_block(reply): + import re + pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks + matches = re.findall(pattern, reply) # find all code blocks in text + if len(matches) != 1: + raise RuntimeError("GPT is not generating proper code.") + return matches[0].strip('python') # code block + +@CatchException +def 终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + """ + txt 输入栏用户输入的文本, 例如需要翻译的一段话, 再例如一个包含了待处理文件的路径 + llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行 + plugin_kwargs 插件模型的参数, 暂时没有用武之地 + chatbot 聊天显示框的句柄, 用于显示给用户 + history 聊天历史, 前情提要 + system_prompt 给gpt的静默提醒 + web_port 当前软件运行的端口号 + """ + # 清空历史, 以免输入溢出 + history = [] + + # 基本信息:功能、贡献者 + chatbot.append(["函数插件功能?", "根据自然语言执行插件命令, 作者: binary-husky, 插件初始化中 ..."]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # # 尝试导入依赖, 如果缺少依赖, 则给出安装建议 + # dep_ok = yield from inspect_dependency(chatbot=chatbot, history=history) # 刷新界面 + # if not dep_ok: return + + # 输入 + i_say = prompt + txt + # 开始 + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs=i_say, inputs_show_user=txt, + llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], + sys_prompt="" + ) + + # 将代码转为动画 + code = get_code_block(gpt_say) + yield from eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port) diff --git a/docs/GithubAction+NoLocal+Latex b/docs/GithubAction+NoLocal+Latex new file mode 100644 index 0000000..5ff9bb8 --- /dev/null +++ b/docs/GithubAction+NoLocal+Latex @@ -0,0 +1,25 @@ +# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM +# - 1 修改 `config.py` +# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/Dockerfile+NoLocal+Latex . +# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex + +FROM fuqingxu/python311_texlive_ctex:latest + +# 指定路径 +WORKDIR /gpt + +RUN pip3 install gradio openai numpy arxiv rich +RUN pip3 install colorama Markdown pygments pymupdf + +# 装载项目文件 +COPY . . + + +# 安装依赖 +RUN pip3 install -r requirements.txt + +# 可选步骤,用于预热模块 +RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' + +# 启动 +CMD ["python3", "-u", "main.py"] diff --git a/main.py b/main.py index 7dbf17f..65e1f4c 100644 --- a/main.py +++ b/main.py @@ -155,7 +155,7 @@ def main(): for k in crazy_fns: if not crazy_fns[k].get("AsButton", True): continue click_handle = crazy_fns[k]["Button"].click(ArgsGeneralWrapper(crazy_fns[k]["Function"]), [*input_combo, gr.State(PORT)], output_combo) - click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot]) + click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot]) cancel_handles.append(click_handle) # 函数插件-下拉菜单与随变按钮的互动 def on_dropdown_changed(k): @@ -175,7 +175,7 @@ def main(): if k in [r"打开插件列表", r"请先从插件列表中选择"]: return yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(*args, **kwargs) click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo) - click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot]) + click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot]) cancel_handles.append(click_handle) # 终止按钮的回调函数注册 stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py index b6efe21..a27407c 100644 --- a/request_llm/bridge_all.py +++ b/request_llm/bridge_all.py @@ -83,6 +83,15 @@ model_info = { "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, + + "gpt-3.5-turbo-16k": { + "fn_with_ui": chatgpt_ui, + "fn_without_ui": chatgpt_noui, + "endpoint": openai_endpoint, + "max_token": 1024*16, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, "gpt-4": { "fn_with_ui": chatgpt_ui, diff --git a/toolbox.py b/toolbox.py index 4b0e1dd..fb6aa9f 100644 --- a/toolbox.py +++ b/toolbox.py @@ -6,6 +6,7 @@ import re import os from latex2mathml.converter import convert as tex2mathml from functools import wraps, lru_cache +pj = os.path.join """ ======================================================================== @@ -221,16 +222,21 @@ def text_divide_paragraph(text): """ 将文本按照段落分隔符分割开,生成带有段落标签的HTML代码。 """ + pre = '
' + suf = '
' + if text.startswith(pre) and text.endswith(suf): + return text + if '```' in text: # careful input - return text + return pre + text + suf else: # wtf input lines = text.split("\n") for i, line in enumerate(lines): lines[i] = lines[i].replace(" ", " ") text = "
".join(lines) - return text + return pre + text + suf @lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度 def markdown_convertion(txt): @@ -342,8 +348,11 @@ def format_io(self, y): if y is None or y == []: return [] i_ask, gpt_reply = y[-1] - i_ask = text_divide_paragraph(i_ask) # 输入部分太自由,预处理一波 - gpt_reply = close_up_code_segment_during_stream(gpt_reply) # 当代码输出半截的时候,试着补上后个``` + # 输入部分太自由,预处理一波 + if i_ask is not None: i_ask = text_divide_paragraph(i_ask) + # 当代码输出半截的时候,试着补上后个``` + if gpt_reply is not None: gpt_reply = close_up_code_segment_during_stream(gpt_reply) + # process y[-1] = ( None if i_ask is None else markdown.markdown(i_ask, extensions=['fenced_code', 'tables']), None if gpt_reply is None else markdown_convertion(gpt_reply) @@ -391,7 +400,7 @@ def extract_archive(file_path, dest_dir): print("Successfully extracted rar archive to {}".format(dest_dir)) except: print("Rar format requires additional dependencies to install") - return '\n\n需要安装pip install rarfile来解压rar文件' + return '\n\n解压失败! 需要安装pip install rarfile来解压rar文件' # 第三方库,需要预先pip install py7zr elif file_extension == '.7z': @@ -402,7 +411,7 @@ def extract_archive(file_path, dest_dir): print("Successfully extracted 7z archive to {}".format(dest_dir)) except: print("7z format requires additional dependencies to install") - return '\n\n需要安装pip install py7zr来解压7z文件' + return '\n\n解压失败! 需要安装pip install py7zr来解压7z文件' else: return '' return '' @@ -431,13 +440,17 @@ def find_recent_files(directory): return recent_files -def promote_file_to_downloadzone(file, rename_file=None): +def promote_file_to_downloadzone(file, rename_file=None, chatbot=None): # 将文件复制一份到下载区 import shutil if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}' new_path = os.path.join(f'./gpt_log/', rename_file) - if os.path.exists(new_path): os.remove(new_path) - shutil.copyfile(file, new_path) + if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path) + if not os.path.exists(new_path): shutil.copyfile(file, new_path) + if chatbot: + if 'file_to_promote' in chatbot._cookies: current = chatbot._cookies['file_to_promote'] + else: current = [] + chatbot._cookies.update({'file_to_promote': [new_path] + current}) def on_file_uploaded(files, chatbot, txt, txt2, checkboxes): """ @@ -477,16 +490,20 @@ def on_file_uploaded(files, chatbot, txt, txt2, checkboxes): return chatbot, txt, txt2 -def on_report_generated(files, chatbot): +def on_report_generated(cookies, files, chatbot): from toolbox import find_recent_files - report_files = find_recent_files('gpt_log') + if 'file_to_promote' in cookies: + report_files = cookies['file_to_promote'] + cookies.pop('file_to_promote') + else: + report_files = find_recent_files('gpt_log') if len(report_files) == 0: return None, chatbot # files.extend(report_files) file_links = '' for f in report_files: file_links += f'
{f}' chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}']) - return report_files, chatbot + return cookies, report_files, chatbot def is_openai_api_key(key): API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key) @@ -788,7 +805,8 @@ def zip_result(folder): import time t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) zip_folder(folder, './gpt_log/', f'{t}-result.zip') - + return pj('./gpt_log/', f'{t}-result.zip') + def gen_time_str(): import time return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) diff --git a/version b/version index 669c708..ceb909a 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.4, + "version": 3.41, "show_feature": true, - "new_feature": "新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" + "new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" }