diff --git a/Dockerfile b/Dockerfile index 97ad13d..ac47b8e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,28 +1,34 @@ -# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM -# 如何构建: 先修改 `config.py`, 然后 docker build -t gpt-academic . -# 如何运行: docker run --rm -it --net=host gpt-academic +# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型或者latex运行依赖,请参考 docker-compose.yml +# 如何构建: 先修改 `config.py`, 然后 `docker build -t gpt-academic . ` +# 如何运行(Linux下): `docker run --rm -it --net=host gpt-academic ` +# 如何运行(其他操作系统,选择任意一个固定端口50923): `docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic ` FROM python:3.11 + +# 非必要步骤,更换pip源 RUN echo '[global]' > /etc/pip.conf && \ echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \ echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf +# 进入工作路径 WORKDIR /gpt - - -# 安装依赖 +# 安装大部分依赖,利用Docker缓存加速以后的构建 COPY requirements.txt ./ COPY ./docs/gradio-3.32.2-py3-none-any.whl ./docs/gradio-3.32.2-py3-none-any.whl RUN pip3 install -r requirements.txt -# 装载项目文件 + + +# 装载项目文件,安装剩余依赖 COPY . . RUN pip3 install -r requirements.txt -# 可选步骤,用于预热模块 + +# 非必要步骤,用于预热模块 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' + # 启动 CMD ["python3", "-u", "main.py"] diff --git a/README.md b/README.md index 5121076..ff8c371 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,26 @@ > **Note** > -> 2023.5.27 对Gradio依赖进行了调整,Fork并解决了官方Gradio的若干Bugs。请及时**更新代码**并重新更新pip依赖。安装依赖时,请严格选择`requirements.txt`中**指定的版本**: -> -> `pip install -r requirements.txt` +> 2023.7.5: Gradio依赖调整。请及时**更新代码** > +> 2023.7.8: pydantic出现兼容问题,已修改 `requirements.txt`。安装依赖时,请严格选择`requirements.txt`中**指定的版本** +> +> `pip install -r requirements.txt` -# GPT 学术优化 (GPT Academic) -**如果喜欢这个项目,请给它一个Star;如果你发明了更好用的快捷键或函数插件,欢迎发pull requests** +#
GPT 学术优化 (GPT Academic)
+ +**如果喜欢这个项目,请给它一个Star;如果您发明了好用的快捷键或函数插件,欢迎发pull requests!** If you like this project, please give it a Star. If you've come up with more useful academic shortcuts or functional plugins, feel free to open an issue or pull request. We also have a README in [English|](docs/README_EN.md)[日本語|](docs/README_JP.md)[한국어|](https://github.com/mldljyh/ko_gpt_academic)[Русский|](docs/README_RS.md)[Français](docs/README_FR.md) translated by this project itself. To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental). > **Note** > -> 1.请注意只有**红颜色**标识的函数插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR! +> 1.请注意只有 **高亮(如红色)** 标识的函数插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR。 > > 2.本项目中每个文件的功能都在自译解[`self_analysis.md`](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/gpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。[安装方法](#installation)。 > -> 3.本项目兼容并鼓励尝试国产大语言模型chatglm和RWKV, 盘古等等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,api2d-key3"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 +> 3.本项目兼容并鼓励尝试国产大语言模型ChatGLM和Moss等等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,api2d-key3"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 @@ -42,13 +44,13 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 [PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [函数插件] PDF论文提取题目&摘要+翻译全文(多线程) [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/) -互联网信息聚合+GPT | [函数插件] 一键[让GPT先从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck),再回答问题,让信息永不过时 -⭐Arxiv论文精细翻译 | [函数插件] 
一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),迄今为止最好的论文翻译工具⭐ +互联网信息聚合+GPT | [函数插件] 一键[让GPT从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)回答问题,让信息永不过时 +⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),目前最好的论文翻译工具 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序 -启动暗色gradio[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 +启动暗色[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 [多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? -更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama),[RWKV](https://github.com/BlinkDL/ChatRWKV)和[盘古α](https://openi.org.cn/pangu/) +更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)和[盘古α](https://openi.org.cn/pangu/) 更多新功能展示(图像生成等) …… | 见本文档结尾处 …… @@ -85,9 +87,8 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 ---- # Installation -## 安装-方法1:直接运行 (Windows, Linux or MacOS) +### 安装方法I:直接运行 (Windows, Linux or MacOS) 1. 下载项目 ```sh @@ -140,7 +141,7 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt- python main.py ``` -## 安装-方法2:使用Docker +### 安装方法II:使用Docker 1. 仅ChatGPT(推荐大多数人选择,等价于docker-compose方案1) @@ -171,7 +172,7 @@ docker-compose up ``` -## 安装-方法3:其他部署姿势 +### 安装方法III:其他部署姿势 1. 
一键运行脚本。 完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。 脚本的贡献来源是[oobabooga](https://github.com/oobabooga/one-click-installers)。 @@ -194,11 +195,9 @@ docker-compose up 7. 如何在二级网址(如`http://localhost/subpath`)下运行。 请访问[FastAPI运行说明](docs/WithFastapi.md) ---- -# Advanced Usage -## 自定义新的便捷按钮 / 自定义函数插件 -1. 自定义新的便捷按钮(学术快捷键) +# Advanced Usage +### I:自定义新的便捷按钮(学术快捷键) 任意文本编辑器打开`core_functional.py`,添加条目如下,然后重启程序即可。(如果按钮已经添加成功并可见,那么前缀、后缀都支持热修改,无需重启程序即可生效。) 例如 ``` @@ -214,15 +213,15 @@ docker-compose up -2. 自定义函数插件 +### II:自定义函数插件 编写强大的函数插件来执行任何你想得到的和想不到的任务。 本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。 详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。 ---- + # Latest Update -## 新功能动态 +### I:新功能动态 1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的html文件, 另外在函数插件区(下拉菜单)调用 `载入对话历史存档` ,即可还原之前的会话。 @@ -283,7 +282,7 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h -## 版本: +### II:版本: - version 3.5(Todo): 使用自然语言调用本项目的所有函数插件(高优先级) - version 3.4: +arxiv论文翻译、latex论文批改功能 - version 3.3: +互联网信息综合功能 @@ -305,11 +304,12 @@ gpt_academic开发者QQ群-2:610599535 - 某些浏览器翻译插件干扰此软件前端的运行 - 官方Gradio目前有很多兼容性Bug,请务必使用`requirements.txt`安装Gradio -## 主题 +### III:主题 1. 
`Chuanhu-Keldos-Green` [网址](https://github.com/GaiZhenbiao/ChuanhuChatGPT/) -## 参考与学习 + +### IV:参考与学习 ``` 代码中参考了很多其他优秀项目中的设计,顺序不分先后: diff --git a/config.py b/config.py index 4a9f723..f0383bc 100644 --- a/config.py +++ b/config.py @@ -1,17 +1,27 @@ -# [step 1]>> 例如: API_KEY = "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" (此key无效) -API_KEY = "sk-此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey1,fkxxxx-api2dkey2" +""" + 以下所有配置也都支持利用环境变量覆写,环境变量配置格式见docker-compose.yml。 + 读取优先级:环境变量 > config_private.py > config.py + --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + All the following configurations also support using environment variables to override, + and the environment variable configuration format can be seen in docker-compose.yml. + Configuration reading priority: environment variable > config_private.py > config.py +""" + +# [step 1]>> API_KEY = "sk-123456789xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx123456789"。极少数情况下,还需要填写组织(格式如org-123456789abcdefghijklmno的),请向下翻,找 API_ORG 设置项 +API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4" # [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改 USE_PROXY = False if USE_PROXY: - # 填写格式是 [协议]:// [地址] :[端口],填写之前不要忘记把USE_PROXY改成True,如果直接在海外服务器部署,此处不修改 - # 例如 "socks5h://localhost:11284" - # [协议] 常见协议无非socks5h/http; 例如 v2**y 和 ss* 的默认本地协议是socks5h; 而cl**h 的默认本地协议是http - # [地址] 懂的都懂,不懂就填localhost或者127.0.0.1肯定错不了(localhost意思是代理软件安装在本机上) - # [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上 - - # 代理网络的地址,打开你的*学*网软件查看代理的协议(socks5/http)、地址(localhost)和端口(11284) + """ + 填写格式是 [协议]:// [地址] :[端口],填写之前不要忘记把USE_PROXY改成True,如果直接在海外服务器部署,此处不修改 + <配置教程&视频教程> https://github.com/binary-husky/gpt_academic/issues/1> + [协议] 常见协议无非socks5h/http; 例如 v2**y 和 ss* 的默认本地协议是socks5h; 而cl**h 的默认本地协议是http + [地址] 懂的都懂,不懂就填localhost或者127.0.0.1肯定错不了(localhost意思是代理软件安装在本机上) + [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上 + """ + # 
代理网络的地址,打开你的*学*网软件查看代理的协议(socks5h / http)、地址(localhost)和端口(11284) proxies = { # [协议]:// [地址] :[端口] "http": "socks5h://localhost:11284", # 再例如 "http": "http://127.0.0.1:7890", @@ -20,82 +30,100 @@ if USE_PROXY: else: proxies = None -# [step 3]>> 多线程函数插件中,默认允许多少路线程同时访问OpenAI。Free trial users的限制是每分钟3次,Pay-as-you-go users的限制是每分钟3500次 -# 一言以蔽之:免费用户填3,OpenAI绑了信用卡的用户可以填 16 或者更高。提高限制请查询:https://platform.openai.com/docs/guides/rate-limits/overview +# ------------------------------------ 以下配置可以优化体验, 但大部分场合下并不需要修改 ------------------------------------ + +# 重新URL重新定向,实现更换API_URL的作用(常规情况下,不要修改!! 高危设置!通过修改此设置,您将把您的API-KEY和对话隐私完全暴露给您设定的中间人!) +# 格式 API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"} +# 例如 API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions":"https://reverse-proxy-url/v1/chat/completions"} +API_URL_REDIRECT = {} + + +# 多线程函数插件中,默认允许多少路线程同时访问OpenAI。Free trial users的限制是每分钟3次,Pay-as-you-go users的限制是每分钟3500次 +# 一言以蔽之:免费(5刀)用户填3,OpenAI绑了信用卡的用户可以填 16 或者更高。提高限制请查询:https://platform.openai.com/docs/guides/rate-limits/overview DEFAULT_WORKER_NUM = 3 -# [step 4]>> 以下配置可以优化体验,但大部分场合下并不需要修改 # 对话窗的高度 CHATBOT_HEIGHT = 1115 + # 代码高亮 CODE_HIGHLIGHT = True + # 窗口布局 -LAYOUT = "LEFT-RIGHT" # "LEFT-RIGHT"(左右布局) # "TOP-DOWN"(上下布局) -DARK_MODE = True # "LEFT-RIGHT"(左右布局) # "TOP-DOWN"(上下布局) +LAYOUT = "LEFT-RIGHT" # "LEFT-RIGHT"(左右布局) # "TOP-DOWN"(上下布局) +DARK_MODE = True # 暗色模式 / 亮色模式 + # 发送请求到OpenAI后,等待多久判定为超时 TIMEOUT_SECONDS = 30 + # 网页的端口, -1代表随机端口 WEB_PORT = -1 + # 如果OpenAI不响应(网络卡顿、代理失败、KEY失效),重试的次数限制 MAX_RETRY = 2 -# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 ) + +# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 ) LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓ -AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] +AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", 
"azure-gpt-3.5", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"] # P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] + # 本地LLM模型如ChatGLM的执行方式 CPU/GPU LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda" + # 设置gradio的并行线程数(不需要修改) CONCURRENT_COUNT = 100 + # 是否在提交时自动清空输入框 AUTO_CLEAR_TXT = False # 色彩主体,可选 ["Default", "Chuanhu-Keldos-Green"] -THEME = "Chuanhu-Keldos-Green" +THEME = "Default" # 加一个live2d装饰 ADD_WAIFU = False + # 设置用户名和密码(不需要修改)(相关功能不稳定,与gradio版本和网络都相关,如果本地使用不建议加这个) # [("username", "password"), ("username2", "password2"), ...] AUTHENTICATION = [] -# 重新URL重新定向,实现更换API_URL的作用(常规情况下,不要修改!!) -# (高危设置!通过修改此设置,您将把您的API-KEY和对话隐私完全暴露给您设定的中间人!) -# 格式 {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"} -# 例如 API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "https://ai.open.com/api/conversation"} -API_URL_REDIRECT = {} # 如果需要在二级路径下运行(常规情况下,不要修改!!)(需要配合修改main.py才能生效!) 
CUSTOM_PATH = "/" -# 如果需要使用newbing,把newbing的长长的cookie放到这里 -NEWBING_STYLE = "creative" # ["creative", "balanced", "precise"] -# 从现在起,如果您调用"newbing-free"模型,则无需填写NEWBING_COOKIES -NEWBING_COOKIES = """ -your bing cookies here -""" + +# 极少数情况下,openai的官方KEY需要伴随组织编码(格式如org-xxxxxxxxxxxxxxxxxxxxxxxx)使用 +API_ORG = "" + # 如果需要使用Slack Claude,使用教程详情见 request_llm/README.md SLACK_CLAUDE_BOT_ID = '' SLACK_CLAUDE_USER_TOKEN = '' + # 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md -AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/" -AZURE_API_KEY = "填入azure openai api的密钥" -AZURE_API_VERSION = "填入api版本" -AZURE_ENGINE = "填入ENGINE" +AZURE_ENDPOINT = "https://你亲手写的api名称.openai.azure.com/" +AZURE_API_KEY = "填入azure openai api的密钥" # 建议直接在API_KEY处填写,该选项即将被弃用 +AZURE_ENGINE = "填入你亲手写的部署名" # 读 docs\use_azure.md + + +# 使用Newbing +NEWBING_STYLE = "creative" # ["creative", "balanced", "precise"] +NEWBING_COOKIES = """ +put your new bing cookies here +""" + # 阿里云实时语音识别 配置门槛较高 限高级用户使用 参考 https://help.aliyun.com/document_detail/450255.html ENABLE_AUDIO = True ALIYUN_TOKEN="" # 例如 f37f30e0f9934c34a992f6f64f7eba4f -ALIYUN_APPKEY="" # 例如 RoPlZrM88DnAFkZK +ALIYUN_APPKEY="" # 例如 RoPlZrM88DnAFkZK \ No newline at end of file diff --git a/crazy_functional.py b/crazy_functional.py index 84f73f3..0f321f5 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -352,6 +352,32 @@ def get_crazy_functions(): }) except: print('Load function plugin failed') + + try: + from crazy_functions.交互功能函数模板 import 交互功能模板函数 + function_plugins.update({ + "交互功能模板函数": { + "Color": "stop", + "AsButton": False, + "Function": HotReload(交互功能模板函数) + } + }) + except: + print('Load function plugin failed') + + # try: + # from crazy_functions.chatglm微调工具 import 微调数据集生成 + # function_plugins.update({ + # "黑盒模型学习: 微调数据集生成 (先上传数据集)": { + # "Color": "stop", + # "AsButton": False, + # "AdvancedArgs": True, + # "ArgsReminder": "针对数据集输入(如 绿帽子*深蓝色衬衫*黑色运动裤)给出指令,例如您可以将以下命令复制到下方: --llm_to_learn=azure-gpt-3.5 
--prompt_prefix='根据下面的服装类型提示,想象一个穿着者,对这个人外貌、身处的环境、内心世界、过去经历进行描写。要求:100字以内,用第二人称。' --system_prompt=''", + # "Function": HotReload(微调数据集生成) + # } + # }) + # except: + # print('Load function plugin failed') try: from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比 diff --git a/crazy_functions/chatglm微调工具.py b/crazy_functions/chatglm微调工具.py new file mode 100644 index 0000000..58a9208 --- /dev/null +++ b/crazy_functions/chatglm微调工具.py @@ -0,0 +1,71 @@ +from toolbox import CatchException, update_ui, promote_file_to_downloadzone +from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency +import datetime, json + +def fetch_items(list_of_items, batch_size): + for i in range(0, len(list_of_items), batch_size): + yield list_of_items[i:i + batch_size] + +def string_to_options(arguments): + import argparse + import shlex + + # Create an argparse.ArgumentParser instance + parser = argparse.ArgumentParser() + + # Add command-line arguments + parser.add_argument("--llm_to_learn", type=str, help="LLM model to learn", default="gpt-3.5-turbo") + parser.add_argument("--prompt_prefix", type=str, help="Prompt prefix", default='') + parser.add_argument("--system_prompt", type=str, help="System prompt", default='') + parser.add_argument("--batch", type=int, help="System prompt", default=50) + + # Parse the arguments + args = parser.parse_args(shlex.split(arguments)) + + return args + +@CatchException +def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + """ + txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 + llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行 + plugin_kwargs 插件模型的参数 + chatbot 聊天显示框的句柄,用于显示给用户 + history 聊天历史,前情提要 + system_prompt 给gpt的静默提醒 + web_port 当前软件运行的端口号 + """ + history = [] # 清空历史,以免输入溢出 + chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成")) + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + args = plugin_kwargs.get("advanced_arg", 
None) + if args is None: + chatbot.append(("没给定指令", "退出")) + yield from update_ui(chatbot=chatbot, history=history); return + else: + arguments = string_to_options(arguments=args) + + dat = [] + with open(txt, 'r', encoding='utf8') as f: + for line in f.readlines(): + json_dat = json.loads(line) + dat.append(json_dat["content"]) + + llm_kwargs['llm_model'] = arguments.llm_to_learn + for batch in fetch_items(dat, arguments.batch): + res = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( + inputs_array=[f"{arguments.prompt_prefix}\n\n{b}" for b in (batch)], + inputs_show_user_array=[f"Show Nothing" for _ in (batch)], + llm_kwargs=llm_kwargs, + chatbot=chatbot, + history_array=[[] for _ in (batch)], + sys_prompt_array=[arguments.system_prompt for _ in (batch)], + max_workers=10 # OpenAI所允许的最大并行过载 + ) + + with open(txt+'.generated.json', 'a+', encoding='utf8') as f: + for b, r in zip(batch, res[1::2]): + f.write(json.dumps({"content":b, "summary":r}, ensure_ascii=False)+'\n') + + promote_file_to_downloadzone(txt+'.generated.json', rename_file='generated.json', chatbot=chatbot) + return diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index a10f3c2..a614aac 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -211,22 +211,30 @@ def test_Latex(): # # for cookies, cb, hist, msg in silence_stdout(编译Latex)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): # cli_printer.print(cb) # print(cb) +def test_chatglm_finetune(): + from crazy_functions.chatglm微调工具 import 微调数据集生成 + txt = 'build/dev.json' + plugin_kwargs = {"advanced_arg":"--llm_to_learn=gpt-3.5-turbo --prompt_prefix='根据下面的服装类型提示,想象一个穿着者,对这个人外貌、身处的环境、内心世界、人设进行描写。要求:100字以内,用第二人称。' --system_prompt=''" } + + for cookies, cb, hist, msg in (微调数据集生成)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + cli_printer.print(cb) -# test_解析一个Python项目() 
-# test_Latex英文润色() -# test_Markdown中译英() -# test_批量翻译PDF文档() -# test_谷歌检索小助手() -# test_总结word文档() -# test_下载arxiv论文并翻译摘要() -# test_解析一个Cpp项目() -# test_联网回答问题() -# test_解析ipynb文件() -# test_数学动画生成manim() -# test_Langchain知识库() -# test_Langchain知识库读取() if __name__ == "__main__": - test_Latex() + # test_解析一个Python项目() + # test_Latex英文润色() + # test_Markdown中译英() + # test_批量翻译PDF文档() + # test_谷歌检索小助手() + # test_总结word文档() + # test_下载arxiv论文并翻译摘要() + # test_解析一个Cpp项目() + # test_联网回答问题() + # test_解析ipynb文件() + # test_数学动画生成manim() + # test_Langchain知识库() + # test_Langchain知识库读取() + # test_Latex() + test_chatglm_finetune() input("程序完成,回车退出。") print("退出。") \ No newline at end of file diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index a1b1493..ffe95e2 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -130,6 +130,11 @@ def request_gpt_model_in_new_thread_with_ui_alive( yield from update_ui(chatbot=chatbot, history=[]) # 如果最后成功了,则删除报错信息 return final_result +def can_multi_process(llm): + if llm.startswith('gpt-'): return True + if llm.startswith('api2d-'): return True + if llm.startswith('azure-'): return True + return False def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( inputs_array, inputs_show_user_array, llm_kwargs, @@ -175,7 +180,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( except: max_workers = 8 if max_workers <= 0: max_workers = 3 # 屏蔽掉 chatglm的多线程,可能会导致严重卡顿 - if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')): + if not can_multi_process(llm_kwargs['llm_model']): max_workers = 1 executor = ThreadPoolExecutor(max_workers=max_workers) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index be5a367..003d474 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -189,6 +189,18 @@ def rm_comments(main_file): main_file = 
re.sub(r'(?交互功能模板函数' # 赋予插件锁定 锁定插件回调路径,当下一次用户提交时,会直接转到该函数 + chatbot._cookies['plugin_state_0001'] = 'wait_user_keyword' # 赋予插件状态 + + chatbot.append(("第一次调用:", "请输入关键词, 我将为您查找相关壁纸, 建议使用英文单词, 插件锁定中,请直接提交即可。")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + if state == 'wait_user_keyword': + chatbot._cookies['lock_plugin'] = None # 解除插件锁定,避免遗忘导致死锁 + chatbot._cookies['plugin_state_0001'] = None # 解除插件状态,避免遗忘导致死锁 + + # 解除插件锁定 + chatbot.append((f"获取关键词:{txt}", "")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + page_return = get_image_page_by_keyword(txt) + inputs=inputs_show_user=f"Extract all image urls in this html page, pick the first 5 images and show them with markdown format: \n\n {page_return}" + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs=inputs, inputs_show_user=inputs_show_user, + llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], + sys_prompt="When you want to show an image, use markdown format. e.g. ![image_description](image_url). 
If there are no image url provided, answer 'no image url provided'" + ) + chatbot[-1] = [chatbot[-1][0], gpt_say] + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + + +# --------------------------------------------------------------------------------- + +def get_image_page_by_keyword(keyword): + import requests + from bs4 import BeautifulSoup + response = requests.get(f'https://wallhaven.cc/search?q={keyword}', timeout=2) + res = "image urls: \n" + for image_element in BeautifulSoup(response.content, 'html.parser').findAll("img"): + try: + res += image_element["data-src"] + res += "\n" + except: + pass + return res diff --git a/crazy_functions/总结word文档.py b/crazy_functions/总结word文档.py index eada69d..5af6960 100644 --- a/crazy_functions/总结word文档.py +++ b/crazy_functions/总结word文档.py @@ -14,17 +14,19 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot doc = Document(fp) file_content = "\n".join([para.text for para in doc.paragraphs]) else: - import win32com.client - word = win32com.client.Dispatch("Word.Application") - word.visible = False - # 打开文件 - print('fp', os.getcwd()) - doc = word.Documents.Open(os.getcwd() + '/' + fp) - # file_content = doc.Content.Text - doc = word.ActiveDocument - file_content = doc.Range().Text - doc.Close() - word.Quit() + try: + import win32com.client + word = win32com.client.Dispatch("Word.Application") + word.visible = False + # 打开文件 + doc = word.Documents.Open(os.getcwd() + '/' + fp) + # file_content = doc.Content.Text + doc = word.ActiveDocument + file_content = doc.Range().Text + doc.Close() + word.Quit() + except: + raise RuntimeError('请先将.doc文档转换为.docx文档。') print(file_content) # private_upload里面的文件名在解压zip后容易出现乱码(rar和7z格式正常),故可以只分析文章内容,不输入文件名 diff --git a/crazy_functions/批量总结PDF文档.py b/crazy_functions/批量总结PDF文档.py index cbda23b..fc65f5c 100644 --- a/crazy_functions/批量总结PDF文档.py +++ b/crazy_functions/批量总结PDF文档.py @@ -1,121 +1,107 @@ -from toolbox import update_ui +from toolbox 
import update_ui, promote_file_to_downloadzone, gen_time_str from toolbox import CatchException, report_execption, write_results_to_file -import re -import unicodedata -fast_debug = False from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from .crazy_utils import read_and_clean_pdf_text +from .crazy_utils import input_clipping -def is_paragraph_break(match): - """ - 根据给定的匹配结果来判断换行符是否表示段落分隔。 - 如果换行符前为句子结束标志(句号,感叹号,问号),且下一个字符为大写字母,则换行符更有可能表示段落分隔。 - 也可以根据之前的内容长度来判断段落是否已经足够长。 - """ - prev_char, next_char = match.groups() - # 句子结束标志 - sentence_endings = ".!?" - - # 设定一个最小段落长度阈值 - min_paragraph_length = 140 - - if prev_char in sentence_endings and next_char.isupper() and len(match.string[:match.start(1)]) > min_paragraph_length: - return "\n\n" - else: - return " " - -def normalize_text(text): - """ - 通过把连字(ligatures)等文本特殊符号转换为其基本形式来对文本进行归一化处理。 - 例如,将连字 "fi" 转换为 "f" 和 "i"。 - """ - # 对文本进行归一化处理,分解连字 - normalized_text = unicodedata.normalize("NFKD", text) - - # 替换其他特殊字符 - cleaned_text = re.sub(r'[^\x00-\x7F]+', '', normalized_text) - - return cleaned_text - -def clean_text(raw_text): - """ - 对从 PDF 提取出的原始文本进行清洗和格式化处理。 - 1. 对原始文本进行归一化处理。 - 2. 替换跨行的连词 - 3. 
根据 heuristic 规则判断换行符是否是段落分隔,并相应地进行替换 - """ - # 对文本进行归一化处理 - normalized_text = normalize_text(raw_text) - - # 替换跨行的连词 - text = re.sub(r'(\w+-\n\w+)', lambda m: m.group(1).replace('-\n', ''), normalized_text) - - # 根据前后相邻字符的特点,找到原文本中的换行符 - newlines = re.compile(r'(\S)\n(\S)') - - # 根据 heuristic 规则,用空格或段落分隔符替换原换行符 - final_text = re.sub(newlines, lambda m: m.group(1) + is_paragraph_break(m) + m.group(2), text) - - return final_text.strip() def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): - import time, glob, os, fitz - print('begin analysis on:', file_manifest) - for index, fp in enumerate(file_manifest): - with fitz.open(fp) as doc: - file_content = "" - for page in doc: - file_content += page.get_text() - file_content = clean_text(file_content) - print(file_content) + file_write_buffer = [] + for file_name in file_manifest: + print('begin analysis on:', file_name) + ############################## <第 0 步,切割PDF> ################################## + # 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割) + # 的长度必须小于 2500 个 Token + file_content, page_one = read_and_clean_pdf_text(file_name) # (尝试)按照章节切割PDF + file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + + TOKEN_LIMIT_PER_FRAGMENT = 2500 - prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else "" - i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```' - i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}' - chatbot.append((i_say_show_user, "[Local Message] waiting gpt response.")) - yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf + from request_llm.bridge_all import model_info + enc = model_info["gpt-3.5-turbo"]['tokenizer'] + def 
get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) + paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( + txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT) + page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( + txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4) + # 为了更好的效果,我们剥离Introduction之后的部分(如果有) + paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0] + + ############################## <第 1 步,从摘要中提取高价值信息,放到history中> ################################## + final_results = [] + final_results.append(paper_meta) - if not fast_debug: - msg = '正常' - # ** gpt request ** - gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( - inputs=i_say, - inputs_show_user=i_say_show_user, - llm_kwargs=llm_kwargs, - chatbot=chatbot, - history=[], - sys_prompt="总结文章。" - ) # 带超时倒计时 - + ############################## <第 2 步,迭代地历遍整个文章,提取精炼信息> ################################## + i_say_show_user = f'首先你在中文语境下通读整篇论文。'; gpt_say = "[Local Message] 收到。" # 用户提示 + chatbot.append([i_say_show_user, gpt_say]); yield from update_ui(chatbot=chatbot, history=[]) # 更新UI - chatbot[-1] = (i_say_show_user, gpt_say) - history.append(i_say_show_user); history.append(gpt_say) - yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面 - if not fast_debug: time.sleep(2) + iteration_results = [] + last_iteration_result = paper_meta # 初始值是摘要 + MAX_WORD_TOTAL = 4096 * 0.7 + n_fragment = len(paper_fragments) + if n_fragment >= 20: print('文章极长,不能达到预期效果') + for i in range(n_fragment): + NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment + i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} Chinese characters: {paper_fragments[i]}" + i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} Chinese characters: 
{paper_fragments[i][:200]}" + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, # i_say=真正给chatgpt的提问, i_say_show_user=给用户看的提问 + llm_kwargs, chatbot, + history=["The main idea of the previous section is?", last_iteration_result], # 迭代上一次的结果 + sys_prompt="Extract the main idea of this section with Chinese." # 提示 + ) + iteration_results.append(gpt_say) + last_iteration_result = gpt_say - all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)]) - i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。' - chatbot.append((i_say, "[Local Message] waiting gpt response.")) - yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - - if not fast_debug: - msg = '正常' - # ** gpt request ** + ############################## <第 3 步,整理history,提取总结> ################################## + final_results.extend(iteration_results) + final_results.append(f'Please conclude this paper discussed above。') + # This prompt is from https://github.com/kaixindelele/ChatPaper/blob/main/chat_paper.py + NUM_OF_WORD = 1000 + i_say = """ +1. Mark the title of the paper (with Chinese translation) +2. list all the authors' names (use English) +3. mark the first author's affiliation (output Chinese translation only) +4. mark the keywords of this article (use English) +5. link to the paper, Github code link (if available, fill in Github:None if not) +6. summarize according to the following four points.Be sure to use Chinese answers (proper nouns need to be marked in English) + - (1):What is the research background of this article? + - (2):What are the past methods? What are the problems with them? Is the approach well motivated? + - (3):What is the research methodology proposed in this paper? + - (4):On what task and what performance is achieved by the methods in this paper? Can the performance support their goals? +Follow the format of the output that follows: +1. Title: xxx\n\n +2. 
Authors: xxx\n\n +3. Affiliation: xxx\n\n +4. Keywords: xxx\n\n +5. Urls: xxx or xxx , xxx \n\n +6. Summary: \n\n + - (1):xxx;\n + - (2):xxx;\n + - (3):xxx;\n + - (4):xxx.\n\n +Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, +do not have too much repetitive information, numerical values using the original numbers. + """ + # This prompt is from https://github.com/kaixindelele/ChatPaper/blob/main/chat_paper.py + file_write_buffer.extend(final_results) + i_say, final_results = input_clipping(i_say, final_results, max_token_limit=2000) gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( - inputs=i_say, - inputs_show_user=i_say, - llm_kwargs=llm_kwargs, - chatbot=chatbot, - history=history, - sys_prompt="总结文章。" - ) # 带超时倒计时 + inputs=i_say, inputs_show_user='开始最终总结', + llm_kwargs=llm_kwargs, chatbot=chatbot, history=final_results, + sys_prompt= f"Extract the main idea of this paper with less than {NUM_OF_WORD} Chinese characters" + ) + final_results.append(gpt_say) + file_write_buffer.extend([i_say, gpt_say]) + ############################## <第 4 步,设置一个token上限> ################################## + _, final_results = input_clipping("", final_results, max_token_limit=3200) + yield from update_ui(chatbot=chatbot, history=final_results) # 注意这里的历史记录被替代了 - chatbot[-1] = (i_say, gpt_say) - history.append(i_say); history.append(gpt_say) - yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面 - res = write_results_to_file(history) - chatbot.append(("完成了吗?", res)) - yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面 + res = write_results_to_file(file_write_buffer, file_name=gen_time_str()) + promote_file_to_downloadzone(res.split('\t')[-1], chatbot=chatbot) + yield from update_ui(chatbot=chatbot, history=final_results) # 刷新界面 @CatchException @@ -151,10 +137,7 @@ def 批量总结PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst return # 搜索需要处理的文件清单 - 
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)] # + \ - # [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + \ - # [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \ - # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)] + file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)] # 如果没找到任何文件 if len(file_manifest) == 0: diff --git a/crazy_functions/询问多个大语言模型.py b/crazy_functions/询问多个大语言模型.py index ec9fd4a..9cb85d3 100644 --- a/crazy_functions/询问多个大语言模型.py +++ b/crazy_functions/询问多个大语言模型.py @@ -6,7 +6,7 @@ def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt """ txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行 - plugin_kwargs 插件模型的参数,如温度和top_p等,一般原样传递下去就行 + plugin_kwargs 插件模型的参数,用于灵活调整复杂功能的各种参数 chatbot 聊天显示框的句柄,用于显示给用户 history 聊天历史,前情提要 system_prompt 给gpt的静默提醒 @@ -35,7 +35,7 @@ def 同时问询_指定模型(txt, llm_kwargs, plugin_kwargs, chatbot, history, """ txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行 - plugin_kwargs 插件模型的参数,如温度和top_p等,一般原样传递下去就行 + plugin_kwargs 插件模型的参数,用于灵活调整复杂功能的各种参数 chatbot 聊天显示框的句柄,用于显示给用户 history 聊天历史,前情提要 system_prompt 给gpt的静默提醒 diff --git a/crazy_functions/高级功能函数模板.py b/crazy_functions/高级功能函数模板.py index 73ae45f..abcbbc6 100644 --- a/crazy_functions/高级功能函数模板.py +++ b/crazy_functions/高级功能函数模板.py @@ -1,13 +1,12 @@ from toolbox import CatchException, update_ui from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive -import datetime, re - +import datetime @CatchException def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): """ txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行 - plugin_kwargs 插件模型的参数,暂时没有用武之地 + plugin_kwargs 插件模型的参数,用于灵活调整复杂功能的各种参数 chatbot 聊天显示框的句柄,用于显示给用户 history 聊天历史,前情提要 system_prompt 给gpt的静默提醒 @@ -19,34 +18,12 @@ def 高阶功能模板函数(txt, llm_kwargs, 
plugin_kwargs, chatbot, history, s for i in range(5): currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day - i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?用中文列举两条,然后分别给出描述事件的两个英文单词。' + '当你给出关键词时,使用以下json格式:{"KeyWords":[EnglishKeyWord1,EnglishKeyWord2]}。' + i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。' gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( inputs=i_say, inputs_show_user=i_say, llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], - sys_prompt='输出格式示例:1908年,美国消防救援事业发展的“美国消防协会”成立。关键词:{"KeyWords":["Fire","American"]}。' + sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。" ) - gpt_say = get_images(gpt_say) chatbot[-1] = (i_say, gpt_say) history.append(i_say);history.append(gpt_say) - yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新 - - -def get_images(gpt_say): - def get_image_by_keyword(keyword): - import requests - from bs4 import BeautifulSoup - response = requests.get(f'https://wallhaven.cc/search?q={keyword}', timeout=2) - for image_element in BeautifulSoup(response.content, 'html.parser').findAll("img"): - if "data-src" in image_element: break - return image_element["data-src"] - - for keywords in re.findall('{"KeyWords":\[(.*?)\]}', gpt_say): - keywords = [n.strip('"') for n in keywords.split(',')] - try: - description = keywords[0] - url = get_image_by_keyword(keywords[0]) - img_tag = f"\n\n![{description}]({url})" - gpt_say += img_tag - except: - continue - return gpt_say \ No newline at end of file + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 0a0dcda..210312b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,7 @@ version: 
'3' services: gpt_academic_nolocalllms: - image: ghcr.io/binary-husky/gpt_academic_nolocal:master + image: ghcr.io/binary-husky/gpt_academic_nolocal:master # (Auto Built by Dockerfile: docs/GithubAction+NoLocal) environment: # 请查阅 `config.py` 以查看所有的配置信息 API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ' @@ -33,7 +33,7 @@ services: version: '3' services: gpt_academic_with_chatglm: - image: ghcr.io/binary-husky/gpt_academic_chatglm_moss:master + image: ghcr.io/binary-husky/gpt_academic_chatglm_moss:master # (Auto Built by Dockerfile: docs/Dockerfile+ChatGLM) environment: # 请查阅 `config.py` 以查看所有的配置信息 API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,fkxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ' @@ -63,7 +63,7 @@ services: version: '3' services: gpt_academic_with_rwkv: - image: fuqingxu/gpt_academic:jittorllms # [option 2] 如果需要运行ChatGLM本地模型 + image: fuqingxu/gpt_academic:jittorllms environment: # 请查阅 `config.py` 以查看所有的配置信息 API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,fkxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ' @@ -111,7 +111,7 @@ services: version: '3' services: gpt_academic_with_latex: - image: ghcr.io/binary-husky/gpt_academic_with_latex:master + image: ghcr.io/binary-husky/gpt_academic_with_latex:master # (Auto Built by Dockerfile: docs/GithubAction+NoLocal+Latex) environment: # 请查阅 `config.py` 以查看所有的配置信息 API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ' diff --git a/docs/translate_english.json b/docs/translate_english.json index 57e008b..33faebc 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -1667,5 +1667,294 @@ "段音频的主要内容": "The main content of the segment audio is", "z$ 分别是空间直角坐标系中的三个坐标": "z$, respectively, are the three coordinates in the spatial rectangular coordinate system", "这个是怎么识别的呢我也不清楚": "I'm not sure how this is recognized", - "从现在起": "From now on" + "从现在起": "From now on", + "连接bing搜索回答问题": "ConnectBingSearchAnswerQuestion", + "联网的ChatGPT_bing版": 
"OnlineChatGPT_BingEdition", + "Markdown翻译指定语言": "TranslateMarkdownToSpecifiedLanguage", + "Langchain知识库": "LangchainKnowledgeBase", + "Latex英文纠错加PDF对比": "CorrectEnglishInLatexWithPDFComparison", + "Latex输出PDF结果": "OutputPDFFromLatex", + "Latex翻译中文并重新编译PDF": "TranslateChineseToEnglishInLatexAndRecompilePDF", + "sprint亮靛": "SprintIndigo", + "寻找Latex主文件": "FindLatexMainFile", + "专业词汇声明": "ProfessionalTerminologyDeclaration", + "Latex精细分解与转化": "DecomposeAndConvertLatex", + "编译Latex": "CompileLatex", + "如果您是论文原作者": "If you are the original author of the paper", + "正在编译对比PDF": "Compiling the comparison PDF", + "将 \\include 命令转换为 \\input 命令": "Converting the \\include command to the \\input command", + "取评分最高者返回": "Returning the highest-rated one", + "不要修改!! 高危设置!通过修改此设置": "Do not modify!! High-risk setting! By modifying this setting", + "Tex源文件缺失!": "Tex source file is missing!", + "6.25 加入判定latex模板的代码": "Added code to determine the latex template on June 25", + "正在精细切分latex文件": "Finely splitting the latex file", + "获取response失败": "Failed to get response", + "手动指定语言": "Manually specify the language", + "输入arxivID": "Enter arxivID", + "对输入的word文档进行摘要生成": "Generate a summary of the input word document", + "将指定目录下的PDF文件从英文翻译成中文": "Translate PDF files from English to Chinese in the specified directory", + "如果分析错误": "If the analysis is incorrect", + "尝试第": "Try the", + "用户填3": "User fills in 3", + "请在此处追加更细致的矫错指令": "Please append more detailed correction instructions here", + "为了防止大语言模型的意外谬误产生扩散影响": "To prevent the accidental spread of errors in large language models", + "前面是中文冒号": "The colon before is in Chinese", + "内含已经翻译的Tex文档": "Contains a Tex document that has been translated", + "成功啦": "Success!", + "刷新页面即可以退出UpdateKnowledgeArchive模式": "Refresh the page to exit UpdateKnowledgeArchive mode", + "或者不在环境变量PATH中": "Or not in the environment variable PATH", + "--读取文件": "--Read the file", + "才能继续下面的步骤": "To continue with the next steps", + "代理数据解析失败": "Proxy data parsing 
failed", + "详见项目主README.md": "See the main README.md of the project for details", + "临时存储用于调试": "Temporarily stored for debugging", + "屏蔽空行和太短的句子": "Filter out empty lines and sentences that are too short", + "gpt 多线程请求": "GPT multi-threaded request", + "编译已经开始": "Compilation has started", + "无法找到一个主Tex文件": "Cannot find a main Tex file", + "修复括号": "Fix parentheses", + "请您不要删除或修改这行警告": "Please do not delete or modify this warning", + "请登录OpenAI查看详情 https": "Please log in to OpenAI to view details at https", + "调用函数": "Call a function", + "请查看终端的输出或耐心等待": "Please check the output in the terminal or wait patiently", + "LatexEnglishCorrection+高亮修正位置": "Latex English correction + highlight correction position", + "行": "line", + "Newbing 请求失败": "Newbing request failed", + "转化PDF编译是否成功": "Check if the conversion to PDF and compilation were successful", + "建议更换代理协议": "Recommend changing the proxy protocol", + "========================================= 插件主程序1 =====================================================": "========================================= Plugin Main Program 1 =====================================================", + "终端": "terminal", + "请先上传文件素材": "Please upload file materials first", + "前面是中文逗号": "There is a Chinese comma in front", + "请尝试把以下指令复制到高级参数区": "Please try copying the following instructions to the advanced parameters section", + "翻译-": "Translation -", + "请耐心等待": "Please be patient", + "将前后断行符脱离": "Remove line breaks before and after", + "json等": "JSON, etc.", + "生成中文PDF": "Generate Chinese PDF", + "用红色标注处保留区": "Use red color to highlight the reserved area", + "对比PDF编译是否成功": "Compare if the PDF compilation was successful", + "回答完问题后": "After answering the question", + "其他操作系统表现未知": "Unknown performance on other operating systems", + "-构建知识库": "Build knowledge base", + "还原原文": "Restore original text", + "或者重启之后再度尝试": "Or try again after restarting", + "免费": "Free", + "仅在Windows系统进行了测试": "Tested only on Windows system", + "欢迎加REAME中的QQ联系开发者": 
"Feel free to contact the developer via QQ in REAME", + "当前知识库内的有效文件": "Valid files in the current knowledge base", + "您可以到Github Issue区": "You can go to the Github Issue area", + "刷新Gradio前端界面": "Refresh the Gradio frontend interface", + "吸收title与作者以上的部分": "Include the title and the above part of the author", + "给出一些判定模板文档的词作为扣分项": "Provide some words in the template document as deduction items", + "--读取参数": "-- Read parameters", + "然后进行问答": "And then perform question-answering", + "根据自然语言执行插件命令": "Execute plugin commands based on natural language", + "*{\\scriptsize\\textbf{警告": "*{\\scriptsize\\textbf{Warning", + "但请查收结果": "But please check the results", + "翻译内容可靠性无保障": "No guarantee of translation accuracy", + "寻找主文件": "Find the main file", + "消耗时间的函数": "Time-consuming function", + "当前语言模型温度设定": "Current language model temperature setting", + "这需要一段时间计算": "This requires some time to calculate", + "为啥chatgpt会把cite里面的逗号换成中文逗号呀": "Why does ChatGPT change commas inside 'cite' to Chinese commas?", + "发现已经存在翻译好的PDF文档": "Found an already translated PDF document", + "待提取的知识库名称id": "Knowledge base name ID to be extracted", + "文本碎片重组为完整的tex片段": "Reassemble text fragments into complete tex fragments", + "注意事项": "Notes", + "参数说明": "Parameter description", + "或代理节点": "Or proxy node", + "构建知识库": "Building knowledge base", + "报错信息如下. 如果是与网络相关的问题": "Error message as follows. If it is related to network issues", + "功能描述": "Function description", + "禁止移除或修改此警告": "Removal or modification of this warning is prohibited", + "Arixv翻译": "Arixv translation", + "读取优先级": "Read priority", + "包含documentclass关键字": "Contains the documentclass keyword", + "根据文本使用GPT模型生成相应的图像": "Generate corresponding images using GPT model based on the text", + "图像生成所用到的提示文本": "Prompt text used for image generation", + "Your account is not active. OpenAI以账户失效为由": "Your account is not active. 
OpenAI states that it is due to account expiration", + "快捷的调试函数": "Convenient debugging function", + "在多Tex文档中": "In multiple Tex documents", + "因此选择GenerateImage函数": "Therefore, choose the GenerateImage function", + "当前工作路径为": "The current working directory is", + "实际得到格式": "Obtained format in reality", + "这段代码定义了一个名为TempProxy的空上下文管理器": "This code defines an empty context manager named TempProxy", + "吸收其他杂项": "Absorb other miscellaneous items", + "请输入要翻译成哪种语言": "Please enter which language to translate into", + "的单词": "of the word", + "正在尝试自动安装": "Attempting automatic installation", + "如果有必要": "If necessary", + "开始下载": "Start downloading", + "项目Github地址 \\url{https": "Project GitHub address \\url{https", + "将根据报错信息修正tex源文件并重试": "The Tex source file will be corrected and retried based on the error message", + "发送至azure openai api": "Send to Azure OpenAI API", + "吸收匿名公式": "Absorb anonymous formulas", + "用该压缩包+ConversationHistoryArchive进行反馈": "Provide feedback using the compressed package + ConversationHistoryArchive", + "需要特殊依赖": "Requires special dependencies", + "还原部分原文": "Restore part of the original text", + "构建完成": "Build completed", + "解析arxiv网址失败": "Failed to parse arXiv URL", + "输入问题后点击该插件": "Click the plugin after entering the question", + "请求子进程": "Requesting subprocess", + "请务必用 pip install -r requirements.txt 指令安装依赖": "Please make sure to install the dependencies using the 'pip install -r requirements.txt' command", + "如果程序停顿5分钟以上": "If the program pauses for more than 5 minutes", + "转化PDF编译已经成功": "Conversion to PDF compilation was successful", + "虽然PDF生成失败了": "Although PDF generation failed", + "分析上述回答": "Analyze the above answer", + "吸收在42行以内的begin-end组合": "Absorb the begin-end combination within 42 lines", + "推荐http": "Recommend http", + "Latex没有安装": "Latex is not installed", + "用latex编译为PDF对修正处做高亮": "Compile to PDF using LaTeX and highlight the corrections", + "reverse 操作必须放在最后": "'reverse' operation must be placed at the end", + "AZURE OPENAI 
API拒绝了请求": "AZURE OPENAI API rejected the request", + "该项目的Latex主文件是": "The main LaTeX file of this project is", + "You are associated with a deactivated account. OpenAI以账户失效为由": "You are associated with a deactivated account. OpenAI considers it as an account expiration", + "它*必须*被包含在AVAIL_LLM_MODELS列表中": "It *must* be included in the AVAIL_LLM_MODELS list", + "未知指令": "Unknown command", + "尝试执行Latex指令失败": "Failed to execute the LaTeX command", + "摘要生成后的文档路径": "Path of the document after summary generation", + "GPT结果已输出": "GPT result has been outputted", + "使用Newbing": "Using Newbing", + "其他模型转化效果未知": "Unknown conversion effect of other models", + "P.S. 但愿没人把latex模板放在里面传进来": "P.S. Hopefully, no one passes a LaTeX template in it", + "定位主Latex文件": "Locate the main LaTeX file", + "后面是英文冒号": "English colon follows", + "文档越长耗时越长": "The longer the document, the longer it takes.", + "压缩包": "Compressed file", + "但通常不会出现在正文": "But usually does not appear in the body.", + "正在预热文本向量化模组": "Preheating text vectorization module", + "5刀": "5 dollars", + "提问吧! 但注意": "Ask questions! But be careful", + "发送至AZURE OPENAI API": "Send to AZURE OPENAI API", + "请仔细鉴别并以原文为准": "Please carefully verify and refer to the original text", + "如果需要使用AZURE 详情请见额外文档 docs\\use_azure.md": "If you need to use AZURE, please refer to the additional document docs\\use_azure.md for details", + "使用正则表达式查找半行注释": "Use regular expressions to find inline comments", + "只有第二步成功": "Only the second step is successful", + "P.S. 顺便把CTEX塞进去以支持中文": "P.S. 
By the way, include CTEX to support Chinese", + "安装方法https": "Installation method: https", + "则跳过GPT请求环节": "Then skip the GPT request process", + "请切换至“UpdateKnowledgeArchive”插件进行知识库访问": "Please switch to the 'UpdateKnowledgeArchive' plugin for knowledge base access", + "=================================== 工具函数 ===============================================": "=================================== Utility functions ===============================================", + "填入azure openai api的密钥": "Fill in the Azure OpenAI API key", + "上传Latex压缩包": "Upload LaTeX compressed file", + "远程云服务器部署": "Deploy to remote cloud server", + "用黑色标注转换区": "Use black color to annotate the conversion area", + "音频文件的路径": "Path to the audio file", + "必须包含documentclass": "Must include documentclass", + "再列出用户可能提出的三个问题": "List three more questions that the user might ask", + "根据需要切换prompt": "Switch the prompt as needed", + "将文件复制一份到下载区": "Make a copy of the file in the download area", + "次编译": "Second compilation", + "Latex文件融合完成": "LaTeX file merging completed", + "返回": "Return", + "后面是英文逗号": "Comma after this", + "对不同latex源文件扣分": "Deduct points for different LaTeX source files", + "失败啦": "Failed", + "编译BibTex": "Compile BibTeX", + "Linux下必须使用Docker安装": "Must install using Docker on Linux", + "报错信息": "Error message", + "删除或修改歧义文件": "Delete or modify ambiguous files", + "-预热文本向量化模组": "- Preheating text vectorization module", + "将每次对话记录写入Markdown格式的文件中": "Write each conversation record into a file in Markdown format", + "其他类型文献转化效果未知": "Unknown conversion effect for other types of literature", + "获取线程锁": "Acquire thread lock", + "使用英文": "Use English", + "如果存在调试缓存文件": "If there is a debug cache file", + "您需要首先调用构建知识库": "You need to call the knowledge base building first", + "原始PDF编译是否成功": "Whether the original PDF compilation is successful", + "生成 azure openai api请求": "Generate Azure OpenAI API requests", + "正在编译PDF": "Compiling PDF", + "仅调试": "Debug only", + 
"========================================= 插件主程序2 =====================================================": "========================================= Plugin Main Program 2 =====================================================", + "多线程翻译开始": "Multithreaded translation begins", + "出问题了": "There is a problem", + "版权归原文作者所有": "Copyright belongs to the original author", + "当前大语言模型": "Current large language model", + "目前对机器学习类文献转化效果最好": "Currently, the best conversion effect for machine learning literature", + "这个paper有个input命令文件名大小写错误!": "This paper has an input command with a filename case error!", + "期望格式例如": "Expected format, for example", + "解决部分词汇翻译不准确的问题": "Resolve the issue of inaccurate translation for some terms", + "待注入的知识库名称id": "Name/ID of the knowledge base to be injected", + "精细切分latex文件": "Fine-grained segmentation of LaTeX files", + "永远给定None": "Always given None", + "work_folder = Latex预处理": "work_folder = LaTeX preprocessing", + "请直接去该路径下取回翻译结果": "Please directly go to the path to retrieve the translation results", + "寻找主tex文件": "Finding the main .tex file", + "模型参数": "Model parameters", + "返回找到的第一个": "Return the first one found", + "编译转化后的PDF": "Compile the converted PDF", + "\\SEAFILE_LOCALŅ03047\\我的资料库\\music\\Akie秋绘-未来轮廓.mp3": "\\SEAFILE_LOCALŅ03047\\My Library\\music\\Akie秋绘-未来轮廓.mp3", + "拆分过长的latex片段": "Splitting overly long LaTeX fragments", + "没有找到任何可读取文件": "No readable files found", + "暗色模式 / 亮色模式": "Dark mode / Light mode", + "检测到arxiv文档连接": "Detected arXiv document link", + "此插件Windows支持最佳": "This plugin has best support for Windows", + "from crazy_functions.虚空终端 import 终端": "from crazy_functions.null_terminal import Terminal", + "本地论文翻译": "Local paper translation", + "输出html调试文件": "Output HTML debugging file", + "以下所有配置也都支持利用环境变量覆写": "All the following configurations can also be overridden using environment variables", + "PDF文件所在的路径": "Path of the PDF file", + "也是可读的": "It is also readable", + "将消耗较长时间下载中文向量化模型": "Downloading Chinese 
vectorization model will take a long time", + "环境变量配置格式见docker-compose.yml": "See docker-compose.yml for the format of environment variable configuration", + "编译文献交叉引用": "Compile bibliographic cross-references", + "默认为default": "Default is 'default'", + "或者使用此插件继续上传更多文件": "Or use this plugin to continue uploading more files", + "该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成": "This PDF is generated by the GPT-Academic open-source project using a large language model + LaTeX translation plugin", + "使用latexdiff生成论文转化前后对比": "Use latexdiff to generate before and after comparison of paper transformation", + "正在编译PDF文档": "Compiling PDF document", + "读取config.py文件中关于AZURE OPENAI API的信息": "Read the information about AZURE OPENAI API from the config.py file", + "配置教程&视频教程": "Configuration tutorial & video tutorial", + "临时地启动代理网络": "Temporarily start proxy network", + "临时地激活代理网络": "Temporarily activate proxy network", + "功能尚不稳定": "Functionality is unstable", + "默认为Chinese": "Default is Chinese", + "请查收结果": "Please check the results", + "将 chatglm 直接对齐到 chatglm2": "Align chatglm directly to chatglm2", + "中读取数据构建知识库": "Build a knowledge base by reading data in", + "用于给一小段代码上代理": "Used to proxy a small piece of code", + "分析结果": "Analysis results", + "依赖不足": "Insufficient dependencies", + "Markdown翻译": "Markdown translation", + "除非您是论文的原作者": "Unless you are the original author of the paper", + "test_LangchainKnowledgeBase读取": "test_LangchainKnowledgeBase read", + "将多文件tex工程融合为一个巨型tex": "Merge multiple tex projects into one giant tex", + "吸收iffalse注释": "Absorb iffalser comments", + "您接下来不能再使用其他插件了": "You can no longer use other plugins next", + "正在构建知识库": "Building knowledge base", + "需Latex": "Requires Latex", + "即找不到": "That is not found", + "保证括号正确": "Ensure parentheses are correct", + "= 2 通过一些Latex模板中常见": "= 2 through some common Latex templates", + "请立即终止程序": "Please terminate the program immediately", + "解压失败! 需要安装pip install rarfile来解压rar文件": "Decompression failed! 
Install 'pip install rarfile' to decompress rar files", + "请在此处给出自定义翻译命令": "Please provide custom translation command here", + "解压失败! 需要安装pip install py7zr来解压7z文件": "Decompression failed! Install 'pip install py7zr' to decompress 7z files", + "执行错误": "Execution error", + "目前仅支持GPT3.5/GPT4": "Currently only supports GPT3.5/GPT4", + "P.S. 顺便把Latex的注释去除": "P.S. Also remove comments from Latex", + "写出文件": "Write out the file", + "当前报错的latex代码处于第": "The current error in the LaTeX code is on line", + "主程序即将开始": "Main program is about to start", + "详情信息见requirements.txt": "See details in requirements.txt", + "释放线程锁": "Release thread lock", + "由于最为关键的转化PDF编译失败": "Due to the critical failure of PDF conversion and compilation", + "即将退出": "Exiting soon", + "尝试下载": "Attempting to download", + "删除整行的空注释": "Remove empty comments from the entire line", + "也找不到": "Not found either", + "从一批文件": "From a batch of files", + "编译结束": "Compilation finished", + "调用缓存": "Calling cache", + "只有GenerateImage和生成图像相关": "Only GenerateImage and image generation related", + "待处理的word文档路径": "Path of the word document to be processed", + "是否在提交时自动清空输入框": "Whether to automatically clear the input box upon submission", + "检查结果": "Check the result", + "生成时间戳": "Generate a timestamp", + "编译原始PDF": "Compile the original PDF", + "填入ENGINE": "Fill in ENGINE", + "填入api版本": "Fill in the API version", + "中文Bing版": "Chinese Bing version", + "当前支持的格式包括": "Currently supported formats include" } \ No newline at end of file diff --git a/docs/use_azure.md b/docs/use_azure.md index f1c27ef..f7e7b77 100644 --- a/docs/use_azure.md +++ b/docs/use_azure.md @@ -90,62 +90,29 @@ 到现在为止,申请操作就完成了,需要记下来的有下面几个东西: -● 密钥(1或2都可以) +● 密钥(对应AZURE_API_KEY,1或2都可以) -● 终结点 +● 终结点 (对应AZURE_ENDPOINT) + +● 部署名(对应AZURE_ENGINE,不是模型名) -● 部署名(不是模型名) # 修改 config.py ``` -AZURE_ENDPOINT = "填入终结点" +LLM_MODEL = "azure-gpt-3.5" # 指定启动时的默认模型,当然事后从下拉菜单选也ok + +AZURE_ENDPOINT = "填入终结点" # 见上述图片 AZURE_API_KEY = "填入azure openai api的密钥" AZURE_API_VERSION = 
"2023-05-15" # 默认使用 2023-05-15 版本,无需修改 -AZURE_ENGINE = "填入部署名" - -``` -# API的使用 - -接下来就是具体怎么使用API了,还是可以参考官方文档:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python) - -和openai自己的api调用有点类似,都需要安装openai库,不同的是调用方式 - -``` -import openai -openai.api_type = "azure" #固定格式,无需修改 -openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") #这里填入“终结点” -openai.api_version = "2023-05-15" #固定格式,无需修改 -openai.api_key = os.getenv("AZURE_OPENAI_KEY") #这里填入“密钥1”或“密钥2” - -response = openai.ChatCompletion.create( - engine="gpt-35-turbo", #这里填入的不是模型名,是部署名 - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"}, - {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."}, - {"role": "user", "content": "Do other Azure Cognitive Services support this too?"} - ] -) - -print(response) -print(response['choices'][0]['message']['content']) +AZURE_ENGINE = "填入部署名" # 见上述图片 ``` -需要注意的是: - -1.  engine那里填入的是部署名,不是模型名 - -2.  
通过openai库获得的这个 response 和通过 request 库访问 url 获得的 response 不同,不需要 decode,已经是解析好的 json 了,直接根据键值读取即可。 - -更细节的使用方法,详见官方API文档。 # 关于费用 -Azure OpenAI API 还是需要一些费用的(免费订阅只有1个月有效期),费用如下: - -![image.png](https://note.youdao.com/yws/res/18095/WEBRESOURCEeba0ab6d3127b79e143ef2d5627c0e44) +Azure OpenAI API 还是需要一些费用的(免费订阅只有1个月有效期) 具体可以可以看这个网址 :[Azure OpenAI 服务 - 定价| Microsoft Azure](https://azure.microsoft.com/zh-cn/pricing/details/cognitive-services/openai-service/?cdn=disable) diff --git a/main.py b/main.py index b8355cb..13c152f 100644 --- a/main.py +++ b/main.py @@ -4,13 +4,12 @@ def main(): import gradio as gr if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖,详情信息见requirements.txt" from request_llm.bridge_all import predict - from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith + from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到 - proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS = \ - get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS') + proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \ + get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT') ENABLE_AUDIO, AUTO_CLEAR_TXT = \ get_conf('ENABLE_AUDIO', 'AUTO_CLEAR_TXT') - # 如果WEB_PORT是-1, 则随机选取WEB端口 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT if not AUTHENTICATION: AUTHENTICATION = None @@ -47,23 +46,23 @@ def main(): proxy_info = check_proxy(proxies) gr_L1 = lambda: gr.Row().style() - gr_L2 = lambda scale: 
gr.Column(scale=scale) + gr_L2 = lambda scale, elem_id: gr.Column(scale=scale, elem_id=elem_id) if LAYOUT == "TOP-DOWN": gr_L1 = lambda: DummyWith() - gr_L2 = lambda scale: gr.Row() + gr_L2 = lambda scale, elem_id: gr.Row() CHATBOT_HEIGHT /= 2 cancel_handles = [] with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo: gr.HTML(title_html) - cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL}) + cookies = gr.State(load_chat_cookies()) with gr_L1(): - with gr_L2(scale=2): - chatbot = gr.Chatbot(label=f"当前模型:{LLM_MODEL}") - chatbot.style(height=CHATBOT_HEIGHT) + with gr_L2(scale=2, elem_id="gpt-chat"): + chatbot = gr.Chatbot(label=f"当前模型:{LLM_MODEL}", elem_id="gpt-chatbot") + if LAYOUT == "TOP-DOWN": chatbot.style(height=CHATBOT_HEIGHT) history = gr.State([]) - with gr_L2(scale=1): - with gr.Accordion("输入区", open=True) as area_input_primary: + with gr_L2(scale=1, elem_id="gpt-panel"): + with gr.Accordion("输入区", open=True, elem_id="input-panel") as area_input_primary: with gr.Row(): txt = gr.Textbox(show_label=False, placeholder="Input question here.").style(container=False) with gr.Row(): @@ -76,14 +75,14 @@ def main(): with gr.Row(): audio_mic = gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False) with gr.Row(): - status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}") - with gr.Accordion("基础功能区", open=True) as area_basic_fn: + status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel") + with gr.Accordion("基础功能区", open=True, elem_id="basic-panel") as area_basic_fn: with gr.Row(): for k in functional: if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue variant = functional[k]["Color"] if "Color" in functional[k] else "secondary" functional[k]["Button"] = gr.Button(k, variant=variant) - with gr.Accordion("函数插件区", open=True) as area_crazy_fn: + with gr.Accordion("函数插件区", 
open=True, elem_id="plugin-panel") as area_crazy_fn: with gr.Row(): gr.Markdown("插件可读取“输入区”文本/路径作为参数(上传文件自动修正路径)") with gr.Row(): @@ -105,7 +104,7 @@ def main(): with gr.Row(): with gr.Accordion("点击展开“文件上传区”。上传本地文件/压缩包供函数插件调用。", open=False) as area_file_up: file_upload = gr.Files(label="任何文件, 但推荐上传压缩文件(zip, tar)", file_count="multiple") - with gr.Accordion("更换模型 & SysPrompt & 交互界面布局", open=(LAYOUT == "TOP-DOWN")): + with gr.Accordion("更换模型 & SysPrompt & 交互界面布局", open=(LAYOUT == "TOP-DOWN"), elem_id="interact-panel"): system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt) top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",) temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",) @@ -114,7 +113,7 @@ def main(): md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False) gr.Markdown(description) - with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary: + with gr.Accordion("备选输入区", open=True, visible=False, elem_id="input-panel2") as area_input_secondary: with gr.Row(): txt2 = gr.Textbox(show_label=False, placeholder="Input question here.", label="输入区2").style(container=False) with gr.Row(): @@ -181,9 +180,9 @@ def main(): return {chatbot: gr.update(label="当前模型:"+k)} md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot] ) # 随变按钮的回调函数注册 - def route(k, *args, **kwargs): + def route(request: gr.Request, k, *args, **kwargs): if k in [r"打开插件列表", r"请先从插件列表中选择"]: return - yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(*args, **kwargs) + yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(request, *args, **kwargs) click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo) click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, 
chatbot]) cancel_handles.append(click_handle) @@ -191,12 +190,6 @@ def main(): stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) - def init_cookie(cookies, chatbot): - # 为每一位访问的用户赋予一个独一无二的uuid编码 - cookies.update({'uuid': uuid.uuid4()}) - return cookies - demo.load(init_cookie, inputs=[cookies, chatbot], outputs=[cookies]) - if ENABLE_AUDIO: from crazy_functions.live_audio.audio_io import RealtimeAudioDistribution rad = RealtimeAudioDistribution() @@ -204,6 +197,14 @@ def main(): rad.feed(cookies['uuid'].hex, audio) audio_mic.stream(deal_audio, inputs=[audio_mic, cookies]) + def init_cookie(cookies, chatbot): + # 为每一位访问的用户赋予一个独一无二的uuid编码 + cookies.update({'uuid': uuid.uuid4()}) + return cookies + demo.load(init_cookie, inputs=[cookies, chatbot], outputs=[cookies]) + demo.load(lambda: 0, inputs=None, outputs=None, _js='()=>{ChatBotHeight();}') + + # gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数 def auto_opentab_delay(): import threading, webbrowser, time diff --git a/multi_language.py b/multi_language.py index 6c72598..b4a8834 100644 --- a/multi_language.py +++ b/multi_language.py @@ -33,7 +33,7 @@ import pickle import time CACHE_FOLDER = "gpt_log" -blacklist = ['multi-language', 'gpt_log', '.git', 'private_upload', 'multi_language.py'] +blacklist = ['multi-language', 'gpt_log', '.git', 'private_upload', 'multi_language.py', 'build', '.github', '.vscode', '__pycache__', 'venv'] # LANG = "TraditionalChinese" # TransPrompt = f"Replace each json value `#` with translated results in Traditional Chinese, e.g., \"原始文本\":\"翻譯後文字\". Keep Json format. Do not answer #." 
@@ -301,6 +301,7 @@ def step_1_core_key_translate(): elif isinstance(node, ast.ImportFrom): for n in node.names: if contains_chinese(n.name): syntax.append(n.name) + # if node.module is None: print(node.module) for k in node.module.split('.'): if contains_chinese(k): syntax.append(k) return syntax @@ -310,6 +311,7 @@ def step_1_core_key_translate(): for root, dirs, files in os.walk(directory_path): if any([b in root for b in blacklist]): continue + print(files) for file in files: if file.endswith('.py'): file_path = os.path.join(root, file) @@ -505,6 +507,6 @@ def step_2_core_key_translate(): with open(file_path_new, 'w', encoding='utf-8') as f: f.write(content) os.remove(file_path) - step_1_core_key_translate() step_2_core_key_translate() +print('Finished, checkout generated results at ./multi-language/') \ No newline at end of file diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py index d33f161..13f49bd 100644 --- a/request_llm/bridge_all.py +++ b/request_llm/bridge_all.py @@ -16,15 +16,9 @@ from toolbox import get_conf, trimmed_format_exc from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui from .bridge_chatgpt import predict as chatgpt_ui -from .bridge_azure_test import predict_no_ui_long_connection as azure_noui -from .bridge_azure_test import predict as azure_ui - from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui from .bridge_chatglm import predict as chatglm_ui -from .bridge_newbing import predict_no_ui_long_connection as newbing_noui -from .bridge_newbing import predict as newbing_ui - # from .bridge_tgui import predict_no_ui_long_connection as tgui_noui # from .bridge_tgui import predict as tgui_ui @@ -51,10 +45,11 @@ class LazyloadTiktoken(object): return encoder.decode(*args, **kwargs) # Endpoint 重定向 -API_URL_REDIRECT, = get_conf("API_URL_REDIRECT") +API_URL_REDIRECT, AZURE_ENDPOINT, AZURE_ENGINE = get_conf("API_URL_REDIRECT", "AZURE_ENDPOINT", "AZURE_ENGINE") openai_endpoint = 
"https://api.openai.com/v1/chat/completions" api2d_endpoint = "https://openai.api2d.net/v1/chat/completions" newbing_endpoint = "wss://sydney.bing.com/sydney/ChatHub" +azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15' # 兼容旧版的配置 try: API_URL, = get_conf("API_URL") @@ -124,10 +119,10 @@ model_info = { }, # azure openai - "azure-gpt35":{ - "fn_with_ui": azure_ui, - "fn_without_ui": azure_noui, - "endpoint": get_conf("AZURE_ENDPOINT"), + "azure-gpt-3.5":{ + "fn_with_ui": chatgpt_ui, + "fn_without_ui": chatgpt_noui, + "endpoint": azure_endpoint, "max_token": 4096, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, @@ -169,16 +164,6 @@ model_info = { "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, - - # newbing - "newbing": { - "fn_with_ui": newbing_ui, - "fn_without_ui": newbing_noui, - "endpoint": newbing_endpoint, - "max_token": 4096, - "tokenizer": tokenizer_gpt35, - "token_cnt": get_token_num_gpt35, - }, } @@ -267,6 +252,23 @@ if "newbing-free" in AVAIL_LLM_MODELS: }) except: print(trimmed_format_exc()) +if "newbing" in AVAIL_LLM_MODELS: # same with newbing-free + try: + from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui + from .bridge_newbingfree import predict as newbingfree_ui + # claude + model_info.update({ + "newbing": { + "fn_with_ui": newbingfree_ui, + "fn_without_ui": newbingfree_noui, + "endpoint": newbing_endpoint, + "max_token": 4096, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + } + }) + except: + print(trimmed_format_exc()) def LLM_CATCH_EXCEPTION(f): """ diff --git a/request_llm/bridge_azure_test.py b/request_llm/bridge_azure_test.py deleted file mode 100644 index edc68f7..0000000 --- a/request_llm/bridge_azure_test.py +++ /dev/null @@ -1,241 +0,0 @@ -""" - 该文件中主要包含三个函数 - - 不具备多线程能力的函数: - 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程 - - 具备多线程调用能力的函数 - 2. 
predict_no_ui:高级实验性功能模块调用,不会实时显示在界面上,参数简单,可以多线程并行,方便实现复杂的功能逻辑 - 3. predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程 -""" - -import logging -import traceback -import importlib -import openai -import time - - -# 读取config.py文件中关于AZURE OPENAI API的信息 -from toolbox import get_conf, update_ui, clip_history, trimmed_format_exc -TIMEOUT_SECONDS, MAX_RETRY, AZURE_ENGINE, AZURE_ENDPOINT, AZURE_API_VERSION, AZURE_API_KEY = \ - get_conf('TIMEOUT_SECONDS', 'MAX_RETRY',"AZURE_ENGINE","AZURE_ENDPOINT", "AZURE_API_VERSION", "AZURE_API_KEY") - - -def get_full_error(chunk, stream_response): - """ - 获取完整的从Openai返回的报错 - """ - while True: - try: - chunk += next(stream_response) - except: - break - return chunk - -def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): - """ - 发送至azure openai api,流式获取输出。 - 用于基础的对话功能。 - inputs 是本次问询的输入 - top_p, temperature是chatGPT的内部调优参数 - history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误) - chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容 - additional_fn代表点击的哪个按钮,按钮见functional.py - """ - print(llm_kwargs["llm_model"]) - - if additional_fn is not None: - import core_functional - importlib.reload(core_functional) # 热更新prompt - core_functional = core_functional.get_core_functions() - if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话) - inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"] - - raw_input = inputs - logging.info(f'[raw_input] {raw_input}') - chatbot.append((inputs, "")) - yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 - - - payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream) - - history.append(inputs); history.append("") - - retry = 0 - while True: - try: - - openai.api_type = "azure" - openai.api_version = AZURE_API_VERSION - 
openai.api_base = AZURE_ENDPOINT - openai.api_key = AZURE_API_KEY - response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break - - except: - retry += 1 - chatbot[-1] = ((chatbot[-1][0], "获取response失败,重试中。。。")) - retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else "" - yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面 - if retry > MAX_RETRY: raise TimeoutError - - gpt_replying_buffer = "" - is_head_of_the_stream = True - if stream: - - stream_response = response - - while True: - try: - chunk = next(stream_response) - - except StopIteration: - from toolbox import regular_txt_to_markdown; tb_str = '```\n' + trimmed_format_exc() + '```' - chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk)}") - yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误:" + chunk) # 刷新界面 - return - - if is_head_of_the_stream and (r'"object":"error"' not in chunk): - # 数据流的第一帧不携带content - is_head_of_the_stream = False; continue - - if chunk: - #print(chunk) - try: - if "delta" in chunk["choices"][0]: - if chunk["choices"][0]["finish_reason"] == "stop": - logging.info(f'[response] {gpt_replying_buffer}') - break - status_text = f"finish_reason: {chunk['choices'][0]['finish_reason']}" - gpt_replying_buffer = gpt_replying_buffer + chunk["choices"][0]["delta"]["content"] - - history[-1] = gpt_replying_buffer - chatbot[-1] = (history[-2], history[-1]) - yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面 - - except Exception as e: - traceback.print_exc() - yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面 - chunk = get_full_error(chunk, stream_response) - - error_msg = chunk - yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 - return - - -def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): - 
""" - 发送至AZURE OPENAI API,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。 - inputs: - 是本次问询的输入 - sys_prompt: - 系统静默prompt - llm_kwargs: - chatGPT的内部调优参数 - history: - 是之前的对话列表 - observe_window = None: - 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 - """ - watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 - payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True) - retry = 0 - while True: - - try: - openai.api_type = "azure" - openai.api_version = AZURE_API_VERSION - openai.api_base = AZURE_ENDPOINT - openai.api_key = AZURE_API_KEY - response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break - - except: - retry += 1 - traceback.print_exc() - if retry > MAX_RETRY: raise TimeoutError - if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') - - - stream_response = response - result = '' - while True: - try: chunk = next(stream_response) - except StopIteration: - break - except: - chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 - - if len(chunk)==0: continue - if not chunk.startswith('data:'): - error_msg = get_full_error(chunk, stream_response) - if "reduce the length" in error_msg: - raise ConnectionAbortedError("AZURE OPENAI API拒绝了请求:" + error_msg) - else: - raise RuntimeError("AZURE OPENAI API拒绝了请求:" + error_msg) - if ('data: [DONE]' in chunk): break - - delta = chunk["delta"] - if len(delta) == 0: break - if "role" in delta: continue - if "content" in delta: - result += delta["content"] - if not console_slience: print(delta["content"], end='') - if observe_window is not None: - # 观测窗,把已经获取的数据显示出去 - if len(observe_window) >= 1: observe_window[0] += delta["content"] - # 看门狗,如果超过期限没有喂狗,则终止 - if len(observe_window) >= 2: - if (time.time()-observe_window[1]) > watch_dog_patience: - raise RuntimeError("用户取消了程序。") - else: raise RuntimeError("意外Json结构:"+delta) - if chunk['finish_reason'] == 'length': - raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。") - 
return result - - -def generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream): - """ - 整合所有信息,选择LLM模型,生成 azure openai api请求,为发送请求做准备 - """ - - conversation_cnt = len(history) // 2 - - messages = [{"role": "system", "content": system_prompt}] - if conversation_cnt: - for index in range(0, 2*conversation_cnt, 2): - what_i_have_asked = {} - what_i_have_asked["role"] = "user" - what_i_have_asked["content"] = history[index] - what_gpt_answer = {} - what_gpt_answer["role"] = "assistant" - what_gpt_answer["content"] = history[index+1] - if what_i_have_asked["content"] != "": - if what_gpt_answer["content"] == "": continue - messages.append(what_i_have_asked) - messages.append(what_gpt_answer) - else: - messages[-1]['content'] = what_gpt_answer['content'] - - what_i_ask_now = {} - what_i_ask_now["role"] = "user" - what_i_ask_now["content"] = inputs - messages.append(what_i_ask_now) - - payload = { - "model": llm_kwargs['llm_model'], - "messages": messages, - "temperature": llm_kwargs['temperature'], # 1.0, - "top_p": llm_kwargs['top_p'], # 1.0, - "n": 1, - "stream": stream, - "presence_penalty": 0, - "frequency_penalty": 0, - "engine": AZURE_ENGINE - } - try: - print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........") - except: - print('输入中可能存在乱码。') - return payload - - diff --git a/request_llm/bridge_chatgpt.py b/request_llm/bridge_chatgpt.py index eef8fbf..ca47bd2 100644 --- a/request_llm/bridge_chatgpt.py +++ b/request_llm/bridge_chatgpt.py @@ -22,8 +22,8 @@ import importlib # config_private.py放自己的秘密如API和代理网址 # 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件 from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc -proxies, API_KEY, TIMEOUT_SECONDS, MAX_RETRY = \ - get_conf('proxies', 'API_KEY', 'TIMEOUT_SECONDS', 'MAX_RETRY') +proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG = \ + get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG') 
timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。' @@ -101,6 +101,8 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("用户取消了程序。") else: raise RuntimeError("意外Json结构:"+delta) + if json_data['finish_reason'] == 'content_filter': + raise RuntimeError("由于提问含不合规内容被Azure过滤。") if json_data['finish_reason'] == 'length': raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。") return result @@ -205,6 +207,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp chunk = get_full_error(chunk, stream_response) chunk_decoded = chunk.decode() error_msg = chunk_decoded + openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup' if "reduce the length" in error_msg: if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出 history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'], @@ -214,9 +217,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp elif "does not exist" in error_msg: chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.") elif "Incorrect API key" in error_msg: - chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务.") + chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website) elif "exceeded your current quota" in error_msg: - chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务.") + chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." 
+ openai_website) + elif "account is not active" in error_msg: + chatbot[-1] = (chatbot[-1][0], "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website) + elif "associated with a deactivated account" in error_msg: + chatbot[-1] = (chatbot[-1][0], "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website) elif "bad forward key" in error_msg: chatbot[-1] = (chatbot[-1][0], "[Local Message] Bad forward key. API2D账户额度不足.") elif "Not enough point" in error_msg: @@ -241,6 +248,8 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream): "Content-Type": "application/json", "Authorization": f"Bearer {api_key}" } + if API_ORG.startswith('org-'): headers.update({"OpenAI-Organization": API_ORG}) + if llm_kwargs['llm_model'].startswith('azure-'): headers.update({"api-key": api_key}) conversation_cnt = len(history) // 2 diff --git a/request_llm/bridge_newbing.py b/request_llm/bridge_newbing.py deleted file mode 100644 index 2136f01..0000000 --- a/request_llm/bridge_newbing.py +++ /dev/null @@ -1,254 +0,0 @@ -""" -======================================================================== -第一部分:来自EdgeGPT.py -https://github.com/acheong08/EdgeGPT -======================================================================== -""" -from .edge_gpt import NewbingChatbot -load_message = "等待NewBing响应。" - -""" -======================================================================== -第二部分:子进程Worker(调用主体) -======================================================================== -""" -import time -import json -import re -import logging -import asyncio -import importlib -import threading -from toolbox import update_ui, get_conf, trimmed_format_exc -from multiprocessing import Process, Pipe - -def preprocess_newbing_out(s): - pattern = r'\^(\d+)\^' # 匹配^数字^ - sub = lambda m: '('+m.group(1)+')' # 将匹配到的数字作为替换值 - result = re.sub(pattern, sub, s) # 替换操作 - if '[1]' in result: - result += '\n\n```reference\n' + 
"\n".join([r for r in result.split('\n') if r.startswith('[')]) + '\n```\n' - return result - -def preprocess_newbing_out_simple(result): - if '[1]' in result: - result += '\n\n```reference\n' + "\n".join([r for r in result.split('\n') if r.startswith('[')]) + '\n```\n' - return result - -class NewBingHandle(Process): - def __init__(self): - super().__init__(daemon=True) - self.parent, self.child = Pipe() - self.newbing_model = None - self.info = "" - self.success = True - self.local_history = [] - self.check_dependency() - self.start() - self.threadLock = threading.Lock() - - def check_dependency(self): - try: - self.success = False - import certifi, httpx, rich - self.info = "依赖检测通过,等待NewBing响应。注意目前不能多人同时调用NewBing接口(有线程锁),否则将导致每个人的NewBing问询历史互相渗透。调用NewBing时,会自动使用已配置的代理。" - self.success = True - except: - self.info = "缺少的依赖,如果要使用Newbing,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_newbing.txt`安装Newbing的依赖。" - self.success = False - - def ready(self): - return self.newbing_model is not None - - async def async_run(self): - # 读取配置 - NEWBING_STYLE, = get_conf('NEWBING_STYLE') - from request_llm.bridge_all import model_info - endpoint = model_info['newbing']['endpoint'] - while True: - # 等待 - kwargs = self.child.recv() - question=kwargs['query'] - history=kwargs['history'] - system_prompt=kwargs['system_prompt'] - - # 是否重置 - if len(self.local_history) > 0 and len(history)==0: - await self.newbing_model.reset() - self.local_history = [] - - # 开始问问题 - prompt = "" - if system_prompt not in self.local_history: - self.local_history.append(system_prompt) - prompt += system_prompt + '\n' - - # 追加历史 - for ab in history: - a, b = ab - if a not in self.local_history: - self.local_history.append(a) - prompt += a + '\n' - # if b not in self.local_history: - # self.local_history.append(b) - # prompt += b + '\n' - - # 问题 - prompt += question - self.local_history.append(question) - print('question:', prompt) - # 提交 - async for final, response in 
self.newbing_model.ask_stream( - prompt=question, - conversation_style=NEWBING_STYLE, # ["creative", "balanced", "precise"] - wss_link=endpoint, # "wss://sydney.bing.com/sydney/ChatHub" - ): - if not final: - print(response) - self.child.send(str(response)) - else: - print('-------- receive final ---------') - self.child.send('[Finish]') - # self.local_history.append(response) - - - def run(self): - """ - 这个函数运行在子进程 - """ - # 第一次运行,加载参数 - self.success = False - self.local_history = [] - if (self.newbing_model is None) or (not self.success): - # 代理设置 - proxies, = get_conf('proxies') - if proxies is None: - self.proxies_https = None - else: - self.proxies_https = proxies['https'] - # cookie - NEWBING_COOKIES, = get_conf('NEWBING_COOKIES') - try: - cookies = json.loads(NEWBING_COOKIES) - except: - self.success = False - tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n' - self.child.send(f'[Local Message] 不能加载Newbing组件。NEWBING_COOKIES未填写或有格式错误。') - self.child.send('[Fail]') - self.child.send('[Finish]') - raise RuntimeError(f"不能加载Newbing组件。NEWBING_COOKIES未填写或有格式错误。") - - try: - self.newbing_model = NewbingChatbot(proxy=self.proxies_https, cookies=cookies) - except: - self.success = False - tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n' - self.child.send(f'[Local Message] 不能加载Newbing组件。{tb_str}') - self.child.send('[Fail]') - self.child.send('[Finish]') - raise RuntimeError(f"不能加载Newbing组件。") - - self.success = True - try: - # 进入任务等待状态 - asyncio.run(self.async_run()) - except Exception: - tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n' - self.child.send(f'[Local Message] Newbing失败 {tb_str}.') - self.child.send('[Fail]') - self.child.send('[Finish]') - - def stream_chat(self, **kwargs): - """ - 这个函数运行在主进程 - """ - self.threadLock.acquire() - self.parent.send(kwargs) # 发送请求到子进程 - while True: - res = self.parent.recv() # 等待newbing回复的片段 - if res == '[Finish]': - break # 结束 - elif res == '[Fail]': - self.success = False - break - else: - yield res # 
newbing回复的片段 - self.threadLock.release() - - -""" -======================================================================== -第三部分:主进程统一调用函数接口 -======================================================================== -""" -global newbing_handle -newbing_handle = None - -def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): - """ - 多线程方法 - 函数的说明请见 request_llm/bridge_all.py - """ - global newbing_handle - if (newbing_handle is None) or (not newbing_handle.success): - newbing_handle = NewBingHandle() - observe_window[0] = load_message + "\n\n" + newbing_handle.info - if not newbing_handle.success: - error = newbing_handle.info - newbing_handle = None - raise RuntimeError(error) - - # 没有 sys_prompt 接口,因此把prompt加入 history - history_feedin = [] - for i in range(len(history)//2): - history_feedin.append([history[2*i], history[2*i+1]] ) - - watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可 - response = "" - observe_window[0] = "[Local Message]: 等待NewBing响应中 ..." 
- for response in newbing_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): - observe_window[0] = preprocess_newbing_out_simple(response) - if len(observe_window) >= 2: - if (time.time()-observe_window[1]) > watch_dog_patience: - raise RuntimeError("程序终止。") - return preprocess_newbing_out_simple(response) - -def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): - """ - 单线程方法 - 函数的说明请见 request_llm/bridge_all.py - """ - chatbot.append((inputs, "[Local Message]: 等待NewBing响应中 ...")) - - global newbing_handle - if (newbing_handle is None) or (not newbing_handle.success): - newbing_handle = NewBingHandle() - chatbot[-1] = (inputs, load_message + "\n\n" + newbing_handle.info) - yield from update_ui(chatbot=chatbot, history=[]) - if not newbing_handle.success: - newbing_handle = None - return - - if additional_fn is not None: - import core_functional - importlib.reload(core_functional) # 热更新prompt - core_functional = core_functional.get_core_functions() - if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话) - inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"] - - history_feedin = [] - for i in range(len(history)//2): - history_feedin.append([history[2*i], history[2*i+1]] ) - - chatbot[-1] = (inputs, "[Local Message]: 等待NewBing响应中 ...") - response = "[Local Message]: 等待NewBing响应中 ..." 
- yield from update_ui(chatbot=chatbot, history=history, msg="NewBing响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。") - for response in newbing_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']): - chatbot[-1] = (inputs, preprocess_newbing_out(response)) - yield from update_ui(chatbot=chatbot, history=history, msg="NewBing响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。") - if response == "[Local Message]: 等待NewBing响应中 ...": response = "[Local Message]: NewBing响应异常,请刷新界面重试 ..." - history.extend([inputs, response]) - logging.info(f'[raw_input] {inputs}') - logging.info(f'[response] {response}') - yield from update_ui(chatbot=chatbot, history=history, msg="完成全部响应,请提交新问题。") - diff --git a/request_llm/bridge_newbingfree.py b/request_llm/bridge_newbingfree.py index 38d2eb9..11c2ea7 100644 --- a/request_llm/bridge_newbingfree.py +++ b/request_llm/bridge_newbingfree.py @@ -89,9 +89,6 @@ class NewBingHandle(Process): if a not in self.local_history: self.local_history.append(a) prompt += a + '\n' - # if b not in self.local_history: - # self.local_history.append(b) - # prompt += b + '\n' # 问题 prompt += question @@ -101,7 +98,7 @@ class NewBingHandle(Process): async for final, response in self.newbing_model.ask_stream( prompt=question, conversation_style=NEWBING_STYLE, # ["creative", "balanced", "precise"] - wss_link=endpoint, # "wss://sydney.bing.com/sydney/ChatHub" + wss_link=endpoint, # "wss://sydney.bing.com/sydney/ChatHub" ): if not final: print(response) @@ -121,14 +118,26 @@ class NewBingHandle(Process): self.local_history = [] if (self.newbing_model is None) or (not self.success): # 代理设置 - proxies, = get_conf('proxies') + proxies, NEWBING_COOKIES = get_conf('proxies', 'NEWBING_COOKIES') if proxies is None: self.proxies_https = None else: self.proxies_https = proxies['https'] + if (NEWBING_COOKIES is not None) and len(NEWBING_COOKIES) > 100: + try: + cookies = 
json.loads(NEWBING_COOKIES) + except: + self.success = False + tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n' + self.child.send(f'[Local Message] NEWBING_COOKIES未填写或有格式错误。') + self.child.send('[Fail]'); self.child.send('[Finish]') + raise RuntimeError(f"NEWBING_COOKIES未填写或有格式错误。") + else: + cookies = None + try: - self.newbing_model = NewbingChatbot(proxy=self.proxies_https) + self.newbing_model = NewbingChatbot(proxy=self.proxies_https, cookies=cookies) except: self.success = False tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n' @@ -143,7 +152,7 @@ class NewBingHandle(Process): asyncio.run(self.async_run()) except Exception: tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n' - self.child.send(f'[Local Message] Newbing失败 {tb_str}.') + self.child.send(f'[Local Message] Newbing 请求失败,报错信息如下. 如果是与网络相关的问题,建议更换代理协议(推荐http)或代理节点 {tb_str}.') self.child.send('[Fail]') self.child.send('[Finish]') @@ -151,18 +160,14 @@ class NewBingHandle(Process): """ 这个函数运行在主进程 """ - self.threadLock.acquire() - self.parent.send(kwargs) # 发送请求到子进程 + self.threadLock.acquire() # 获取线程锁 + self.parent.send(kwargs) # 请求子进程 while True: - res = self.parent.recv() # 等待newbing回复的片段 - if res == '[Finish]': - break # 结束 - elif res == '[Fail]': - self.success = False - break - else: - yield res # newbing回复的片段 - self.threadLock.release() + res = self.parent.recv() # 等待newbing回复的片段 + if res == '[Finish]': break # 结束 + elif res == '[Fail]': self.success = False; break # 失败 + else: yield res # newbing回复的片段 + self.threadLock.release() # 释放线程锁 """ diff --git a/request_llm/bridge_stackclaude.py b/request_llm/bridge_stackclaude.py index c674a8b..bbc1324 100644 --- a/request_llm/bridge_stackclaude.py +++ b/request_llm/bridge_stackclaude.py @@ -1,4 +1,4 @@ -from .bridge_newbing import preprocess_newbing_out, preprocess_newbing_out_simple +from .bridge_newbingfree import preprocess_newbing_out, preprocess_newbing_out_simple from multiprocessing import Process, Pipe from toolbox import update_ui, 
get_conf, trimmed_format_exc import threading diff --git a/request_llm/edge_gpt.py b/request_llm/edge_gpt.py deleted file mode 100644 index bbf8400..0000000 --- a/request_llm/edge_gpt.py +++ /dev/null @@ -1,409 +0,0 @@ -""" -======================================================================== -第一部分:来自EdgeGPT.py -https://github.com/acheong08/EdgeGPT -======================================================================== -""" - -import argparse -import asyncio -import json -import os -import random -import re -import ssl -import sys -import uuid -from enum import Enum -from typing import Generator -from typing import Literal -from typing import Optional -from typing import Union -import websockets.client as websockets - -DELIMITER = "\x1e" - - -# Generate random IP between range 13.104.0.0/14 -FORWARDED_IP = ( - f"13.{random.randint(104, 107)}.{random.randint(0, 255)}.{random.randint(0, 255)}" -) - -HEADERS = { - "accept": "application/json", - "accept-language": "en-US,en;q=0.9", - "content-type": "application/json", - "sec-ch-ua": '"Not_A Brand";v="99", "Microsoft Edge";v="110", "Chromium";v="110"', - "sec-ch-ua-arch": '"x86"', - "sec-ch-ua-bitness": '"64"', - "sec-ch-ua-full-version": '"109.0.1518.78"', - "sec-ch-ua-full-version-list": '"Chromium";v="110.0.5481.192", "Not A(Brand";v="24.0.0.0", "Microsoft Edge";v="110.0.1587.69"', - "sec-ch-ua-mobile": "?0", - "sec-ch-ua-model": "", - "sec-ch-ua-platform": '"Windows"', - "sec-ch-ua-platform-version": '"15.0.0"', - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-ms-client-request-id": str(uuid.uuid4()), - "x-ms-useragent": "azsdk-js-api-client-factory/1.0.0-beta.1 core-rest-pipeline/1.10.0 OS/Win32", - "Referer": "https://www.bing.com/search?q=Bing+AI&showconv=1&FORM=hpcodx", - "Referrer-Policy": "origin-when-cross-origin", - "x-forwarded-for": FORWARDED_IP, -} - -HEADERS_INIT_CONVER = { - "authority": "edgeservices.bing.com", - "accept": 
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "accept-language": "en-US,en;q=0.9", - "cache-control": "max-age=0", - "sec-ch-ua": '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"', - "sec-ch-ua-arch": '"x86"', - "sec-ch-ua-bitness": '"64"', - "sec-ch-ua-full-version": '"110.0.1587.69"', - "sec-ch-ua-full-version-list": '"Chromium";v="110.0.5481.192", "Not A(Brand";v="24.0.0.0", "Microsoft Edge";v="110.0.1587.69"', - "sec-ch-ua-mobile": "?0", - "sec-ch-ua-model": '""', - "sec-ch-ua-platform": '"Windows"', - "sec-ch-ua-platform-version": '"15.0.0"', - "sec-fetch-dest": "document", - "sec-fetch-mode": "navigate", - "sec-fetch-site": "none", - "sec-fetch-user": "?1", - "upgrade-insecure-requests": "1", - "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.69", - "x-edge-shopping-flag": "1", - "x-forwarded-for": FORWARDED_IP, -} - -def get_ssl_context(): - import certifi - ssl_context = ssl.create_default_context() - ssl_context.load_verify_locations(certifi.where()) - return ssl_context - - - -class NotAllowedToAccess(Exception): - pass - - -class ConversationStyle(Enum): - creative = "h3imaginative,clgalileo,gencontentv3" - balanced = "galileo" - precise = "h3precise,clgalileo" - - -CONVERSATION_STYLE_TYPE = Optional[ - Union[ConversationStyle, Literal["creative", "balanced", "precise"]] -] - - -def _append_identifier(msg: dict) -> str: - """ - Appends special character to end of message to identify end of message - """ - # Convert dict to json string - return json.dumps(msg) + DELIMITER - - -def _get_ran_hex(length: int = 32) -> str: - """ - Returns random hex string - """ - return "".join(random.choice("0123456789abcdef") for _ in range(length)) - - -class _ChatHubRequest: - """ - Request object for ChatHub - """ - - def __init__( - self, - conversation_signature: str, - 
client_id: str, - conversation_id: str, - invocation_id: int = 0, - ) -> None: - self.struct: dict = {} - - self.client_id: str = client_id - self.conversation_id: str = conversation_id - self.conversation_signature: str = conversation_signature - self.invocation_id: int = invocation_id - - def update( - self, - prompt, - conversation_style, - options, - ) -> None: - """ - Updates request object - """ - if options is None: - options = [ - "deepleo", - "enable_debug_commands", - "disable_emoji_spoken_text", - "enablemm", - ] - if conversation_style: - if not isinstance(conversation_style, ConversationStyle): - conversation_style = getattr(ConversationStyle, conversation_style) - options = [ - "nlu_direct_response_filter", - "deepleo", - "disable_emoji_spoken_text", - "responsible_ai_policy_235", - "enablemm", - conversation_style.value, - "dtappid", - "cricinfo", - "cricinfov2", - "dv3sugg", - ] - self.struct = { - "arguments": [ - { - "source": "cib", - "optionsSets": options, - "sliceIds": [ - "222dtappid", - "225cricinfo", - "224locals0", - ], - "traceId": _get_ran_hex(32), - "isStartOfSession": self.invocation_id == 0, - "message": { - "author": "user", - "inputMethod": "Keyboard", - "text": prompt, - "messageType": "Chat", - }, - "conversationSignature": self.conversation_signature, - "participant": { - "id": self.client_id, - }, - "conversationId": self.conversation_id, - }, - ], - "invocationId": str(self.invocation_id), - "target": "chat", - "type": 4, - } - self.invocation_id += 1 - - -class _Conversation: - """ - Conversation API - """ - - def __init__( - self, - cookies, - proxy, - ) -> None: - self.struct: dict = { - "conversationId": None, - "clientId": None, - "conversationSignature": None, - "result": {"value": "Success", "message": None}, - } - import httpx - self.proxy = proxy - proxy = ( - proxy - or os.environ.get("all_proxy") - or os.environ.get("ALL_PROXY") - or os.environ.get("https_proxy") - or os.environ.get("HTTPS_PROXY") - or None - ) - if 
proxy is not None and proxy.startswith("socks5h://"): - proxy = "socks5://" + proxy[len("socks5h://") :] - self.session = httpx.Client( - proxies=proxy, - timeout=30, - headers=HEADERS_INIT_CONVER, - ) - for cookie in cookies: - self.session.cookies.set(cookie["name"], cookie["value"]) - - # Send GET request - response = self.session.get( - url=os.environ.get("BING_PROXY_URL") - or "https://edgeservices.bing.com/edgesvc/turing/conversation/create", - ) - if response.status_code != 200: - response = self.session.get( - "https://edge.churchless.tech/edgesvc/turing/conversation/create", - ) - if response.status_code != 200: - print(f"Status code: {response.status_code}") - print(response.text) - print(response.url) - raise Exception("Authentication failed") - try: - self.struct = response.json() - except (json.decoder.JSONDecodeError, NotAllowedToAccess) as exc: - raise Exception( - "Authentication failed. You have not been accepted into the beta.", - ) from exc - if self.struct["result"]["value"] == "UnauthorizedRequest": - raise NotAllowedToAccess(self.struct["result"]["message"]) - - -class _ChatHub: - """ - Chat API - """ - - def __init__(self, conversation) -> None: - self.wss = None - self.request: _ChatHubRequest - self.loop: bool - self.task: asyncio.Task - print(conversation.struct) - self.request = _ChatHubRequest( - conversation_signature=conversation.struct["conversationSignature"], - client_id=conversation.struct["clientId"], - conversation_id=conversation.struct["conversationId"], - ) - - async def ask_stream( - self, - prompt: str, - wss_link: str, - conversation_style: CONVERSATION_STYLE_TYPE = None, - raw: bool = False, - options: dict = None, - ) -> Generator[str, None, None]: - """ - Ask a question to the bot - """ - if self.wss and not self.wss.closed: - await self.wss.close() - # Check if websocket is closed - self.wss = await websockets.connect( - wss_link, - extra_headers=HEADERS, - max_size=None, - ssl=get_ssl_context() - ) - await 
self._initial_handshake() - # Construct a ChatHub request - self.request.update( - prompt=prompt, - conversation_style=conversation_style, - options=options, - ) - # Send request - await self.wss.send(_append_identifier(self.request.struct)) - final = False - while not final: - objects = str(await self.wss.recv()).split(DELIMITER) - for obj in objects: - if obj is None or not obj: - continue - response = json.loads(obj) - if response.get("type") != 2 and raw: - yield False, response - elif response.get("type") == 1 and response["arguments"][0].get( - "messages", - ): - resp_txt = response["arguments"][0]["messages"][0]["adaptiveCards"][ - 0 - ]["body"][0].get("text") - yield False, resp_txt - elif response.get("type") == 2: - final = True - yield True, response - - async def _initial_handshake(self) -> None: - await self.wss.send(_append_identifier({"protocol": "json", "version": 1})) - await self.wss.recv() - - async def close(self) -> None: - """ - Close the connection - """ - if self.wss and not self.wss.closed: - await self.wss.close() - - -class NewbingChatbot: - """ - Combines everything to make it seamless - """ - - def __init__( - self, - cookies, - proxy - ) -> None: - if cookies is None: - cookies = {} - self.cookies = cookies - self.proxy = proxy - self.chat_hub: _ChatHub = _ChatHub( - _Conversation(self.cookies, self.proxy), - ) - - async def ask( - self, - prompt: str, - wss_link: str, - conversation_style: CONVERSATION_STYLE_TYPE = None, - options: dict = None, - ) -> dict: - """ - Ask a question to the bot - """ - async for final, response in self.chat_hub.ask_stream( - prompt=prompt, - conversation_style=conversation_style, - wss_link=wss_link, - options=options, - ): - if final: - return response - await self.chat_hub.wss.close() - return None - - async def ask_stream( - self, - prompt: str, - wss_link: str, - conversation_style: CONVERSATION_STYLE_TYPE = None, - raw: bool = False, - options: dict = None, - ) -> Generator[str, None, None]: - """ - 
Ask a question to the bot - """ - async for response in self.chat_hub.ask_stream( - prompt=prompt, - conversation_style=conversation_style, - wss_link=wss_link, - raw=raw, - options=options, - ): - yield response - - async def close(self) -> None: - """ - Close the connection - """ - await self.chat_hub.close() - - async def reset(self) -> None: - """ - Reset the conversation - """ - await self.close() - self.chat_hub = _ChatHub(_Conversation(self.cookies, self.proxy)) - - diff --git a/requirements.txt b/requirements.txt index 1d70323..8def763 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ ./docs/gradio-3.32.2-py3-none-any.whl +pydantic==1.10.11 tiktoken>=0.3.3 requests[socks] transformers @@ -15,4 +16,4 @@ pymupdf openai numpy arxiv -rich \ No newline at end of file +rich diff --git a/toolbox.py b/toolbox.py index 256d99c..3a7d89d 100644 --- a/toolbox.py +++ b/toolbox.py @@ -4,6 +4,7 @@ import time import inspect import re import os +import gradio from latex2mathml.converter import convert as tex2mathml from functools import wraps, lru_cache pj = os.path.join @@ -40,7 +41,7 @@ def ArgsGeneralWrapper(f): """ 装饰器函数,用于重组输入参数,改变输入参数的顺序与结构。 """ - def decorated(cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, plugin_advanced_arg, *args): + def decorated(request: gradio.Request, cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, plugin_advanced_arg, *args): txt_passon = txt if txt == "" and txt2 != "": txt_passon = txt2 # 引入一个有cookie的chatbot @@ -54,13 +55,21 @@ def ArgsGeneralWrapper(f): 'top_p':top_p, 'max_length': max_length, 'temperature':temperature, + 'client_ip': request.client.host, } plugin_kwargs = { "advanced_arg": plugin_advanced_arg, } chatbot_with_cookie = ChatBotWithCookies(cookies) chatbot_with_cookie.write_list(chatbot) - yield from f(txt_passon, llm_kwargs, plugin_kwargs, chatbot_with_cookie, history, system_prompt, *args) + if 
cookies.get('lock_plugin', None) is None: + # 正常状态 + yield from f(txt_passon, llm_kwargs, plugin_kwargs, chatbot_with_cookie, history, system_prompt, *args) + else: + # 处理个别特殊插件的锁定状态 + module, fn_name = cookies['lock_plugin'].split('->') + f_hot_reload = getattr(importlib.import_module(module, fn_name), fn_name) + yield from f_hot_reload(txt_passon, llm_kwargs, plugin_kwargs, chatbot_with_cookie, history, system_prompt, *args) return decorated @@ -68,8 +77,21 @@ def update_ui(chatbot, history, msg='正常', **kwargs): # 刷新界面 """ 刷新用户界面 """ - assert isinstance(chatbot, ChatBotWithCookies), "在传递chatbot的过程中不要将其丢弃。必要时,可用clear将其清空,然后用for+append循环重新赋值。" - yield chatbot.get_cookies(), chatbot, history, msg + assert isinstance(chatbot, ChatBotWithCookies), "在传递chatbot的过程中不要将其丢弃。必要时, 可用clear将其清空, 然后用for+append循环重新赋值。" + cookies = chatbot.get_cookies() + + # 解决插件锁定时的界面显示问题 + if cookies.get('lock_plugin', None): + label = cookies.get('llm_model', "") + " | " + "正在锁定插件" + cookies.get('lock_plugin', None) + chatbot_gr = gradio.update(value=chatbot, label=label) + if cookies.get('label', "") != label: cookies['label'] = label # 记住当前的label + elif cookies.get('label', None): + chatbot_gr = gradio.update(value=chatbot, label=cookies.get('llm_model', "")) + cookies['label'] = None # 清空label + else: + chatbot_gr = chatbot + + yield cookies, chatbot_gr, history, msg def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面 """ @@ -192,7 +214,7 @@ def write_results_to_file(history, file_name=None): # remove everything that cannot be handled by utf8 f.write(content.encode('utf-8', 'ignore').decode()) f.write('\n\n') - res = '以上材料已经被写入' + os.path.abspath(f'./gpt_log/{file_name}') + res = '以上材料已经被写入:\t' + os.path.abspath(f'./gpt_log/{file_name}') print(res) return res @@ -445,8 +467,11 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None): import shutil if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}' new_path = 
os.path.join(f'./gpt_log/', rename_file) + # 如果已经存在,先删除 if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path) + # 把文件复制过去 if not os.path.exists(new_path): shutil.copyfile(file, new_path) + # 将文件添加到chatbot cookie中,避免多用户干扰 if chatbot: if 'file_to_promote' in chatbot._cookies: current = chatbot._cookies['file_to_promote'] else: current = [] @@ -505,16 +530,24 @@ def on_report_generated(cookies, files, chatbot): chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}']) return cookies, report_files, chatbot +def load_chat_cookies(): + API_KEY, LLM_MODEL, AZURE_API_KEY = get_conf('API_KEY', 'LLM_MODEL', 'AZURE_API_KEY') + if is_any_api_key(AZURE_API_KEY): + if is_any_api_key(API_KEY): API_KEY = API_KEY + ',' + AZURE_API_KEY + else: API_KEY = AZURE_API_KEY + return {'api_key': API_KEY, 'llm_model': LLM_MODEL} + def is_openai_api_key(key): API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key) + return bool(API_MATCH_ORIGINAL) + +def is_azure_api_key(key): API_MATCH_AZURE = re.match(r"[a-zA-Z0-9]{32}$", key) - return bool(API_MATCH_ORIGINAL) or bool(API_MATCH_AZURE) + return bool(API_MATCH_AZURE) def is_api2d_key(key): - if key.startswith('fk') and len(key) == 41: - return True - else: - return False + API_MATCH_API2D = re.match(r"fk[a-zA-Z0-9]{6}-[a-zA-Z0-9]{32}$", key) + return bool(API_MATCH_API2D) def is_any_api_key(key): if ',' in key: @@ -523,10 +556,10 @@ def is_any_api_key(key): if is_any_api_key(k): return True return False else: - return is_openai_api_key(key) or is_api2d_key(key) + return is_openai_api_key(key) or is_api2d_key(key) or is_azure_api_key(key) def what_keys(keys): - avail_key_list = {'OpenAI Key':0, "API2D Key":0} + avail_key_list = {'OpenAI Key':0, "Azure Key":0, "API2D Key":0} key_list = keys.split(',') for k in key_list: @@ -537,7 +570,11 @@ def what_keys(keys): if is_api2d_key(k): avail_key_list['API2D Key'] += 1 - return f"检测到: OpenAI Key {avail_key_list['OpenAI Key']} 个,API2D Key 
{avail_key_list['API2D Key']} 个" + for k in key_list: + if is_azure_api_key(k): + avail_key_list['Azure Key'] += 1 + + return f"检测到: OpenAI Key {avail_key_list['OpenAI Key']} 个, Azure Key {avail_key_list['Azure Key']} 个, API2D Key {avail_key_list['API2D Key']} 个" def select_api_key(keys, llm_model): import random @@ -552,8 +589,12 @@ def select_api_key(keys, llm_model): for k in key_list: if is_api2d_key(k): avail_key_list.append(k) + if llm_model.startswith('azure-'): + for k in key_list: + if is_azure_api_key(k): avail_key_list.append(k) + if len(avail_key_list) == 0: - raise RuntimeError(f"您提供的api-key不满足要求,不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源。") + raise RuntimeError(f"您提供的api-key不满足要求,不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源(右下角更换模型菜单中可切换openai,azure和api2d请求源)") api_key = random.choice(avail_key_list) # 随机负载均衡 return api_key diff --git a/version b/version index 6353b34..0a59059 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.42, + "version": 3.44, "show_feature": true, - "new_feature": "完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" + "new_feature": "[改善UI] 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块 <-> 完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件" }