diff --git a/.gitignore b/.gitignore
index efe469e..213f3e7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,7 +55,6 @@ coverage.xml
 *.pot
 github
 .github
-.idea/
 TEMP
 TRASH
diff --git a/Dockerfile+ChatGLM b/Dockerfile+ChatGLM
new file mode 100644
index 0000000..f99f2a6
--- /dev/null
+++ b/Dockerfile+ChatGLM
@@ -0,0 +1,50 @@
+# How to build | 如何构建: docker build -t gpt-academic --network=host -f Dockerfile+ChatGLM .
+# How to run | 如何运行 (1) 直接运行: docker run --rm -it --net=host --gpus=all gpt-academic
+# How to run | 如何运行 (2) 我想运行之前进容器做一些调整: docker run --rm -it --net=host --gpus=all gpt-academic bash
+
+# 从NVIDIA源,从而支持显卡运行(检查宿主的nvidia-smi中的cuda版本必须>=11.3)
+FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04
+ARG useProxyNetwork=''
+RUN apt-get update
+RUN apt-get install -y curl proxychains curl
+RUN apt-get install -y git python python3 python-dev python3-dev --fix-missing
+
+# 配置代理网络(构建Docker镜像时使用)
+# # comment out below if you do not need proxy network | 如果不需要翻墙 - 从此行向下删除
+RUN $useProxyNetwork curl cip.cc
+RUN sed -i '$ d' /etc/proxychains.conf
+RUN sed -i '$ d' /etc/proxychains.conf
+RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf
+ARG useProxyNetwork=proxychains
+# # comment out above if you do not need proxy network | 如果不需要翻墙 - 从此行向上删除
+
+
+# use python3 as the system default python
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
+
+# 下载分支
+WORKDIR /gpt
+RUN $useProxyNetwork git clone https://github.com/binary-husky/chatgpt_academic.git -b v3.0
+WORKDIR /gpt/chatgpt_academic
+RUN $useProxyNetwork python3 -m pip install -r requirements.txt
+RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_chatglm.txt
+RUN $useProxyNetwork python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu113
+
+# 预热CHATGLM参数(非必要 可选步骤)
+RUN echo ' \n\
+from transformers import AutoModel, AutoTokenizer \n\
+chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) \n\
+chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float() ' >> warm_up_chatglm.py
+RUN python3 -u warm_up_chatglm.py
+RUN $useProxyNetwork git pull
+
+# 为chatgpt-academic配置代理和API-KEY (非必要 可选步骤)
+RUN echo ' \n\
+API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \n\
+USE_PROXY = True \n\
+LLM_MODEL = "chatglm" \n\
+LOCAL_MODEL_DEVICE = "cuda" \n\
+proxies = { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' >> config_private.py
+
+# 启动
+CMD ["python3", "-u", "main.py"]
diff --git a/config.py b/config.py
index 658de99..8db5d46 100644
--- a/config.py
+++ b/config.py
@@ -45,7 +45,10 @@ WEB_PORT = -1
 MAX_RETRY = 2
 
 # OpenAI模型选择是(gpt4现在只对申请成功的人开放)
-LLM_MODEL = "gpt-3.5-turbo"
+LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm", "tgui:anymodel@localhost:7865"
+
+# 本地LLM模型如ChatGLM的执行方式 CPU/GPU
+LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
 
 # OpenAI的API_URL
 API_URL = "https://api.openai.com/v1/chat/completions"
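Aside (not part of the patch): the two new knobs above are normally overridden in a `config_private.py`, which is exactly what the last `RUN echo` step of the Dockerfile writes. A minimal sketch of such an override, with a placeholder API key and only the options shown in this diff:

```python
# config_private.py -- illustrative override of config.py; values are placeholders.
API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"   # placeholder, not a real key
USE_PROXY = False
LLM_MODEL = "chatglm"            # or "gpt-3.5-turbo", "tgui:anymodel@localhost:7865"
LOCAL_MODEL_DEVICE = "cuda"      # "cpu" if no CUDA >= 11.3 GPU is available
```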
"解析整个C++项目头文件": { "Color": "stop", # 按钮颜色 + "AsButton": False, # 加入下拉菜单中 "Function": HotReload(解析一个C项目的头文件) }, "解析整个C++项目(.cpp/.hpp/.c/.h)": { diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 189d948..cc43b53 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -61,7 +61,7 @@ def request_gpt_model_in_new_thread_with_ui_alive( """ import time from concurrent.futures import ThreadPoolExecutor - from request_llm.bridge_chatgpt import predict_no_ui_long_connection + from request_llm.bridge_all import predict_no_ui_long_connection # 用户反馈 chatbot.append([inputs_show_user, ""]) yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面 @@ -167,13 +167,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( """ import time, random from concurrent.futures import ThreadPoolExecutor - from request_llm.bridge_chatgpt import predict_no_ui_long_connection + from request_llm.bridge_all import predict_no_ui_long_connection assert len(inputs_array) == len(history_array) assert len(inputs_array) == len(sys_prompt_array) if max_workers == -1: # 读取配置文件 try: max_workers, = get_conf('DEFAULT_WORKER_NUM') except: max_workers = 8 if max_workers <= 0 or max_workers >= 20: max_workers = 8 + # 屏蔽掉 chatglm的多线程,可能会导致严重卡顿 + if not llm_kwargs['llm_model'].startswith('gpt-'): + max_workers = 1 + executor = ThreadPoolExecutor(max_workers=max_workers) n_frag = len(inputs_array) # 用户反馈 diff --git a/crazy_functions/代码重写为全英文_多线程.py b/crazy_functions/代码重写为全英文_多线程.py index 40bc45d..a09a3f5 100644 --- a/crazy_functions/代码重写为全英文_多线程.py +++ b/crazy_functions/代码重写为全英文_多线程.py @@ -1,5 +1,5 @@ import threading -from request_llm.bridge_chatgpt import predict_no_ui_long_connection +from request_llm.bridge_all import predict_no_ui_long_connection from toolbox import update_ui from toolbox import CatchException, write_results_to_file, report_execption from .crazy_utils import breakdown_txt_to_satisfy_token_limit diff --git a/crazy_functions/解析项目源代码.py b/crazy_functions/解析项目源代码.py index 3af7497..9b476cb 100644 --- a/crazy_functions/解析项目源代码.py +++ b/crazy_functions/解析项目源代码.py @@ -12,7 +12,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, sys_prompt_array = [] report_part_1 = [] - assert len(file_manifest) <= 1024, "源文件太多(超过1024个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。" + assert len(file_manifest) <= 512, "源文件太多(超过512个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。" ############################## <第一步,逐个文件分析,多线程> ################################## for index, fp in enumerate(file_manifest): # 读取文件 diff --git a/crazy_functions/询问多个大语言模型.py b/crazy_functions/询问多个大语言模型.py new file mode 100644 index 0000000..a3c98c1 --- /dev/null +++ b/crazy_functions/询问多个大语言模型.py @@ -0,0 +1,28 @@ +from toolbox import CatchException, update_ui +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +import datetime +@CatchException +def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + """ + txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 + llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行 + plugin_kwargs 插件模型的参数,如温度和top_p等,一般原样传递下去就行 + chatbot 聊天显示框的句柄,用于显示给用户 + history 聊天历史,前情提要 + system_prompt 给gpt的静默提醒 + web_port 当前软件运行的端口号 + """ + history = [] # 清空历史,以免输入溢出 + chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 + + llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo' + gpt_say = yield 
diff --git a/crazy_functions/询问多个大语言模型.py b/crazy_functions/询问多个大语言模型.py
new file mode 100644
index 0000000..a3c98c1
--- /dev/null
+++ b/crazy_functions/询问多个大语言模型.py
@@ -0,0 +1,28 @@
+from toolbox import CatchException, update_ui
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+import datetime
+@CatchException
+def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
+    llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
+    plugin_kwargs   插件模型的参数,如温度和top_p等,一般原样传递下去就行
+    chatbot         聊天显示框的句柄,用于显示给用户
+    history         聊天历史,前情提要
+    system_prompt   给gpt的静默提醒
+    web_port        当前软件运行的端口号
+    """
+    history = []    # 清空历史,以免输入溢出
+    chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
+
+    llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo'
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=txt, inputs_show_user=txt,
+        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+        sys_prompt=system_prompt
+    )
+
+    history.append(txt)
+    history.append(gpt_say)
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
\ No newline at end of file
diff --git a/main.py b/main.py
index 2e4e275..d8257a2 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,6 @@
 import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
 import gradio as gr
-from request_llm.bridge_chatgpt import predict
+from request_llm.bridge_all import predict
 from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
 # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
 
@@ -97,7 +97,10 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
                     system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
                     top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
                     temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
+                    max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="MaxLength",)
                     checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
+                    md_dropdown = gr.Dropdown(["gpt-3.5-turbo", "chatglm"], value=LLM_MODEL, label="").style(container=False)
+
                     gr.Markdown(description)
                 with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary:
                     with gr.Row():
@@ -118,7 +121,7 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
             return ret
         checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2] )
         # 整理反复出现的控件句柄组合
-        input_combo = [cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
+        input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
         output_combo = [cookies, chatbot, history, status]
         predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=input_combo, outputs=output_combo)
         # 提交按钮、重置按钮
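Aside (not part of the patch): the two new controls (`max_length_sl`, `md_dropdown`) are inserted into `input_combo` right after `cookies`, which is why `ArgsGeneralWrapper.decorated` in the toolbox.py hunk further below now takes `max_length` and `llm_model` in those same positions; Gradio passes the listed input components to the wrapped function positionally. A minimal sketch of that positional contract, using simplified names:

```python
# Positional contract between the UI input list and the wrapped predict function.
input_combo = ["cookies", "max_length_sl", "md_dropdown", "txt", "txt2",
               "top_p", "temperature", "chatbot", "history", "system_prompt"]

def decorated(cookies, max_length, llm_model, txt, txt2,
              top_p, temperature, chatbot, history, system_prompt, *args):
    # Each parameter receives the value of the control at the same index above.
    return {'llm_model': llm_model, 'max_length': max_length,
            'top_p': top_p, 'temperature': temperature}

print(decorated({}, 512, "chatglm", "hi", "", 1.0, 1.0, [], [], "prompt"))
# {'llm_model': 'chatglm', 'max_length': 512, 'top_p': 1.0, 'temperature': 1.0}
```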
diff --git a/request_llm/README.md b/request_llm/README.md
index 3277e4a..973adea 100644
--- a/request_llm/README.md
+++ b/request_llm/README.md
@@ -1,35 +1,53 @@
 # 如何使用其他大语言模型(v3.0分支测试中)
 
-## 1. 先运行text-generation
+## ChatGLM
+
+- 安装依赖 `pip install -r request_llm/requirements_chatglm.txt`
+- 修改配置,在config.py中将LLM_MODEL的值改为"chatglm"
+
 ``` sh
-# 下载模型( text-generation 这么牛的项目,别忘了给人家star )
+LLM_MODEL = "chatglm"
+```
+- 运行!
+``` sh
+python main.py
+```
+
+
+---
+## Text-Generation-UI (TGUI)
+
+### 1. 部署TGUI
+``` sh
+# 1 下载模型
 git clone https://github.com/oobabooga/text-generation-webui.git
-
-# 安装text-generation的额外依赖
-pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
-
-# 切换路径
+# 2 这个仓库的最新代码有问题,回滚到几周之前
+git reset --hard fcda3f87767e642d1c0411776e549e1d3894843d
+# 3 切换路径
 cd text-generation-webui
-
-# 下载模型
+# 4 安装text-generation的额外依赖
+pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
+# 5 下载模型
 python download-model.py facebook/galactica-1.3b
 # 其他可选如 facebook/opt-1.3b
+#           facebook/galactica-1.3b
 #           facebook/galactica-6.7b
 #           facebook/galactica-120b
 #           facebook/pygmalion-1.3b 等
 # 详情见 https://github.com/oobabooga/text-generation-webui
 
-# 启动text-generation,注意把模型的斜杠改成下划线
-python server.py --cpu --listen --listen-port 7860 --model facebook_galactica-1.3b
+# 6 启动text-generation
+python server.py --cpu --listen --listen-port 7865 --model facebook_galactica-1.3b
 ```
 
-## 2. 修改config.py
+### 2. 修改config.py
+
 ``` sh
-# LLM_MODEL格式较复杂 TGUI:[模型]@[ws地址]:[ws端口] , 端口要和上面给定的端口一致
-LLM_MODEL = "TGUI:galactica-1.3b@localhost:7860"
+# LLM_MODEL格式: tgui:[模型]@[ws地址]:[ws端口] , 端口要和上面给定的端口一致
+LLM_MODEL = "tgui:galactica-1.3b@localhost:7865"
 ```
 
-## 3. 运行!
+### 3. 运行!
 ``` sh
 cd chatgpt-academic
 python main.py
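Aside (not part of the patch): both the README and `bridge_tgui.py` rely on the `tgui:[模型]@[ws地址]:[ws端口]` convention. A minimal sketch of how such a string splits apart, mirroring the `split('@')` / `split(':')` calls used in the bridge (the helper name is illustrative):

```python
def parse_tgui_model(llm_model: str):
    """'tgui:galactica-1.3b@localhost:7865' -> ('tgui:galactica-1.3b', 'localhost', '7865')"""
    model_name, addr_port = llm_model.split('@')
    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_model
    addr, port = addr_port.split(':')
    return model_name, addr, port

print(parse_tgui_model("tgui:galactica-1.3b@localhost:7865"))
```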
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
new file mode 100644
index 0000000..f02a1c8
--- /dev/null
+++ b/request_llm/bridge_all.py
@@ -0,0 +1,135 @@
+
+"""
+    该文件中主要包含2个函数
+
+    不具备多线程能力的函数:
+    1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
+
+    具备多线程调用能力的函数
+    2. predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程
+"""
+
+from concurrent.futures import ThreadPoolExecutor
+
+from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
+from .bridge_chatgpt import predict as chatgpt_ui
+
+from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
+from .bridge_chatglm import predict as chatglm_ui
+
+from .bridge_tgui import predict_no_ui_long_connection as tgui_noui
+from .bridge_tgui import predict as tgui_ui
+
+methods = {
+    "openai-no-ui": chatgpt_noui,
+    "openai-ui": chatgpt_ui,
+
+    "chatglm-no-ui": chatglm_noui,
+    "chatglm-ui": chatglm_ui,
+
+    "tgui-no-ui": tgui_noui,
+    "tgui-ui": tgui_ui,
+}
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
+    """
+    发送至LLM,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。
+    inputs:
+        是本次问询的输入
+    sys_prompt:
+        系统静默prompt
+    llm_kwargs:
+        LLM的内部调优参数
+    history:
+        是之前的对话列表
+    observe_window = None:
+        用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗
+    """
+    import threading, time, copy
+
+    model = llm_kwargs['llm_model']
+    n_model = 1
+    if '&' not in model:
+        assert not model.startswith("tgui"), "TGUI不支持函数插件的实现"
+
+        # 如果只询问1个大语言模型:
+        if model.startswith('gpt'):
+            method = methods['openai-no-ui']
+        elif model == 'chatglm':
+            method = methods['chatglm-no-ui']
+        elif model.startswith('tgui'):
+            method = methods['tgui-no-ui']
+        return method(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience)
+    else:
+        # 如果同时询问多个大语言模型:
+        executor = ThreadPoolExecutor(max_workers=16)
+        models = model.split('&')
+        n_model = len(models)
+
+        window_len = len(observe_window)
+        if window_len==0:
+            window_mutex = [[] for _ in range(n_model)] + [True]
+        elif window_len==1:
+            window_mutex = [[""] for _ in range(n_model)] + [True]
+        elif window_len==2:
+            window_mutex = [["", time.time()] for _ in range(n_model)] + [True]
+
+        futures = []
+        for i in range(n_model):
+            model = models[i]
+            if model.startswith('gpt'):
+                method = methods['openai-no-ui']
+            elif model == 'chatglm':
+                method = methods['chatglm-no-ui']
+            elif model.startswith('tgui'):
+                method = methods['tgui-no-ui']
+            llm_kwargs_feedin = copy.deepcopy(llm_kwargs)
+            llm_kwargs_feedin['llm_model'] = model
+            future = executor.submit(method, inputs, llm_kwargs_feedin, history, sys_prompt, window_mutex[i], console_slience)
+            futures.append(future)
+
+        def mutex_manager(window_mutex, observe_window):
+            while True:
+                time.sleep(0.2)
+                if not window_mutex[-1]: break
+                # 看门狗(watchdog)
+                for i in range(n_model):
+                    window_mutex[i][1] = observe_window[1]
+                # 观察窗(window)
+                chat_string = []
+                for i in range(n_model):
+                    chat_string.append( f"[{str(models[i])} 说]: {window_mutex[i][0]}" )
+                res = '\n\n---\n\n'.join(chat_string)
+                # # # # # # # # # # #
+                observe_window[0] = res
+
+        t_model = threading.Thread(target=mutex_manager, args=(window_mutex, observe_window), daemon=True)
+        t_model.start()
+
+        return_string_collect = []
+        for i, future in enumerate(futures):  # wait and get
+            return_string_collect.append( f"[{str(models[i])} 说]: {future.result()}" )
+        window_mutex[-1] = False  # stop mutex thread
+        res = '\n\n---\n\n'.join(return_string_collect)
+        return res
+
+
+def predict(inputs, llm_kwargs, *args, **kwargs):
+    """
+    发送至LLM,流式获取输出。
+    用于基础的对话功能。
+    inputs 是本次问询的输入
+    top_p, temperature是LLM的内部调优参数
+    history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误)
+    chatbot 为WebUI中显示的对话列表,修改它,然后yield出去,可以直接修改对话界面内容
+    additional_fn代表点击的哪个按钮,按钮见functional.py
+    """
+    if llm_kwargs['llm_model'].startswith('gpt'):
+        method = methods['openai-ui']
+    elif llm_kwargs['llm_model'] == 'chatglm':
+        method = methods['chatglm-ui']
+    elif llm_kwargs['llm_model'].startswith('tgui'):
+        method = methods['tgui-ui']
+
+    yield from method(inputs, llm_kwargs, *args, **kwargs)
+
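Aside (not part of the patch): the `'&'`-joined branch of `bridge_all.predict_no_ui_long_connection` fans one prompt out to several backends and stitches the answers back together with a `---` separator. A minimal self-contained sketch of that fan-out and join shape, with dummy callables standing in for the real bridge functions:

```python
from concurrent.futures import ThreadPoolExecutor

def fan_out(prompt, backends):
    """backends: mapping of model name -> callable(prompt) -> str (dummies here)."""
    with ThreadPoolExecutor(max_workers=len(backends)) as pool:
        futures = {name: pool.submit(fn, prompt) for name, fn in backends.items()}
        parts = [f"[{name} 说]: {fut.result()}" for name, fut in futures.items()]
    return '\n\n---\n\n'.join(parts)

dummy_backends = {
    'gpt-3.5-turbo': lambda p: f"(gpt answer to: {p})",
    'chatglm':       lambda p: f"(chatglm answer to: {p})",
}
print(fan_out("你好", dummy_backends))
```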
diff --git a/request_llm/bridge_chatglm.py b/request_llm/bridge_chatglm.py
new file mode 100644
index 0000000..d6f5eec
--- /dev/null
+++ b/request_llm/bridge_chatglm.py
@@ -0,0 +1,83 @@
+
+from transformers import AutoModel, AutoTokenizer
+import time
+import importlib
+from toolbox import update_ui, get_conf
+
+
+global chatglm_model, chatglm_tokenizer
+
+chatglm_model = None
+chatglm_tokenizer = None
+
+def model_loader():
+    global chatglm_model, chatglm_tokenizer
+    if chatglm_tokenizer is None:
+        chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+    if chatglm_model is None: # 尚未加载
+        device, = get_conf('LOCAL_MODEL_DEVICE')
+        if device=='cpu':
+            chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
+        else:
+            chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+        chatglm_model = chatglm_model.eval()
+    chatglm_model = chatglm_model.eval()
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
+    """
+        函数的说明请见 request_llm/bridge_all.py
+    """
+    global chatglm_model, chatglm_tokenizer
+    if chatglm_model is None:
+        observe_window[0] = "ChatGLM尚未加载,加载需要一段时间 ……"
+
+    model_loader()
+    # chatglm 没有 sys_prompt 接口,因此把prompt加入 history
+    history_feedin = []
+    for i in range(len(history)//2):
+        history_feedin.append(["What can I do?", sys_prompt] )
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
+    response = ""
+    for response, history in chatglm_model.stream_chat(chatglm_tokenizer, inputs, history=history_feedin, max_length=llm_kwargs['max_length'],
+                                                       top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        # 观测窗,把已经获取的数据显示出去
+        observe_window[0] = response
+        # 看门狗 (watchdog),如果超过期限没有喂狗,则终止
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience:
+                raise RuntimeError("程序终止。")
+        # if not console_slience:
+        #     print(response)
+    return response
+
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+        函数的说明请见 request_llm/bridge_all.py
+    """
+    global chatglm_model, chatglm_tokenizer
+    chatbot.append((inputs, ""))
+    if chatglm_model is None:
+        chatbot[-1] = (inputs, "ChatGLM尚未加载,加载需要一段时间 ……")
+        yield from update_ui(chatbot=chatbot, history=[])
+    model_loader()
+
+    if additional_fn is not None:
+        import core_functional
+        importlib.reload(core_functional)    # 热更新prompt
+        core_functional = core_functional.get_core_functions()
+        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # 获取预处理函数(如果有的话)
+        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
+
+
+    history_feedin = []
+    for i in range(len(history)//2):
+        history_feedin.append(["What can I do?", system_prompt] )
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    for response, history in chatglm_model.stream_chat(chatglm_tokenizer, inputs, history=history_feedin, max_length=llm_kwargs['max_length'],
+                                                       top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)
\ No newline at end of file
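Aside (not part of the patch): the ChatGLM bridge streams partial replies into `observe_window[0]` and treats `observe_window[1]` as a watchdog timestamp that the caller keeps refreshing; if it goes stale for longer than `watch_dog_patience` seconds the stream is aborted. A minimal sketch of that pattern, with a fake token stream standing in for `stream_chat`:

```python
import time

def stream_with_watchdog(token_stream, observe_window, patience=5):
    """Copy partial output into observe_window[0]; abort if the caller stops feeding the dog."""
    response = ""
    for response in token_stream:
        observe_window[0] = response                      # expose the partial result
        if len(observe_window) >= 2:
            if (time.time() - observe_window[1]) > patience:
                raise RuntimeError("程序终止。")            # watchdog expired
    return response

window = ["", time.time()]
print(stream_with_watchdog(iter(["你", "你好", "你好!"]), window))
```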
diff --git a/request_llm/bridge_tgui.py b/request_llm/bridge_tgui.py
index 22a4075..fcf852f 100644
--- a/request_llm/bridge_tgui.py
+++ b/request_llm/bridge_tgui.py
@@ -13,23 +13,18 @@
 import time
 import threading
 import importlib
 from toolbox import get_conf, update_ui
-LLM_MODEL, = get_conf('LLM_MODEL')
-# "TGUI:galactica-1.3b@localhost:7860"
-model_name, addr_port = LLM_MODEL.split('@')
-assert ':' in addr_port, "LLM_MODEL 格式不正确!" + LLM_MODEL
-addr, port = addr_port.split(':')
 
 def random_hash():
     letters = string.ascii_lowercase + string.digits
     return ''.join(random.choice(letters) for i in range(9))
 
-async def run(context, max_token=512):
+async def run(context, max_token, temperature, top_p, addr, port):
     params = {
         'max_new_tokens': max_token,
         'do_sample': True,
-        'temperature': 0.5,
-        'top_p': 0.9,
+        'temperature': temperature,
+        'top_p': top_p,
         'typical_p': 1,
         'repetition_penalty': 1.05,
         'encoder_repetition_penalty': 1.0,
@@ -90,7 +85,7 @@ async def run(context, max_token=512):
 
 
 
-def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
     """
     发送至chatGPT,流式获取输出。
     用于基础的对话功能。
@@ -108,18 +103,26 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt=
         inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
 
     raw_input = "What I would like to say is the following: " + inputs
-    logging.info(f'[raw_input] {raw_input}')
     history.extend([inputs, ""])
     chatbot.append([inputs, ""])
     yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
 
-    prompt = inputs
+    prompt = raw_input
     tgui_say = ""
 
+    model_name, addr_port = llm_kwargs['llm_model'].split('@')
+    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
+    addr, port = addr_port.split(':')
+
+
     mutable = ["", time.time()]
     def run_coorotine(mutable):
         async def get_result(mutable):
-            async for response in run(prompt):
+            # "tgui:galactica-1.3b@localhost:7860"
+
+            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
+                                      temperature=llm_kwargs['temperature'],
+                                      top_p=llm_kwargs['top_p'], addr=addr, port=port):
                 print(response[len(mutable[0]):])
                 mutable[0] = response
                 if (time.time() - mutable[1]) > 3:
@@ -140,28 +143,29 @@ def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt=
         chatbot[-1] = (history[-2], history[-1])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
-    logging.info(f'[response] {tgui_say}')
 
 
-def predict_tgui_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""):
+def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
     raw_input = "What I would like to say is the following: " + inputs
-    prompt = inputs
+    prompt = raw_input
     tgui_say = ""
-    mutable = ["", time.time()]
-    def run_coorotine(mutable):
-        async def get_result(mutable):
-            async for response in run(prompt, max_token=20):
-                print(response[len(mutable[0]):])
-                mutable[0] = response
-                if (time.time() - mutable[1]) > 3:
+    model_name, addr_port = llm_kwargs['llm_model'].split('@')
+    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
+    addr, port = addr_port.split(':')
+
+
+    def run_coorotine(observe_window):
+        async def get_result(observe_window):
+            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
+                                      temperature=llm_kwargs['temperature'],
+                                      top_p=llm_kwargs['top_p'], addr=addr, port=port):
+                print(response[len(observe_window[0]):])
+                observe_window[0] = response
+                if (time.time() - observe_window[1]) > 5:
                     print('exit when no listener')
                     break
-        asyncio.run(get_result(mutable))
-    thread_listen = threading.Thread(target=run_coorotine, args=(mutable,))
+        asyncio.run(get_result(observe_window))
+    thread_listen = threading.Thread(target=run_coorotine, args=(observe_window,))
     thread_listen.start()
-    while thread_listen.is_alive():
-        time.sleep(1)
-        mutable[1] = time.time()
-    tgui_say = mutable[0]
-    return tgui_say
+    return observe_window[0]
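Aside (not part of the patch): the TGUI bridge drives its websocket coroutine from a plain thread by calling `asyncio.run` inside that thread, and uses a shared mutable list as the channel back to the caller (partial text in slot 0, a listener heartbeat in slot 1). A minimal sketch of that thread-plus-`asyncio.run` pattern, with a stand-in coroutine instead of the real websocket generator:

```python
import asyncio
import threading
import time

async def fake_stream(prompt):
    # Stand-in for the websocket generator in run(); yields growing partial replies.
    text = ""
    for ch in f"reply to {prompt}":
        text += ch
        await asyncio.sleep(0.01)
        yield text

def run_in_thread(prompt):
    mutable = ["", time.time()]            # [partial text, last-listener heartbeat]
    def worker():
        async def consume():
            async for partial in fake_stream(prompt):
                mutable[0] = partial
                if (time.time() - mutable[1]) > 5:   # no listener, stop early
                    break
        asyncio.run(consume())
    t = threading.Thread(target=worker)
    t.start()
    while t.is_alive():
        mutable[1] = time.time()           # keep feeding the heartbeat
        time.sleep(0.1)
    return mutable[0]

print(run_in_thread("hello"))
```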
diff --git a/request_llm/requirements_chatglm.txt b/request_llm/requirements_chatglm.txt
new file mode 100644
index 0000000..fa049ca
--- /dev/null
+++ b/request_llm/requirements_chatglm.txt
@@ -0,0 +1,6 @@
+protobuf
+transformers==4.27.1
+cpm_kernels
+torch>=1.10
+mdtex2html
+sentencepiece
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 1cee117..910be08 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-gradio==3.25.0
+gradio>=3.25.0
 tiktoken>=0.3.3
 requests[socks]
 transformers
diff --git a/toolbox.py b/toolbox.py
index 3ced653..0dd035a 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -27,7 +27,7 @@ def ArgsGeneralWrapper(f):
     """
     装饰器函数,用于重组输入参数,改变输入参数的顺序与结构。
     """
-    def decorated(cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
+    def decorated(cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
         txt_passon = txt
         if txt == "" and txt2 != "": txt_passon = txt2
         # 引入一个有cookie的chatbot
@@ -37,8 +37,9 @@ def ArgsGeneralWrapper(f):
         })
         llm_kwargs = {
             'api_key': cookies['api_key'],
-            'llm_model': cookies['llm_model'],
+            'llm_model': llm_model,
             'top_p':top_p,
+            'max_length': max_length,
             'temperature':temperature,
         }
         plugin_kwargs = {
@@ -75,66 +76,6 @@ def get_reduce_token_percent(text):
     except:
         return 0.5, '不详'
 
-def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, llm_kwargs, history=[], sys_prompt='', long_connection=True):
-    """
-        * 此函数未来将被弃用(替代函数 request_gpt_model_in_new_thread_with_ui_alive 文件 chatgpt_academic/crazy_functions/crazy_utils)
-
-        调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
-        i_say: 当前输入
-        i_say_show_user: 显示到对话界面上的当前输入,例如,输入整个文件时,你绝对不想把文件的内容都糊到对话界面上
-        chatbot: 对话界面句柄
-        top_p, temperature: gpt参数
-        history: gpt参数 对话历史
-        sys_prompt: gpt参数 sys_prompt
-        long_connection: 是否采用更稳定的连接方式(推荐)(已弃用)
-    """
-    import time
-    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
-    from toolbox import get_conf
-    TIMEOUT_SECONDS, MAX_RETRY = get_conf('TIMEOUT_SECONDS', 'MAX_RETRY')
-    # 多线程的时候,需要一个mutable结构在不同线程之间传递信息
-    # list就是最简单的mutable结构,我们第一个位置放gpt输出,第二个位置传递报错信息
-    mutable = [None, '']
-    # multi-threading worker
-
-    def mt(i_say, history):
-        while True:
-            try:
-                mutable[0] = predict_no_ui_long_connection(
-                    inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt)
-
-            except ConnectionAbortedError as token_exceeded_error:
-                # 尝试计算比例,尽可能多地保留文本
-                p_ratio, n_exceed = get_reduce_token_percent(
-                    str(token_exceeded_error))
-                if len(history) > 0:
-                    history = [his[int(len(his) * p_ratio):]
-                               for his in history if his is not None]
-                else:
-                    i_say = i_say[:int(len(i_say) * p_ratio)]
-                mutable[1] = f'警告,文本过长将进行截断,Token溢出数:{n_exceed},截断比例:{(1-p_ratio):.0%}。'
-            except TimeoutError as e:
-                mutable[0] = '[Local Message] 请求超时。'
-                raise TimeoutError
-            except Exception as e:
-                mutable[0] = f'[Local Message] 异常:{str(e)}.'
-                raise RuntimeError(f'[Local Message] 异常:{str(e)}.')
-    # 创建新线程发出http请求
-    thread_name = threading.Thread(target=mt, args=(i_say, history))
-    thread_name.start()
-    # 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
-    cnt = 0
-    while thread_name.is_alive():
-        cnt += 1
-        chatbot[-1] = (i_say_show_user,
-                       f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt % 4)))
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        time.sleep(1)
-    # 把gpt的输出从mutable中取出来
-    gpt_say = mutable[0]
-    if gpt_say == '[Local Message] Failed with timeout.':
-        raise TimeoutError
-    return gpt_say
 
 
 def write_results_to_file(history, file_name=None):
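Aside (not part of the patch): `LOCAL_MODEL_DEVICE` has to match the host, since the ChatGLM bridge only calls `.half().cuda()` when the value is not "cpu". A small hedged helper for choosing the value; it assumes the `torch` dependency from `requirements_chatglm.txt` is installed, and the helper name is illustrative:

```python
def pick_local_model_device() -> str:
    """Return "cuda" when a CUDA-capable GPU is visible to torch, else "cpu"."""
    try:
        import torch
        return "cuda" if torch.cuda.is_available() else "cpu"
    except ImportError:
        return "cpu"

# Paste the printed value into config_private.py as LOCAL_MODEL_DEVICE.
print(pick_local_model_device())
```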
diff --git a/version b/version
index e1a3466..59e288a 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 2.68,
+  "version": 3.0,
   "show_feature": true,
-  "new_feature": "改善理解pdf(chatpdf)功能 <-> 修复读取罕见字符的BUG <-> 如果一键更新失败,可前往github手动更新"
+  "new_feature": "支持ChatGLM <-> 支持多LLM模型同时对话"
 }