Rebase v3.0

Author: qingxu fu, 2023-04-15 15:24:18 +08:00
Parent: ea6541c114
Commit: 91609d6d39
17 changed files with 397 additions and 118 deletions

.gitignore (vendored, 1 line changed)

@@ -55,7 +55,6 @@ coverage.xml
 *.pot
 github
 .github
-.idea/
 TEMP
 TRASH

Dockerfile+ChatGLM (new file, 50 lines)

@@ -0,0 +1,50 @@
# How to build: docker build -t gpt-academic --network=host -f Dockerfile+ChatGLM .
# How to run (1), directly:                                  docker run --rm -it --net=host --gpus=all gpt-academic
# How to run (2), enter the container and adjust things first: docker run --rm -it --net=host --gpus=all gpt-academic bash

# Start from an NVIDIA base image for GPU support (the CUDA version reported by the host's nvidia-smi must be >= 11.3)
FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04
ARG useProxyNetwork=''
RUN apt-get update
RUN apt-get install -y curl proxychains
RUN apt-get install -y git python python3 python-dev python3-dev --fix-missing

# Configure a proxy network (used while building the Docker image)
# comment out below if you do not need a proxy network (delete from this line downward if no proxy is needed)
RUN $useProxyNetwork curl cip.cc
RUN sed -i '$ d' /etc/proxychains.conf
RUN sed -i '$ d' /etc/proxychains.conf
RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf
ARG useProxyNetwork=proxychains
# comment out above if you do not need a proxy network (delete from this line upward if no proxy is needed)

# use python3 as the system default python
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8

# Clone the v3.0 branch
WORKDIR /gpt
RUN $useProxyNetwork git clone https://github.com/binary-husky/chatgpt_academic.git -b v3.0
WORKDIR /gpt/chatgpt_academic
RUN $useProxyNetwork python3 -m pip install -r requirements.txt
RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_chatglm.txt
RUN $useProxyNetwork python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu113

# Warm up the ChatGLM weights (optional step)
RUN echo ' \n\
from transformers import AutoModel, AutoTokenizer \n\
chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) \n\
chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float() ' >> warm_up_chatglm.py
RUN python3 -u warm_up_chatglm.py
RUN $useProxyNetwork git pull

# Configure the proxy and API-KEY for chatgpt-academic (optional step)
RUN echo ' \n\
API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \n\
USE_PROXY = True \n\
LLM_MODEL = "chatglm" \n\
LOCAL_MODEL_DEVICE = "cuda" \n\
proxies = { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' >> config_private.py

# Launch
CMD ["python3", "-u", "main.py"]
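
The warm-up step above bakes the ChatGLM weights into the image at build time so the first container start does not stall on a multi-gigabyte download. Written out as a plain script, the echo'd lines amount to the following (a minimal sketch; network access to the Hugging Face hub during `docker build` is assumed):

```python
# warm_up_chatglm.py -- equivalent of the echo'd warm-up step above.
# Loading the CPU (.float()) variant is enough here: the goal is only to
# populate the local model cache, not to run inference.
from transformers import AutoModel, AutoTokenizer

chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
print("ChatGLM weights cached:", type(chatglm_model).__name__)
```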

config.py

@@ -45,7 +45,10 @@ WEB_PORT = -1
 MAX_RETRY = 2
 
 # OpenAI model choice ("gpt-4" is currently only available to users whose access request was approved)
-LLM_MODEL = "gpt-3.5-turbo"
+LLM_MODEL = "gpt-3.5-turbo" # alternatives: "chatglm", "tgui:anymodel@localhost:7865"
+
+# Execution device for local LLMs such as ChatGLM: CPU/GPU
+LOCAL_MODEL_DEVICE = "cpu" # set to "cuda" for GPU
 
 # OpenAI API_URL
 API_URL = "https://api.openai.com/v1/chat/completions"
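
The project's convention (see the note in main.py) is to keep personal settings in a config_private.py that overrides config.py. A minimal sketch of overriding the new options introduced here (values are placeholders, not recommendations):

```python
# config_private.py -- hypothetical local override of the options added above.
LLM_MODEL = "chatglm"              # or "gpt-3.5-turbo", or "tgui:anymodel@localhost:7865"
LOCAL_MODEL_DEVICE = "cuda"        # "cpu" if no GPU is available
API_KEY = "sk-..."                 # only required when an OpenAI model is selected
```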

crazy_functional.py

@@ -16,15 +16,20 @@ def get_crazy_functions():
     from crazy_functions.高级功能函数模板 import 高阶功能模板函数
     from crazy_functions.代码重写为全英文_多线程 import 全项目切换英文
     from crazy_functions.Latex全文润色 import Latex英文润色
+    from crazy_functions.询问多个大语言模型 import 同时问询
     from crazy_functions.解析项目源代码 import 解析一个Lua项目
 
     function_plugins = {
+        "询问多个GPT模型": {
+            "Color": "stop",    # button color
+            "Function": HotReload(同时问询)
+        },
         "解析整个Python项目": {
             "Color": "stop",    # button color
             "Function": HotReload(解析一个Python项目)
         },
         "解析整个C++项目头文件": {
             "Color": "stop",    # button color
+            "AsButton": False,  # put this entry in the dropdown menu instead
             "Function": HotReload(解析一个C项目的头文件)
         },
         "解析整个C++项目(.cpp/.hpp/.c/.h": {

crazy_functions/crazy_utils.py

@@ -61,7 +61,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
     """
     import time
     from concurrent.futures import ThreadPoolExecutor
-    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
+    from request_llm.bridge_all import predict_no_ui_long_connection
     # user feedback
     chatbot.append([inputs_show_user, ""])
     yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI

@@ -167,13 +167,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
     """
     import time, random
     from concurrent.futures import ThreadPoolExecutor
-    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
+    from request_llm.bridge_all import predict_no_ui_long_connection
     assert len(inputs_array) == len(history_array)
     assert len(inputs_array) == len(sys_prompt_array)
     if max_workers == -1:  # read the config file
         try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
         except: max_workers = 8
     if max_workers <= 0 or max_workers >= 20: max_workers = 8
+    # force single-threading for chatglm: concurrent calls can cause severe stalls
+    if not llm_kwargs['llm_model'].startswith('gpt-'):
+        max_workers = 1
+
     executor = ThreadPoolExecutor(max_workers=max_workers)
     n_frag = len(inputs_array)
     # user feedback
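
The worker-count rule above reads DEFAULT_WORKER_NUM from the configuration, clamps it to a sane range, and then drops to a single worker for any model whose name does not start with "gpt-", since local backends such as ChatGLM degrade badly under concurrent calls. A standalone sketch of the same decision:

```python
# Sketch of the max_workers selection logic added in this hunk.
def pick_max_workers(configured: int, llm_model: str) -> int:
    max_workers = configured if 0 < configured < 20 else 8   # clamp bad values to the default
    if not llm_model.startswith('gpt-'):
        max_workers = 1                                       # local models: one worker only
    return max_workers

assert pick_max_workers(-1, 'gpt-3.5-turbo') == 8
assert pick_max_workers(4, 'chatglm') == 1
```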

crazy_functions/代码重写为全英文_多线程.py

@@ -1,5 +1,5 @@
 import threading
-from request_llm.bridge_chatgpt import predict_no_ui_long_connection
+from request_llm.bridge_all import predict_no_ui_long_connection
 from toolbox import update_ui
 from toolbox import CatchException, write_results_to_file, report_execption
 from .crazy_utils import breakdown_txt_to_satisfy_token_limit

crazy_functions/解析项目源代码.py

@@ -12,7 +12,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
     sys_prompt_array = []
     report_part_1 = []
 
-    assert len(file_manifest) <= 1024, "源文件太多(超过1024个), 请缩减输入文件的数量。或者您也可以选择删除此行警告并修改代码拆分file_manifest列表从而实现分批次处理。"
+    assert len(file_manifest) <= 512, "源文件太多(超过512个), 请缩减输入文件的数量。或者您也可以选择删除此行警告并修改代码拆分file_manifest列表从而实现分批次处理。"
     ############################## <Step 1: analyze each file, multi-threaded> ##################################
     for index, fp in enumerate(file_manifest):
         # read the file
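
The assertion message above suggests batching file_manifest as the workaround for very large projects. A minimal sketch of such batching (the helper name is illustrative, not part of the project):

```python
# Split a long file_manifest into chunks that stay under the 512-file limit.
def split_manifest(file_manifest, batch_size=512):
    for i in range(0, len(file_manifest), batch_size):
        yield file_manifest[i:i + batch_size]

# Example: 1300 files -> batches of 512, 512 and 276.
print([len(batch) for batch in split_manifest(list(range(1300)))])
```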

crazy_functions/询问多个大语言模型.py (new file, 28 lines)

@@ -0,0 +1,28 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import datetime

@CatchException
def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    txt             text typed into the input box, e.g. a paragraph to translate or a path containing the files to process
    llm_kwargs      LLM parameters such as temperature and top_p; usually passed through unchanged
    plugin_kwargs   plugin parameters; usually passed through unchanged
    chatbot         handle of the chat display box, used to show output to the user
    history         chat history (the context so far)
    system_prompt   the silent system prompt for the LLM
    web_port        the port this application is currently running on
    """
    history = []    # clear the history to avoid overflowing the input
    chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI promptly, since the LLM request will take a while

    llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo'
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=txt, inputs_show_user=txt,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt=system_prompt
    )

    history.append(txt)
    history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
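
Every function plugin in this project follows the signature documented above. A minimal, hedged skeleton of such a plugin (the function name and reply text are placeholders; it assumes the file lives under crazy_functions/ so that toolbox is importable):

```python
from toolbox import CatchException, update_ui

@CatchException
def 示例插件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    history = []                                              # drop old history to avoid token overflow
    chatbot.append((txt, "示例插件收到输入。"))                  # echo something back to the chat UI
    yield from update_ui(chatbot=chatbot, history=history)    # push the update to the browser
```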

main.py

@@ -1,6 +1,6 @@
 import os; os.environ['no_proxy'] = '*'  # avoid unexpected contamination from the proxy network
 import gradio as gr
-from request_llm.bridge_chatgpt import predict
+from request_llm.bridge_all import predict
 from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
 # It is recommended to copy your secrets (API key, proxy address) into a config_private.py so they are not accidentally pushed to GitHub

@@ -97,7 +97,10 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
                 system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
                 top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
                 temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
+                max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="MaxLength",)
                 checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
+                md_dropdown = gr.Dropdown(["gpt-3.5-turbo", "chatglm"], value=LLM_MODEL, label="").style(container=False)
+
                 gr.Markdown(description)
             with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary:
                 with gr.Row():

@@ -118,7 +121,7 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
             return ret
         checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2] )
     # gather the control handles that keep reappearing
-    input_combo = [cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
+    input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
     output_combo = [cookies, chatbot, history, status]
     predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=input_combo, outputs=output_combo)
     # submit and reset buttons
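
The new MaxLength slider and model dropdown are simply appended to input_combo, so ArgsGeneralWrapper receives them as extra positional arguments. A minimal, self-contained gradio sketch of that wiring idea (not the project's actual UI; names are illustrative):

```python
import gradio as gr

def handler(max_length, model, prompt):
    # In the real app this role is played by ArgsGeneralWrapper(predict).
    return f"model={model}, max_length={max_length}, prompt={prompt}"

with gr.Blocks() as demo:
    max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, label="MaxLength")
    md_dropdown = gr.Dropdown(["gpt-3.5-turbo", "chatglm"], value="gpt-3.5-turbo", label="LLM model")
    txt = gr.Textbox(label="input")
    out = gr.Textbox(label="output")
    # Extra controls go first in the inputs list, mirroring input_combo above.
    txt.submit(handler, inputs=[max_length_sl, md_dropdown, txt], outputs=[out])

# demo.launch()  # uncomment to try it locally
```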

request_llm/README.md

@@ -1,35 +1,53 @@
 # How to use other large language models (v3.0 branch, under test)
-## 1. First run text-generation
+
+## ChatGLM
+
+- Install the dependencies: `pip install -r request_llm/requirements_chatglm.txt`
+- Edit the configuration: set LLM_MODEL to "chatglm" in config.py
+``` sh
+LLM_MODEL = "chatglm"
+```
+- Run!
+``` sh
+`python main.py`
+```
+
+---
+## Text-Generation-UI (TGUI)
+
+### 1. Deploy TGUI
 ``` sh
-# Download the model (text-generation is a great project, don't forget to give it a star)
+# 1. download the repository
 git clone https://github.com/oobabooga/text-generation-webui.git
-# install text-generation's extra dependencies
-pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
-# change directory
+# 2. the latest code of this repository is broken; roll back a few weeks
+git reset --hard fcda3f87767e642d1c0411776e549e1d3894843d
+# 3. change directory
 cd text-generation-webui
-# download the model
+# 4. install text-generation's extra dependencies
+pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
+# 5. download the model
 python download-model.py facebook/galactica-1.3b
 # other options, e.g. facebook/opt-1.3b
+#                     facebook/galactica-1.3b
 #                     facebook/galactica-6.7b
 #                     facebook/galactica-120b
 #                     facebook/pygmalion-1.3b etc.
 # see https://github.com/oobabooga/text-generation-webui for details
-# start text-generation (note: replace the slash in the model name with an underscore)
-python server.py --cpu --listen --listen-port 7860 --model facebook_galactica-1.3b
+# 6. start text-generation
+python server.py --cpu --listen --listen-port 7865 --model facebook_galactica-1.3b
 ```
-## 2. Edit config.py
+### 2. Edit config.py
 ``` sh
-# LLM_MODEL format: TGUI:[model]@[ws address]:[ws port]; the port must match the one used above
-LLM_MODEL = "TGUI:galactica-1.3b@localhost:7860"
+# LLM_MODEL format: tgui:[model]@[ws address]:[ws port]; the port must match the one used above
+LLM_MODEL = "tgui:galactica-1.3b@localhost:7865"
 ```
-## 3. Run!
+### 3. Run!
 ``` sh
 cd chatgpt-academic
 python main.py
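
The "tgui:<model>@<host>:<port>" value is later taken apart by request_llm/bridge_tgui.py with plain string splits. A small sketch of that parsing, useful for checking a value before putting it into config.py:

```python
# Mirrors the split('@') / split(':') logic used in request_llm/bridge_tgui.py.
LLM_MODEL = "tgui:galactica-1.3b@localhost:7865"

model_name, addr_port = LLM_MODEL.split('@')
assert ':' in addr_port, "unexpected LLM_MODEL format: " + LLM_MODEL
addr, port = addr_port.split(':')
print(model_name, addr, port)   # tgui:galactica-1.3b localhost 7865
```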

request_llm/bridge_all.py (new file, 135 lines)

@@ -0,0 +1,135 @@
"""
This file mainly contains two functions:

Without multi-threading capability:
    1. predict: used for normal conversations; full interactive features; not thread-safe

With multi-threading capability:
    2. predict_no_ui_long_connection: during experiments we found that calling predict_no_ui on long documents
       tends to drop the connection to openai; this function works around that by streaming, and supports multi-threading
"""

from concurrent.futures import ThreadPoolExecutor

from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
from .bridge_chatgpt import predict as chatgpt_ui

from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
from .bridge_chatglm import predict as chatglm_ui

from .bridge_tgui import predict_no_ui_long_connection as tgui_noui
from .bridge_tgui import predict as tgui_ui

methods = {
    "openai-no-ui": chatgpt_noui,
    "openai-ui": chatgpt_ui,

    "chatglm-no-ui": chatglm_noui,
    "chatglm-ui": chatglm_ui,

    "tgui-no-ui": tgui_noui,
    "tgui-ui": tgui_ui,
}


def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
    """
    Send to the LLM and wait for the complete reply in one go. Intermediate output is not shown,
    but streaming is used internally so the connection is not cut halfway.
    inputs:
        the input of this query
    sys_prompt:
        the silent system prompt
    llm_kwargs:
        the LLM's tuning parameters
    history:
        the list of previous turns
    observe_window = None:
        used to pass partial output across threads; most of the time it only serves a fancy visual effect
        and can be left empty. observe_window[0]: observation window, observe_window[1]: watchdog
    """
    import threading, time, copy

    model = llm_kwargs['llm_model']
    n_model = 1
    if '&' not in model:
        assert not model.startswith("tgui"), "TGUI不支持函数插件的实现"

        # query a single LLM
        if model.startswith('gpt'):
            method = methods['openai-no-ui']
        elif model == 'chatglm':
            method = methods['chatglm-no-ui']
        elif model.startswith('tgui'):
            method = methods['tgui-no-ui']
        return method(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience)
    else:
        # query several LLMs at the same time
        executor = ThreadPoolExecutor(max_workers=16)
        models = model.split('&')
        n_model = len(models)

        window_len = len(observe_window)
        if window_len==0:
            window_mutex = [[] for _ in range(n_model)] + [True]
        elif window_len==1:
            window_mutex = [[""] for _ in range(n_model)] + [True]
        elif window_len==2:
            window_mutex = [["", time.time()] for _ in range(n_model)] + [True]

        futures = []
        for i in range(n_model):
            model = models[i]
            if model.startswith('gpt'):
                method = methods['openai-no-ui']
            elif model == 'chatglm':
                method = methods['chatglm-no-ui']
            elif model.startswith('tgui'):
                method = methods['tgui-no-ui']
            llm_kwargs_feedin = copy.deepcopy(llm_kwargs)
            llm_kwargs_feedin['llm_model'] = model
            future = executor.submit(method, inputs, llm_kwargs_feedin, history, sys_prompt, window_mutex[i], console_slience)
            futures.append(future)

        def mutex_manager(window_mutex, observe_window):
            while True:
                time.sleep(0.2)
                if not window_mutex[-1]: break
                # watchdog
                for i in range(n_model):
                    window_mutex[i][1] = observe_window[1]
                # observation window
                chat_string = []
                for i in range(n_model):
                    chat_string.append( f"[{str(models[i])} 说]: {window_mutex[i][0]}" )
                res = '\n\n---\n\n'.join(chat_string)
                # # # # # # # # # # #
                observe_window[0] = res

        t_model = threading.Thread(target=mutex_manager, args=(window_mutex, observe_window), daemon=True)
        t_model.start()

        return_string_collect = []
        for i, future in enumerate(futures):  # wait and get
            return_string_collect.append( f"[{str(models[i])} 说]: {future.result()}" )

        window_mutex[-1] = False  # stop the mutex thread
        res = '\n\n---\n\n'.join(return_string_collect)
        return res


def predict(inputs, llm_kwargs, *args, **kwargs):
    """
    Send to the LLM and stream the output.
    Used for the basic chat feature.
    inputs is the input of this query
    top_p, temperature are the LLM's tuning parameters
    history is the list of previous turns; note that a token-overflow error is triggered if either inputs or history is too long
    chatbot is the chat list shown in the WebUI; modify it and yield to update the page directly
    additional_fn indicates which button was clicked (see functional.py for the buttons)
    """
    if llm_kwargs['llm_model'].startswith('gpt'):
        method = methods['openai-ui']
    elif llm_kwargs['llm_model'] == 'chatglm':
        method = methods['chatglm-ui']
    elif llm_kwargs['llm_model'].startswith('tgui'):
        method = methods['tgui-ui']

    yield from method(inputs, llm_kwargs, *args, **kwargs)
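
A hedged sketch of calling the dispatcher above from a plugin's worker thread, run from the project root: observe_window[0] receives the partial text, observe_window[1] is a watchdog timestamp the caller must keep refreshing, and joining model names with '&' fans the request out to several backends (the API key is a placeholder):

```python
import time, threading
from request_llm.bridge_all import predict_no_ui_long_connection

llm_kwargs = {'llm_model': 'chatglm&gpt-3.5-turbo', 'api_key': 'sk-...',
              'top_p': 1.0, 'temperature': 1.0, 'max_length': 512}
observe_window = ["", time.time()]   # [0]: streamed text, [1]: watchdog timestamp

def feed_watchdog():
    while True:                      # keep the watchdog alive while we still want the answer
        observe_window[1] = time.time()
        time.sleep(1)

threading.Thread(target=feed_watchdog, daemon=True).start()
answer = predict_no_ui_long_connection(
    inputs="你好", llm_kwargs=llm_kwargs, history=[],
    sys_prompt="You are a helpful assistant.", observe_window=observe_window)
print(answer)
```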

request_llm/bridge_chatglm.py (new file, 83 lines)

@@ -0,0 +1,83 @@
from transformers import AutoModel, AutoTokenizer
import time
import importlib
from toolbox import update_ui, get_conf

global chatglm_model, chatglm_tokenizer
chatglm_model = None
chatglm_tokenizer = None

def model_loader():
    global chatglm_model, chatglm_tokenizer
    if chatglm_tokenizer is None:
        chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
    if chatglm_model is None:  # not loaded yet
        device, = get_conf('LOCAL_MODEL_DEVICE')
        if device=='cpu':
            chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
        else:
            chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
        chatglm_model = chatglm_model.eval()


def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    See request_llm/bridge_all.py for the documentation of this function.
    """
    global chatglm_model, chatglm_tokenizer
    if chatglm_model is None:
        observe_window[0] = "ChatGLM尚未加载，加载需要一段时间 ……"
        model_loader()

    # chatglm has no sys_prompt interface, so the prompt is folded into history
    history_feedin = []
    for i in range(len(history)//2):
        history_feedin.append(["What can I do?", sys_prompt] )
        history_feedin.append([history[2*i], history[2*i+1]] )

    watch_dog_patience = 5  # watchdog patience: 5 seconds is enough
    response = ""
    for response, history in chatglm_model.stream_chat(chatglm_tokenizer, inputs, history=history_feedin, max_length=llm_kwargs['max_length'],
                                                       top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        # observation window: publish what has been received so far
        observe_window[0] = response
        # watchdog: terminate if it has not been fed within the deadline
        if len(observe_window) >= 2:
            if (time.time()-observe_window[1]) > watch_dog_patience:
                raise RuntimeError("程序终止。")
        # if not console_slience:
        #     print(response)
    return response


def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    See request_llm/bridge_all.py for the documentation of this function.
    """
    global chatglm_model, chatglm_tokenizer
    chatbot.append((inputs, ""))
    if chatglm_model is None:
        chatbot[-1] = (inputs, "ChatGLM尚未加载，加载需要一段时间 ……")
        yield from update_ui(chatbot=chatbot, history=[])
        model_loader()

    if additional_fn is not None:
        import core_functional
        importlib.reload(core_functional)  # hot-reload the prompts
        core_functional = core_functional.get_core_functions()
        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # fetch the pre-processing function, if any
        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]

    history_feedin = []
    for i in range(len(history)//2):
        history_feedin.append(["What can I do?", system_prompt] )
        history_feedin.append([history[2*i], history[2*i+1]] )

    for response, history in chatglm_model.stream_chat(chatglm_tokenizer, inputs, history=history_feedin, max_length=llm_kwargs['max_length'],
                                                       top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)
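
ChatGLM's stream_chat expects history as [query, response] pairs, and since it has no sys_prompt argument the prompt is smuggled in as a fake first exchange. A standalone sketch of that re-pairing; unlike the loop above, it adds the system-prompt pair exactly once rather than once per history pair:

```python
def build_history_feedin(history, sys_prompt):
    # history is the flat [user, reply, user, reply, ...] list kept by the UI.
    history_feedin = [["What can I do?", sys_prompt]]
    for i in range(len(history) // 2):
        history_feedin.append([history[2 * i], history[2 * i + 1]])
    return history_feedin

print(build_history_feedin(["hi", "hello!", "1+1?", "2"], "Be brief."))
```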

request_llm/bridge_tgui.py

@@ -13,23 +13,18 @@ import time
 import threading
 import importlib
 from toolbox import get_conf, update_ui
 
-LLM_MODEL, = get_conf('LLM_MODEL')
-
-# "TGUI:galactica-1.3b@localhost:7860"
-model_name, addr_port = LLM_MODEL.split('@')
-assert ':' in addr_port, "LLM_MODEL 格式不正确!" + LLM_MODEL
-addr, port = addr_port.split(':')
 
 def random_hash():
     letters = string.ascii_lowercase + string.digits
     return ''.join(random.choice(letters) for i in range(9))
 
-async def run(context, max_token=512):
+async def run(context, max_token, temperature, top_p, addr, port):
     params = {
         'max_new_tokens': max_token,
         'do_sample': True,
-        'temperature': 0.5,
-        'top_p': 0.9,
+        'temperature': temperature,
+        'top_p': top_p,
         'typical_p': 1,
         'repetition_penalty': 1.05,
         'encoder_repetition_penalty': 1.0,

@@ -90,7 +85,7 @@ async def run(context, max_token=512):
 
-def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
     """
     Send to chatGPT and stream the output.
     Used for the basic chat feature.

@@ -108,18 +103,26 @@ def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt=
         inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
 
     raw_input = "What I would like to say is the following: " + inputs
+    logging.info(f'[raw_input] {raw_input}')
     history.extend([inputs, ""])
     chatbot.append([inputs, ""])
     yield from update_ui(chatbot=chatbot, history=history, msg="等待响应")  # refresh the UI
 
-    prompt = inputs
+    prompt = raw_input
     tgui_say = ""
+    model_name, addr_port = llm_kwargs['llm_model'].split('@')
+    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
+    addr, port = addr_port.split(':')
     mutable = ["", time.time()]
     def run_coorotine(mutable):
         async def get_result(mutable):
-            async for response in run(prompt):
+            # "tgui:galactica-1.3b@localhost:7860"
+            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
+                                      temperature=llm_kwargs['temperature'],
+                                      top_p=llm_kwargs['top_p'], addr=addr, port=port):
                 print(response[len(mutable[0]):])
                 mutable[0] = response
                 if (time.time() - mutable[1]) > 3:

@@ -140,28 +143,29 @@ def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt=
         chatbot[-1] = (history[-2], history[-1])
         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
+    logging.info(f'[response] {tgui_say}')
 
-def predict_tgui_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""):
+def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
     raw_input = "What I would like to say is the following: " + inputs
-    prompt = inputs
+    prompt = raw_input
     tgui_say = ""
-    mutable = ["", time.time()]
-    def run_coorotine(mutable):
-        async def get_result(mutable):
-            async for response in run(prompt, max_token=20):
-                print(response[len(mutable[0]):])
-                mutable[0] = response
-                if (time.time() - mutable[1]) > 3:
+    model_name, addr_port = llm_kwargs['llm_model'].split('@')
+    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
+    addr, port = addr_port.split(':')
+
+    def run_coorotine(observe_window):
+        async def get_result(observe_window):
+            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
+                                      temperature=llm_kwargs['temperature'],
+                                      top_p=llm_kwargs['top_p'], addr=addr, port=port):
+                print(response[len(observe_window[0]):])
+                observe_window[0] = response
+                if (time.time() - observe_window[1]) > 5:
                     print('exit when no listener')
                     break
-        asyncio.run(get_result(mutable))
-    thread_listen = threading.Thread(target=run_coorotine, args=(mutable,))
+        asyncio.run(get_result(observe_window))
+    thread_listen = threading.Thread(target=run_coorotine, args=(observe_window,))
     thread_listen.start()
-    while thread_listen.is_alive():
-        time.sleep(1)
-        mutable[1] = time.time()
-    tgui_say = mutable[0]
-    return tgui_say
+    return observe_window[0]

request_llm/requirements_chatglm.txt (new file, 6 lines)

@@ -0,0 +1,6 @@
protobuf
transformers==4.27.1
cpm_kernels
torch>=1.10
mdtex2html
sentencepiece

requirements.txt

@@ -1,4 +1,4 @@
-gradio==3.25.0
+gradio>=3.25.0
 tiktoken>=0.3.3
 requests[socks]
 transformers

toolbox.py

@@ -27,7 +27,7 @@ def ArgsGeneralWrapper(f):
     """
     A decorator that reorganizes the input arguments, changing their order and structure.
     """
-    def decorated(cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
+    def decorated(cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
         txt_passon = txt
         if txt == "" and txt2 != "": txt_passon = txt2
         # introduce a chatbot that carries cookies

@@ -37,8 +37,9 @@ def ArgsGeneralWrapper(f):
         })
         llm_kwargs = {
             'api_key': cookies['api_key'],
-            'llm_model': cookies['llm_model'],
+            'llm_model': llm_model,
             'top_p':top_p,
+            'max_length': max_length,
             'temperature':temperature,
         }
         plugin_kwargs = {

@@ -75,66 +76,6 @@ def get_reduce_token_percent(text):
     except:
         return 0.5, '不详'
 
-def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, llm_kwargs, history=[], sys_prompt='', long_connection=True):
-    """
-        * This function will be deprecated; its replacement is request_gpt_model_in_new_thread_with_ui_alive in chatgpt_academic/crazy_functions/crazy_utils
-
-        Calls the simple predict_no_ui interface while keeping a minimal UI heartbeat; when the conversation gets too long, it is truncated automatically by bisection
-        i_say: the current input
-        i_say_show_user: the current input as shown in the chat UI (e.g. when feeding a whole file you do not want its content pasted into the chat)
-        chatbot: handle of the chat UI
-        top_p, temperature: gpt parameters
-        history: gpt parameter, the chat history
-        sys_prompt: gpt parameter, the system prompt
-        long_connection: whether to use the more stable connection mode (recommended) (deprecated)
-    """
-    import time
-    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
-    from toolbox import get_conf
-    TIMEOUT_SECONDS, MAX_RETRY = get_conf('TIMEOUT_SECONDS', 'MAX_RETRY')
-    # when multi-threading, a mutable structure is needed to pass information between threads
-    # a list is the simplest mutable structure: slot 0 holds the gpt output, slot 1 passes error messages
-    mutable = [None, '']
-    # multi-threading worker
-    def mt(i_say, history):
-        while True:
-            try:
-                mutable[0] = predict_no_ui_long_connection(
-                    inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt)
-            except ConnectionAbortedError as token_exceeded_error:
-                # try to compute the ratio and keep as much text as possible
-                p_ratio, n_exceed = get_reduce_token_percent(
-                    str(token_exceeded_error))
-                if len(history) > 0:
-                    history = [his[int(len(his) * p_ratio):]
-                               for his in history if his is not None]
-                else:
-                    i_say = i_say[: int(len(i_say) * p_ratio)]
-                mutable[1] = f'警告文本过长将进行截断Token溢出数{n_exceed},截断比例:{(1-p_ratio):.0%}'
-            except TimeoutError as e:
-                mutable[0] = '[Local Message] 请求超时。'
-                raise TimeoutError
-            except Exception as e:
-                mutable[0] = f'[Local Message] 异常:{str(e)}.'
-                raise RuntimeError(f'[Local Message] 异常:{str(e)}.')
-    # start a new thread to make the http request
-    thread_name = threading.Thread(target=mt, args=(i_say, history))
-    thread_name.start()
-    # the original thread keeps refreshing the UI, runs a timeout countdown, and waits for the new thread to finish
-    cnt = 0
-    while thread_name.is_alive():
-        cnt += 1
-        chatbot[-1] = (i_say_show_user,
-                       f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt % 4)))
-        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-        time.sleep(1)
-    # fetch the gpt output from mutable
-    gpt_say = mutable[0]
-    if gpt_say == '[Local Message] Failed with timeout.':
-        raise TimeoutError
-    return gpt_say
-
 def write_results_to_file(history, file_name=None):
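
ArgsGeneralWrapper is the glue between gradio's flat list of control values and the llm_kwargs/plugin_kwargs dicts the rest of the code expects. A minimal, self-contained sketch of the same idea (simplified argument list; not the project's exact structures):

```python
import functools

def args_general_wrapper(f):
    """Repack positional UI values into an llm_kwargs dict before calling f."""
    @functools.wraps(f)
    def decorated(cookies, max_length, llm_model, txt, top_p, temperature, *args):
        llm_kwargs = {
            'api_key': cookies.get('api_key', ''),
            'llm_model': llm_model,
            'top_p': top_p,
            'max_length': max_length,
            'temperature': temperature,
        }
        yield from f(txt, llm_kwargs, *args)
    return decorated

@args_general_wrapper
def echo(txt, llm_kwargs):
    yield f"{llm_kwargs['llm_model']}: {txt}"

print(list(echo({'api_key': 'sk-...'}, 512, "chatglm", "hello", 1.0, 1.0)))
```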

version

@@ -1,5 +1,5 @@
 {
-    "version": 2.68,
+    "version": 3.0,
     "show_feature": true,
-    "new_feature": "改善理解pdf（chatpdf）功能 <-> 修复读取罕见字符的BUG <-> 如果一键更新失败可前往github手动更新"
+    "new_feature": "支持ChatGLM <-> 支持多LLM模型同时对话"
 }