diff --git a/config.py b/config.py
index 1b397ba..803129f 100644
--- a/config.py
+++ b/config.py
@@ -1,5 +1,5 @@
 # [step 1]>> For example: API_KEY = "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" (this key is invalid)
-API_KEY = "sk-此处填API密钥"
+API_KEY = "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"
 
 # [step 2]>> Set to True to enable the proxy; if deploying directly on an overseas server, leave this unchanged
 USE_PROXY = False
diff --git a/predict.py b/predict.py
index 10e58bb..1310d3f 100644
--- a/predict.py
+++ b/predict.py
@@ -244,7 +244,10 @@ def generate_payload(inputs, top_p, temperature, history, system_prompt, stream)
 
 if not LLM_MODEL.startswith('gpt'):
-    from request_llm.bridge_tgui import predict_tgui
+    # Redirect these functions to the implementations in another file
+    from request_llm.bridge_tgui import predict_tgui, predict_tgui_no_ui
     predict = predict_tgui
+    predict_no_ui = predict_tgui_no_ui
+    predict_no_ui_long_connection = predict_tgui_no_ui
\ No newline at end of file
diff --git a/request_llm/README.md b/request_llm/README.md
new file mode 100644
index 0000000..26f0dde
--- /dev/null
+++ b/request_llm/README.md
@@ -0,0 +1,39 @@
+# How to use other large language models
+
+## 1. First, run text-generation
+``` sh
+# Download text-generation-webui
+git clone https://github.com/oobabooga/text-generation-webui.git
+
+# Install the extra dependencies required by text-generation
+pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
+
+# Change into the directory
+cd text-generation-webui
+
+# Download a model
+python download-model.py facebook/opt-1.3b
+
+# Other options include facebook/galactica-1.3b
+#                       facebook/galactica-6.7b
+#                       facebook/galactica-120b
+
+# Pygmalion 6B is a proof-of-concept dialogue model based on EleutherAI's GPT-J-6B.
+#                       facebook/pygmalion-1.3b
+
+# Start text-generation; note that the slash in the model name must be replaced with an underscore
+python server.py --cpu --listen --listen-port 7860 --model facebook_galactica-1.3b
+```
+
+## 2. Modify config.py
+```
+# The LLM_MODEL format is [model]@[ws address]@[ws port]
+LLM_MODEL = "pygmalion-1.3b@localhost@7860"
+```
+
+## 3. Run!
+```
+cd chatgpt-academic
+python main.py
+```
diff --git a/request_llm/bridge_tgui.py b/request_llm/bridge_tgui.py
index 37f3826..1c7103f 100644
--- a/request_llm/bridge_tgui.py
+++ b/request_llm/bridge_tgui.py
@@ -11,6 +11,7 @@ import websockets
 import logging
 import time
 import threading
+import importlib
 from toolbox import get_conf
 LLM_MODEL, = get_conf('LLM_MODEL')
@@ -22,7 +23,7 @@ def random_hash():
 
 async def run(context):
     params = {
-        'max_new_tokens': 200,
+        'max_new_tokens': 1024,
         'do_sample': True,
         'temperature': 0.5,
         'top_p': 0.9,
@@ -103,9 +104,10 @@ def predict_tgui(inputs, top_p, temperature, chatbot=[], history=[], system_prom
         if "PreProcess" in functional[additional_fn]: inputs = functional[additional_fn]["PreProcess"](inputs)  # Get the preprocessing function (if any)
         inputs = functional[additional_fn]["Prefix"] + inputs + functional[additional_fn]["Suffix"]
-    raw_input = inputs
+    raw_input = "What I would like to say is the following: " + inputs
     logging.info(f'[raw_input] {raw_input}')
-    chatbot.append((inputs, ""))
+    history.extend([inputs, ""])
+    chatbot.append([inputs, ""])
     yield chatbot, history, "等待响应"
 
     prompt = inputs
@@ -113,11 +115,11 @@ def predict_tgui(inputs, top_p, temperature, chatbot=[], history=[], system_prom
     mutable = [""]
     def run_coorotine(mutable):
-        async def get_result():
+        async def get_result(mutable):
             async for response in run(prompt):
                 # Print intermediate steps
-                mutable += response
-        asyncio.run(get_result())
+                mutable[0] = response
+        asyncio.run(get_result(mutable))
     thread_listen = threading.Thread(target=run_coorotine, args=(mutable,))
     thread_listen.start()
@@ -129,9 +131,25 @@ def predict_tgui(inputs, top_p, temperature, chatbot=[], history=[], system_prom
         tgui_say = mutable[0]
         history[-1] = tgui_say
         chatbot[-1] = (history[-2], history[-1])
-        yield chatbot, history, status_text
+        yield chatbot, history, "status_text"
         logging.info(f'[response] {tgui_say}')
-    
\ No newline at end of file
+
+def predict_tgui_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""):
+    raw_input = "What I would like to say is the following: " + inputs
+    prompt = inputs
+    tgui_say = ""
+    mutable = [""]
+    def run_coorotine(mutable):
+        async def get_result(mutable):
+            async for response in run(prompt):
+                # Print intermediate steps
+                mutable[0] = response
+        asyncio.run(get_result(mutable))
+    thread_listen = threading.Thread(target=run_coorotine, args=(mutable,))
+    thread_listen.start()
+    thread_listen.join()
+    tgui_say = mutable[0]
+    return tgui_say
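
For context, the patch above rebinds `predict_no_ui` and `predict_no_ui_long_connection` to `predict_tgui_no_ui` whenever `LLM_MODEL` does not start with `gpt`. A minimal sketch of calling the new entry point directly, assuming the text-generation-webui server from step 1 of request_llm/README.md is already serving the model configured in config.py (the prompt text is purely illustrative):

```python
# Minimal sketch (assumption: text-generation-webui is running and config.py
# points at it, e.g. LLM_MODEL = "pygmalion-1.3b@localhost@7860").
from request_llm.bridge_tgui import predict_tgui_no_ui

# Blocks until the websocket stream finishes, then returns the final completion text.
reply = predict_tgui_no_ui(
    inputs="Summarize what the Galactica model is in one sentence.",  # illustrative prompt
    top_p=0.9,
    temperature=0.5,
)
print(reply)
```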