diff --git a/docs/Dockerfile+JittorLLM b/docs/Dockerfile+JittorLLM
new file mode 100644
index 0000000..62dae31
--- /dev/null
+++ b/docs/Dockerfile+JittorLLM
@@ -0,0 +1,59 @@
+# How to build | 如何构建: docker build -t gpt-academic-jittor --network=host -f Dockerfile+JittorLLM .
+# How to run | (1) 我想直接一键运行(选择0号GPU): docker run --rm -it --net=host --gpus \"device=0\" gpt-academic-jittor bash
+# How to run | (2) 我想在运行之前进入容器做一些调整(选择1号GPU): docker run --rm -it --net=host --gpus \"device=1\" gpt-academic-jittor bash
+
+# 从NVIDIA源构建,从而支持显卡运行(检查宿主的nvidia-smi中的cuda版本必须>=11.3)
+FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04
+ARG useProxyNetwork=''
+RUN apt-get update
+RUN apt-get install -y curl proxychains g++
+RUN apt-get install -y git python python3 python-dev python3-dev --fix-missing
+
+# 配置代理网络(构建Docker镜像时使用)
+# # comment out below if you do not need proxy network | 如果不需要翻墙 - 从此行向下删除
+RUN $useProxyNetwork curl cip.cc
+RUN sed -i '$ d' /etc/proxychains.conf
+RUN sed -i '$ d' /etc/proxychains.conf
+# 在这里填写主机的代理协议(用于从github拉取代码)
+RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf
+ARG useProxyNetwork=proxychains
+# # comment out above if you do not need proxy network | 如果不需要翻墙 - 从此行向上删除
+
+
+# use python3 as the system default python
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
+# 下载pytorch
+RUN $useProxyNetwork python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu113
+# 下载分支
+WORKDIR /gpt
+RUN $useProxyNetwork git clone https://github.com/binary-husky/chatgpt_academic.git -b jittor
+WORKDIR /gpt/chatgpt_academic
+RUN $useProxyNetwork python3 -m pip install -r requirements.txt
+RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_chatglm.txt
+RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_newbing.txt
+RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I
+
+# 下载JittorLLMs
+RUN $useProxyNetwork git clone https://github.com/binary-husky/JittorLLMs.git --depth 1 request_llm/jittorllms
+
+# 禁用缓存,确保更新代码
+ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
+RUN $useProxyNetwork git pull
+
+# 预热Tiktoken模块
+RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
+
+# 为chatgpt-academic配置代理和API-KEY (非必要 可选步骤)
+# 可同时填写多个API-KEY,支持openai的key和api2d的key共存,用英文逗号分割,例如API_KEY = "sk-openaikey1,fkxxxx-api2dkey2,........"
+# LLM_MODEL 是选择初始的模型
+# LOCAL_MODEL_DEVICE 是选择chatglm等本地模型运行的设备,可选 cpu 和 cuda
+# [说明: 以下内容与`config.py`一一对应,请查阅config.py来完成以下配置的填写]
+RUN echo ' \n\
+API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,fkxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \n\
+USE_PROXY = True \n\
+LLM_MODEL = "chatglm" \n\
+LOCAL_MODEL_DEVICE = "cuda" \n\
+proxies = { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' >> config_private.py
+
+# 启动
+CMD ["python3", "-u", "main.py"]
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index fddc9a7..f42ee9f 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -133,6 +133,51 @@ model_info = {
 }
 
 
+AVAIL_LLM_MODELS, = get_conf("AVAIL_LLM_MODELS")
+if "jittorllms_rwkv" in AVAIL_LLM_MODELS:
+    from .bridge_jittorllms_rwkv import predict_no_ui_long_connection as rwkv_noui
+    from .bridge_jittorllms_rwkv import predict as rwkv_ui
+    model_info.update({
+        "jittorllms_rwkv": {
+            "fn_with_ui": rwkv_ui,
+            "fn_without_ui": rwkv_noui,
+            "endpoint": None,
+            "max_token": 1024,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+    })
+if "jittorllms_llama" in AVAIL_LLM_MODELS:
+    from .bridge_jittorllms_llama import predict_no_ui_long_connection as llama_noui
+    from .bridge_jittorllms_llama import predict as llama_ui
+    model_info.update({
+        "jittorllms_llama": {
+            "fn_with_ui": llama_ui,
+            "fn_without_ui": llama_noui,
+            "endpoint": None,
+            "max_token": 1024,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+    })
+if "jittorllms_pangualpha" in AVAIL_LLM_MODELS:
+    from .bridge_jittorllms_pangualpha import predict_no_ui_long_connection as pangualpha_noui
+    from .bridge_jittorllms_pangualpha import predict as pangualpha_ui
+    model_info.update({
+        "jittorllms_pangualpha": {
+            "fn_with_ui": pangualpha_ui,
+            "fn_without_ui": pangualpha_noui,
+            "endpoint": None,
+            "max_token": 1024,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+    })
+
+
+
+
+
 def LLM_CATCH_EXCEPTION(f):
     """
     装饰器函数,将错误显示出来
diff --git a/request_llm/bridge_jittorllms_llama.py b/request_llm/bridge_jittorllms_llama.py
new file mode 100644
index 0000000..6dfac68
--- /dev/null
+++ b/request_llm/bridge_jittorllms_llama.py
@@ -0,0 +1,178 @@
+
+from transformers import AutoModel, AutoTokenizer
+import time
+import threading
+import importlib
+from toolbox import update_ui, get_conf
+from multiprocessing import Process, Pipe
+
+load_message = "jittorllms尚未加载,加载需要一段时间。注意,请避免混用多种jittor模型,否则可能导致显存溢出而造成卡顿,取决于`config.py`的配置,jittorllms消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
+
+#################################################################################
+class GetGLMHandle(Process):
+    def __init__(self):
+        super().__init__(daemon=True)
+        self.parent, self.child = Pipe()
+        self.jittorllms_model = None
+        self.info = ""
+        self.local_history = []
+        self.success = True
+        self.check_dependency()
+        self.start()
+        self.threadLock = threading.Lock()
+
+    def check_dependency(self):
+        try:
+            import pandas
+            self.info = "依赖检测通过"
+            self.success = True
+        except:
+            from toolbox import trimmed_format_exc
+            self.info = r"缺少jittorllms的依赖,如果要使用jittorllms,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I`"+\
+                        r"和`git clone https://gitlink.org.cn/jittor/JittorLLMs.git --depth 1 request_llm/jittorllms`两个指令来安装jittorllms的依赖(在项目根目录运行这两个指令)。" +\
+                        r"警告:安装jittorllms依赖后将完全破坏现有的pytorch环境,建议使用docker环境!" + \
+                        trimmed_format_exc()
+            self.success = False
+
+    def ready(self):
+        return self.jittorllms_model is not None
+
+    def run(self):
+        # 子进程执行
+        # 第一次运行,加载参数
+        def validate_path():
+            import os, sys
+            dir_name = os.path.dirname(__file__)
+            env = os.environ.get("PATH", "")
+            os.environ["PATH"] = env.replace('/cuda/bin', '/x/bin')
+            root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..')
+            os.chdir(root_dir_assume + '/request_llm/jittorllms')
+            sys.path.append(root_dir_assume + '/request_llm/jittorllms')
+        validate_path() # validate path so you can run from base directory
+
+        def load_model():
+            import types
+            try:
+                if self.jittorllms_model is None:
+                    device, = get_conf('LOCAL_MODEL_DEVICE')
+                    from .jittorllms.models import get_model
+                    # availabel_models = ["chatglm", "pangualpha", "llama", "chatrwkv"]
+                    args_dict = {'model': 'llama'}
+                    print('self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))')
+                    self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))
+                    print('done get model')
+            except:
+                self.child.send('[Local Message] Call jittorllms fail 不能正常加载jittorllms的参数。')
+                raise RuntimeError("不能正常加载jittorllms的参数!")
+        print('load_model')
+        load_model()
+
+        # 进入任务等待状态
+        print('进入任务等待状态')
+        while True:
+            # 进入任务等待状态
+            kwargs = self.child.recv()
+            query = kwargs['query']
+            history = kwargs['history']
+            # 是否重置
+            if len(self.local_history) > 0 and len(history)==0:
+                print('触发重置')
+                self.jittorllms_model.reset()
+            self.local_history.append(query)
+
+            print('收到消息,开始请求')
+            try:
+                for response in self.jittorllms_model.stream_chat(query, history):
+                    print(response)
+                    self.child.send(response)
+            except:
+                from toolbox import trimmed_format_exc
+                print(trimmed_format_exc())
+                self.child.send('[Local Message] Call jittorllms fail.')
+            # 请求处理结束,开始下一个循环
+            self.child.send('[Finish]')
+
+    def stream_chat(self, **kwargs):
+        # 主进程执行
+        self.threadLock.acquire()
+        self.parent.send(kwargs)
+        while True:
+            res = self.parent.recv()
+            if res != '[Finish]':
+                yield res
+            else:
+                break
+        self.threadLock.release()
+
+global llama_glm_handle
+llama_glm_handle = None
+#################################################################################
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
+    """
+        多线程方法
+        函数的说明请见 request_llm/bridge_all.py
+    """
+    global llama_glm_handle
+    if llama_glm_handle is None:
+        llama_glm_handle = GetGLMHandle()
+        if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + llama_glm_handle.info
+        if not llama_glm_handle.success:
+            error = llama_glm_handle.info
+            llama_glm_handle = None
+            raise RuntimeError(error)
+
+    # jittorllms 没有 sys_prompt 接口,因此把prompt加入 history
+    history_feedin = []
+    for i in range(len(history)//2):
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
+    response = ""
+    for response in llama_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        print(response)
+        if len(observe_window) >= 1: observe_window[0] = response
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience:
+                raise RuntimeError("程序终止。")
+    return response
+
+
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+        单线程方法
+        函数的说明请见 request_llm/bridge_all.py
+    """
+    chatbot.append((inputs, ""))
+
+    global llama_glm_handle
+    if llama_glm_handle is None:
+        llama_glm_handle = GetGLMHandle()
+        chatbot[-1] = (inputs, load_message + "\n\n" + llama_glm_handle.info)
+        yield from update_ui(chatbot=chatbot, history=[])
+        if not llama_glm_handle.success:
+            llama_glm_handle = None
+            return
+
+    if additional_fn is not None:
+        import core_functional
+        importlib.reload(core_functional)    # 热更新prompt
+        core_functional = core_functional.get_core_functions()
+        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # 获取预处理函数(如果有的话)
+        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
+
+    # 处理历史信息
+    history_feedin = []
+    for i in range(len(history)//2):
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    # 开始接收jittorllms的回复
+    response = "[Local Message]: 等待jittorllms响应中 ..."
+    for response in llama_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)
+
+    # 总结输出
+    if response == "[Local Message]: 等待jittorllms响应中 ...":
+        response = "[Local Message]: jittorllms响应异常 ..."
+    history.extend([inputs, response])
+    yield from update_ui(chatbot=chatbot, history=history)
diff --git a/request_llm/bridge_jittorllms_pangualpha.py b/request_llm/bridge_jittorllms_pangualpha.py
new file mode 100644
index 0000000..ad02565
--- /dev/null
+++ b/request_llm/bridge_jittorllms_pangualpha.py
@@ -0,0 +1,178 @@
+
+from transformers import AutoModel, AutoTokenizer
+import time
+import threading
+import importlib
+from toolbox import update_ui, get_conf
+from multiprocessing import Process, Pipe
+
+load_message = "jittorllms尚未加载,加载需要一段时间。注意,请避免混用多种jittor模型,否则可能导致显存溢出而造成卡顿,取决于`config.py`的配置,jittorllms消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
+
+#################################################################################
+class GetGLMHandle(Process):
+    def __init__(self):
+        super().__init__(daemon=True)
+        self.parent, self.child = Pipe()
+        self.jittorllms_model = None
+        self.info = ""
+        self.local_history = []
+        self.success = True
+        self.check_dependency()
+        self.start()
+        self.threadLock = threading.Lock()
+
+    def check_dependency(self):
+        try:
+            import pandas
+            self.info = "依赖检测通过"
+            self.success = True
+        except:
+            from toolbox import trimmed_format_exc
+            self.info = r"缺少jittorllms的依赖,如果要使用jittorllms,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I`"+\
+                        r"和`git clone https://gitlink.org.cn/jittor/JittorLLMs.git --depth 1 request_llm/jittorllms`两个指令来安装jittorllms的依赖(在项目根目录运行这两个指令)。" +\
+                        r"警告:安装jittorllms依赖后将完全破坏现有的pytorch环境,建议使用docker环境!" + \
+                        trimmed_format_exc()
+            self.success = False
+
+    def ready(self):
+        return self.jittorllms_model is not None
+
+    def run(self):
+        # 子进程执行
+        # 第一次运行,加载参数
+        def validate_path():
+            import os, sys
+            dir_name = os.path.dirname(__file__)
+            env = os.environ.get("PATH", "")
+            os.environ["PATH"] = env.replace('/cuda/bin', '/x/bin')
+            root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..')
+            os.chdir(root_dir_assume + '/request_llm/jittorllms')
+            sys.path.append(root_dir_assume + '/request_llm/jittorllms')
+        validate_path() # validate path so you can run from base directory
+
+        def load_model():
+            import types
+            try:
+                if self.jittorllms_model is None:
+                    device, = get_conf('LOCAL_MODEL_DEVICE')
+                    from .jittorllms.models import get_model
+                    # availabel_models = ["chatglm", "pangualpha", "llama", "chatrwkv"]
+                    args_dict = {'model': 'pangualpha'}
+                    print('self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))')
+                    self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))
+                    print('done get model')
+            except:
+                self.child.send('[Local Message] Call jittorllms fail 不能正常加载jittorllms的参数。')
+                raise RuntimeError("不能正常加载jittorllms的参数!")
+        print('load_model')
+        load_model()
+
+        # 进入任务等待状态
+        print('进入任务等待状态')
+        while True:
+            # 进入任务等待状态
+            kwargs = self.child.recv()
+            query = kwargs['query']
+            history = kwargs['history']
+            # 是否重置
+            if len(self.local_history) > 0 and len(history)==0:
+                print('触发重置')
+                self.jittorllms_model.reset()
+            self.local_history.append(query)
+
+            print('收到消息,开始请求')
+            try:
+                for response in self.jittorllms_model.stream_chat(query, history):
+                    print(response)
+                    self.child.send(response)
+            except:
+                from toolbox import trimmed_format_exc
+                print(trimmed_format_exc())
+                self.child.send('[Local Message] Call jittorllms fail.')
+            # 请求处理结束,开始下一个循环
+            self.child.send('[Finish]')
+
+    def stream_chat(self, **kwargs):
+        # 主进程执行
+        self.threadLock.acquire()
+        self.parent.send(kwargs)
+        while True:
+            res = self.parent.recv()
+            if res != '[Finish]':
+                yield res
+            else:
+                break
+        self.threadLock.release()
+
+global pangu_glm_handle
+pangu_glm_handle = None
+#################################################################################
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
+    """
+        多线程方法
+        函数的说明请见 request_llm/bridge_all.py
+    """
+    global pangu_glm_handle
+    if pangu_glm_handle is None:
+        pangu_glm_handle = GetGLMHandle()
+        if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + pangu_glm_handle.info
+        if not pangu_glm_handle.success:
+            error = pangu_glm_handle.info
+            pangu_glm_handle = None
+            raise RuntimeError(error)
+
+    # jittorllms 没有 sys_prompt 接口,因此把prompt加入 history
+    history_feedin = []
+    for i in range(len(history)//2):
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
+    response = ""
+    for response in pangu_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        print(response)
+        if len(observe_window) >= 1: observe_window[0] = response
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience:
+                raise RuntimeError("程序终止。")
+    return response
+
+
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+        单线程方法
+        函数的说明请见 request_llm/bridge_all.py
+    """
+    chatbot.append((inputs, ""))
+
+    global pangu_glm_handle
+    if pangu_glm_handle is None:
+        pangu_glm_handle = GetGLMHandle()
+        chatbot[-1] = (inputs, load_message + "\n\n" + pangu_glm_handle.info)
+        yield from update_ui(chatbot=chatbot, history=[])
+        if not pangu_glm_handle.success:
+            pangu_glm_handle = None
+            return
+
+    if additional_fn is not None:
+        import core_functional
+        importlib.reload(core_functional)    # 热更新prompt
+        core_functional = core_functional.get_core_functions()
+        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # 获取预处理函数(如果有的话)
+        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
+
+    # 处理历史信息
+    history_feedin = []
+    for i in range(len(history)//2):
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    # 开始接收jittorllms的回复
+    response = "[Local Message]: 等待jittorllms响应中 ..."
+    for response in pangu_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)
+
+    # 总结输出
+    if response == "[Local Message]: 等待jittorllms响应中 ...":
+        response = "[Local Message]: jittorllms响应异常 ..."
+    history.extend([inputs, response])
+    yield from update_ui(chatbot=chatbot, history=history)
diff --git a/request_llm/bridge_jittorllms.py b/request_llm/bridge_jittorllms_rwkv.py
similarity index 62%
rename from request_llm/bridge_jittorllms.py
rename to request_llm/bridge_jittorllms_rwkv.py
index 28d0a7a..1252eea 100644
--- a/request_llm/bridge_jittorllms.py
+++ b/request_llm/bridge_jittorllms_rwkv.py
@@ -6,7 +6,7 @@ import importlib
 from toolbox import update_ui, get_conf
 from multiprocessing import Process, Pipe
 
-load_message = "jittorllms尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,jittorllms消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
+load_message = "jittorllms尚未加载,加载需要一段时间。注意,请避免混用多种jittor模型,否则可能导致显存溢出而造成卡顿,取决于`config.py`的配置,jittorllms消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
 
 #################################################################################
 class GetGLMHandle(Process):
@@ -15,6 +15,7 @@ class GetGLMHandle(Process):
         self.parent, self.child = Pipe()
         self.jittorllms_model = None
         self.info = ""
+        self.local_history = []
         self.success = True
         self.check_dependency()
         self.start()
@@ -22,13 +23,14 @@ class GetGLMHandle(Process):
 
     def check_dependency(self):
         try:
-            import jittor
-            from .jittorllms.models import get_model
+            import pandas
             self.info = "依赖检测通过"
             self.success = True
         except:
-            self.info = r"缺少jittorllms的依赖,如果要使用jittorllms,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_jittorllms.txt`"+\
-                        r"和`git clone https://gitlink.org.cn/jittor/JittorLLMs.git --depth 1 request_llm/jittorllms`两个指令来安装jittorllms的依赖(在项目根目录运行这两个指令)。"
+            from toolbox import trimmed_format_exc
+            self.info = r"缺少jittorllms的依赖,如果要使用jittorllms,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_jittorllms.txt -i https://pypi.jittor.org/simple -I`"+\
+                        r"和`git clone https://gitlink.org.cn/jittor/JittorLLMs.git --depth 1 request_llm/jittorllms`两个指令来安装jittorllms的依赖(在项目根目录运行这两个指令)。" +\
+                        r"警告:安装jittorllms依赖后将完全破坏现有的pytorch环境,建议使用docker环境!" + \
+                        trimmed_format_exc()
             self.success = False
 
     def ready(self):
@@ -37,6 +39,16 @@ class GetGLMHandle(Process):
     def run(self):
         # 子进程执行
         # 第一次运行,加载参数
+        def validate_path():
+            import os, sys
+            dir_name = os.path.dirname(__file__)
+            env = os.environ.get("PATH", "")
+            os.environ["PATH"] = env.replace('/cuda/bin', '/x/bin')
+            root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..')
+            os.chdir(root_dir_assume + '/request_llm/jittorllms')
+            sys.path.append(root_dir_assume + '/request_llm/jittorllms')
+        validate_path() # validate path so you can run from base directory
+
         def load_model():
             import types
             try:
@@ -44,23 +56,37 @@ class GetGLMHandle(Process):
                     device, = get_conf('LOCAL_MODEL_DEVICE')
                     from .jittorllms.models import get_model
                     # availabel_models = ["chatglm", "pangualpha", "llama", "chatrwkv"]
-                    args_dict = {'model': 'chatglm', 'RUN_DEVICE':'cpu'}
+                    args_dict = {'model': 'chatrwkv'}
+                    print('self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))')
                     self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))
+                    print('done get model')
             except:
                 self.child.send('[Local Message] Call jittorllms fail 不能正常加载jittorllms的参数。')
                 raise RuntimeError("不能正常加载jittorllms的参数!")
-
+        print('load_model')
         load_model()
 
         # 进入任务等待状态
+        print('进入任务等待状态')
         while True:
             # 进入任务等待状态
             kwargs = self.child.recv()
-            # 收到消息,开始请求
+            query = kwargs['query']
+            history = kwargs['history']
+            # 是否重置
+            if len(self.local_history) > 0 and len(history)==0:
+                print('触发重置')
+                self.jittorllms_model.reset()
+            self.local_history.append(query)
+
+            print('收到消息,开始请求')
             try:
-                for response, history in self.jittorllms_model.run_web_demo(kwargs['query'], kwargs['history']):
+                for response in self.jittorllms_model.stream_chat(query, history):
+                    print(response)
                     self.child.send(response)
             except:
+                from toolbox import trimmed_format_exc
+                print(trimmed_format_exc())
                 self.child.send('[Local Message] Call jittorllms fail.')
             # 请求处理结束,开始下一个循环
             self.child.send('[Finish]')
@@ -77,32 +103,32 @@ class GetGLMHandle(Process):
                 break
         self.threadLock.release()
 
-global glm_handle
-glm_handle = None
+global rwkv_glm_handle
+rwkv_glm_handle = None
 #################################################################################
 def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
     """
         多线程方法
         函数的说明请见 request_llm/bridge_all.py
     """
-    global glm_handle
-    if glm_handle is None:
-        glm_handle = GetGLMHandle()
-        if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + glm_handle.info
-        if not glm_handle.success:
-            error = glm_handle.info
-            glm_handle = None
+    global rwkv_glm_handle
+    if rwkv_glm_handle is None:
+        rwkv_glm_handle = GetGLMHandle()
+        if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + rwkv_glm_handle.info
+        if not rwkv_glm_handle.success:
+            error = rwkv_glm_handle.info
+            rwkv_glm_handle = None
             raise RuntimeError(error)
 
     # jittorllms 没有 sys_prompt 接口,因此把prompt加入 history
     history_feedin = []
-    history_feedin.append(["What can I do?", sys_prompt])
     for i in range(len(history)//2):
         history_feedin.append([history[2*i], history[2*i+1]] )
 
     watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
     response = ""
-    for response in glm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+    for response in rwkv_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        print(response)
         if len(observe_window) >= 1: observe_window[0] = response
         if len(observe_window) >= 2:
             if (time.time()-observe_window[1]) > watch_dog_patience:
@@ -118,13 +144,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
     """
     chatbot.append((inputs, ""))
 
-    global glm_handle
-    if glm_handle is None:
-        glm_handle = GetGLMHandle()
-        chatbot[-1] = (inputs, load_message + "\n\n" + glm_handle.info)
+    global rwkv_glm_handle
+    if rwkv_glm_handle is None:
+        rwkv_glm_handle = GetGLMHandle()
+        chatbot[-1] = (inputs, load_message + "\n\n" + rwkv_glm_handle.info)
         yield from update_ui(chatbot=chatbot, history=[])
-        if not glm_handle.success:
-            glm_handle = None
+        if not rwkv_glm_handle.success:
+            rwkv_glm_handle = None
             return
 
     if additional_fn is not None:
@@ -136,13 +162,12 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
 
     # 处理历史信息
     history_feedin = []
-    history_feedin.append(["What can I do?", system_prompt] )
     for i in range(len(history)//2):
         history_feedin.append([history[2*i], history[2*i+1]] )
 
     # 开始接收jittorllms的回复
     response = "[Local Message]: 等待jittorllms响应中 ..."
-    for response in glm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+    for response in rwkv_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
         chatbot[-1] = (inputs, response)
         yield from update_ui(chatbot=chatbot, history=history)
 
diff --git a/request_llm/requirements_jittorllms.txt b/request_llm/requirements_jittorllms.txt
index 3713ce8..1d86ff8 100644
--- a/request_llm/requirements_jittorllms.txt
+++ b/request_llm/requirements_jittorllms.txt
@@ -1,4 +1,7 @@
 jittor >= 1.3.7.9
 jtorch >= 0.1.3
 torch
-torchvision
\ No newline at end of file
+torchvision
+transformers==4.26.1
+pandas
+jieba
\ No newline at end of file
diff --git a/request_llm/test_llms.py b/request_llm/test_llms.py
index d043d62..bc54e13 100644
--- a/request_llm/test_llms.py
+++ b/request_llm/test_llms.py
@@ -1,6 +1,6 @@
-"""
-对各个llm模型进行单元测试
-"""
+# """
+# 对各个llm模型进行单元测试
+# """
 def validate_path():
     import os, sys
     dir_name = os.path.dirname(__file__)
@@ -10,7 +10,9 @@ def validate_path():
 
 validate_path() # validate path so you can run from base directory
 
-from request_llm.bridge_jittorllms import predict_no_ui_long_connection
+from request_llm.bridge_jittorllms_rwkv import predict_no_ui_long_connection
+# from request_llm.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
+# from request_llm.bridge_jittorllms_llama import predict_no_ui_long_connection
 
 llm_kwargs = {
     'max_length': 512,
@@ -22,5 +24,54 @@ result = predict_no_ui_long_connection(inputs="你好",
                                        llm_kwargs=llm_kwargs,
                                        history=[],
                                        sys_prompt="")
+print('final result:', result)
 
-print('result')
\ No newline at end of file
+
+result = predict_no_ui_long_connection(inputs="what is a hero?",
+                                       llm_kwargs=llm_kwargs,
+                                       history=["hello world"],
+                                       sys_prompt="")
+print('final result:', result)
+
+result = predict_no_ui_long_connection(inputs="如何理解传奇?",
+                                       llm_kwargs=llm_kwargs,
+                                       history=[],
+                                       sys_prompt="")
+print('final result:', result)
+
+# # print(result)
+# from multiprocessing import Process, Pipe
+# class GetGLMHandle(Process):
+#     def __init__(self):
+#         super().__init__(daemon=True)
+#         pass
+#     def run(self):
+#         # 子进程执行
+#         # 第一次运行,加载参数
+#         def validate_path():
+#             import os, sys
+#             dir_name = os.path.dirname(__file__)
+#             root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..')
+#             os.chdir(root_dir_assume + '/request_llm/jittorllms')
+#             sys.path.append(root_dir_assume + '/request_llm/jittorllms')
+#         validate_path() # validate path so you can run from base directory
+
+#         jittorllms_model = None
+#         import types
+#         try:
+#             if jittorllms_model is None:
+#                 from models import get_model
+#                 # availabel_models = ["chatglm", "pangualpha", "llama", "chatrwkv"]
+#                 args_dict = {'model': 'chatrwkv'}
+#                 print('self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))')
+#                 jittorllms_model = get_model(types.SimpleNamespace(**args_dict))
+#                 print('done get model')
+#         except:
+#             # self.child.send('[Local Message] Call jittorllms fail 不能正常加载jittorllms的参数。')
+#             raise RuntimeError("不能正常加载jittorllms的参数!")
+
+# x = GetGLMHandle()
+# x.start()
+
+
+# input()
\ No newline at end of file
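
Usage note (not part of the patch above): `request_llm/bridge_all.py` only registers the three new bridges when their names appear in `AVAIL_LLM_MODELS`, and the `config_private.py` written by `docs/Dockerfile+JittorLLM` does not set that field. Below is a minimal sketch of the relevant `config_private.py` entries; the field names and model names come from this patch and `config.py`, while the `"gpt-3.5-turbo"` entry and the exact default contents of `AVAIL_LLM_MODELS` are assumptions, so check `config.py` for the real defaults.

```python
# config_private.py -- hedged sketch, not part of this patch
API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
LLM_MODEL = "jittorllms_rwkv"   # or "jittorllms_llama" / "jittorllms_pangualpha"
LOCAL_MODEL_DEVICE = "cuda"     # "cpu" is the other option, per the Dockerfile comment
# bridge_all.py imports a jittorllms bridge only if its name is listed here:
AVAIL_LLM_MODELS = ["gpt-3.5-turbo",  # assumed existing default entry
                    "jittorllms_rwkv", "jittorllms_llama", "jittorllms_pangualpha"]
```

With such a config in place, `request_llm/test_llms.py` as modified above exercises the `jittorllms_rwkv` bridge directly; swapping to the commented-out import lines tests the llama or pangualpha bridges instead.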