Support chatglm3

binary-husky 2023-10-31 03:08:50 +08:00
parent 9a1aff5bb6
commit 08f036aafd
4 changed files with 139 additions and 13 deletions

View File

@@ -19,8 +19,8 @@ from .bridge_chatgpt import predict as chatgpt_ui
from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
from .bridge_chatglm import predict as chatglm_ui
from .bridge_chatglm3 import predict_no_ui_long_connection as chatglm3_noui
from .bridge_chatglm3 import predict as chatglm3_ui
from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui
from .bridge_qianfan import predict as qianfan_ui
@@ -208,6 +208,14 @@ model_info = {
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "chatglm3": {
        "fn_with_ui": chatglm3_ui,
        "fn_without_ui": chatglm3_noui,
        "endpoint": None,
        "max_token": 8192,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "qianfan": {
        "fn_with_ui": qianfan_ui,
        "fn_without_ui": qianfan_noui,

View File

@@ -0,0 +1,78 @@
model_name = "ChatGLM3"
cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"

from transformers import AutoModel, AutoTokenizer
from toolbox import get_conf, ProxyNetworkActivate
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM

# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
@SingletonLocalLLM
class GetONNXGLMHandle(LocalLLMHandle):

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the subprocess
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the subprocess
        import os, glob
        import platform
        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')

        if LOCAL_MODEL_QUANT == "INT4":         # INT4
            _model_name_ = "THUDM/chatglm3-6b-int4"
        elif LOCAL_MODEL_QUANT == "INT8":       # INT8
            _model_name_ = "THUDM/chatglm3-6b-int8"
        else:
            _model_name_ = "THUDM/chatglm3-6b"  # FP16

        with ProxyNetworkActivate('Download_LLM'):
            chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
            if device == 'cpu':
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cpu').float()
            else:
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cuda')
            chatglm_model = chatglm_model.eval()

        self._model = chatglm_model
        self._tokenizer = chatglm_tokenizer
        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the subprocess
        def adaptor(kwargs):
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)

        for response, history in self._model.stream_chat(self._tokenizer,
                                                          query,
                                                          history,
                                                          max_length=max_length,
                                                          top_p=top_p,
                                                          temperature=temperature,
                                                          ):
            yield response

    def try_to_import_special_deps(self, **kwargs):
        # import something that will raise an error if the user has not installed requirement_*.txt
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        import importlib
        importlib.import_module('modelscope')


# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name, history_format='chatglm3')
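Below is a hedged usage sketch of the two functions exported above, modeled on the test-file change at the bottom of this commit. The 'top_p' and 'temperature' values are assumptions (only 'max_length' appears in the test diff); all three keys are the ones unpacked by adaptor() in llm_stream_generator, and LOCAL_MODEL_QUANT / LOCAL_MODEL_DEVICE are read from config.py when the model is first loaded.

```python
# Illustrative sketch, not part of the commit: call the ChatGLM3 bridge directly.
from request_llms.bridge_chatglm3 import predict_no_ui_long_connection

llm_kwargs = {
    'max_length': 4096,   # mirrors the test file in this commit
    'top_p': 1,           # assumed value
    'temperature': 1,     # assumed value
}
result = predict_no_ui_long_connection(
    inputs="What is 1+1?",
    llm_kwargs=llm_kwargs,
    history=[],
    sys_prompt="You are a concise assistant.",
)
print(result)
```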

View File

@@ -114,7 +114,7 @@ class LocalLLMHandle(Process):
def get_local_llm_predict_fns(LLMSingletonClass, model_name):
def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='classic'):
    load_message = f"{model_name} has not been loaded yet; loading will take a while. Note that, depending on the settings in `config.py`, {model_name} consumes a lot of memory (CPU) or VRAM (GPU), which may freeze a low-spec machine ..."

    def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
@@ -126,11 +126,30 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
        if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + _llm_handle.info
        if not _llm_handle.running: raise RuntimeError(_llm_handle.info)

        # chatglm has no sys_prompt interface, so the prompt is put into the history instead
        history_feedin = []
        history_feedin.append([sys_prompt, "Certainly!"])
        for i in range(len(history)//2):
            history_feedin.append([history[2*i], history[2*i+1]])

        if history_format == 'classic':
            # no sys_prompt interface, so the prompt is put into the history instead
            history_feedin = []
            history_feedin.append([sys_prompt, "Certainly!"])
            for i in range(len(history)//2):
                history_feedin.append([history[2*i], history[2*i+1]])
        elif history_format == 'chatglm3':
            # there is a proper sys_prompt interface
            conversation_cnt = len(history) // 2
            history_feedin = [{"role": "system", "content": sys_prompt}]
            if conversation_cnt:
                for index in range(0, 2*conversation_cnt, 2):
                    what_i_have_asked = {}
                    what_i_have_asked["role"] = "user"
                    what_i_have_asked["content"] = history[index]
                    what_gpt_answer = {}
                    what_gpt_answer["role"] = "assistant"
                    what_gpt_answer["content"] = history[index+1]
                    if what_i_have_asked["content"] != "":
                        if what_gpt_answer["content"] == "": continue
                        history_feedin.append(what_i_have_asked)
                        history_feedin.append(what_gpt_answer)
                    else:
                        history_feedin[-1]['content'] = what_gpt_answer['content']

        watch_dog_patience = 5  # watchdog patience: 5 seconds is enough
        response = ""
@@ -160,10 +179,30 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
            inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

        # process the conversation history
        history_feedin = []
        history_feedin.append([system_prompt, "Certainly!"])
        for i in range(len(history)//2):
            history_feedin.append([history[2*i], history[2*i+1]])

        if history_format == 'classic':
            # no sys_prompt interface, so the prompt is put into the history instead
            history_feedin = []
            history_feedin.append([system_prompt, "Certainly!"])
            for i in range(len(history)//2):
                history_feedin.append([history[2*i], history[2*i+1]])
        elif history_format == 'chatglm3':
            # there is a proper sys_prompt interface
            conversation_cnt = len(history) // 2
            history_feedin = [{"role": "system", "content": system_prompt}]
            if conversation_cnt:
                for index in range(0, 2*conversation_cnt, 2):
                    what_i_have_asked = {}
                    what_i_have_asked["role"] = "user"
                    what_i_have_asked["content"] = history[index]
                    what_gpt_answer = {}
                    what_gpt_answer["role"] = "assistant"
                    what_gpt_answer["content"] = history[index+1]
                    if what_i_have_asked["content"] != "":
                        if what_gpt_answer["content"] == "": continue
                        history_feedin.append(what_i_have_asked)
                        history_feedin.append(what_gpt_answer)
                    else:
                        history_feedin[-1]['content'] = what_gpt_answer['content']

        # start receiving the reply
        response = f"[Local Message] Waiting for {model_name} to respond ..."

View File

@@ -18,7 +18,8 @@ if __name__ == "__main__":
    # from request_llms.bridge_internlm import predict_no_ui_long_connection
    # from request_llms.bridge_qwen import predict_no_ui_long_connection
    # from request_llms.bridge_spark import predict_no_ui_long_connection
    from request_llms.bridge_zhipu import predict_no_ui_long_connection
    # from request_llms.bridge_zhipu import predict_no_ui_long_connection
    from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
    llm_kwargs = {
        'max_length': 4096,