支持chatglm3
This commit is contained in:
parent
9a1aff5bb6
commit
08f036aafd
@ -19,8 +19,8 @@ from .bridge_chatgpt import predict as chatgpt_ui
|
||||
from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
|
||||
from .bridge_chatglm import predict as chatglm_ui
|
||||
|
||||
from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
|
||||
from .bridge_chatglm import predict as chatglm_ui
|
||||
from .bridge_chatglm3 import predict_no_ui_long_connection as chatglm3_noui
|
||||
from .bridge_chatglm3 import predict as chatglm3_ui
|
||||
|
||||
from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui
|
||||
from .bridge_qianfan import predict as qianfan_ui
|
||||
@ -208,6 +208,14 @@ model_info = {
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
},
|
||||
"chatglm3": {
|
||||
"fn_with_ui": chatglm3_ui,
|
||||
"fn_without_ui": chatglm3_noui,
|
||||
"endpoint": None,
|
||||
"max_token": 8192,
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
},
|
||||
"qianfan": {
|
||||
"fn_with_ui": qianfan_ui,
|
||||
"fn_without_ui": qianfan_noui,
|
||||
|
78
request_llms/bridge_chatglm3.py
Normal file
78
request_llms/bridge_chatglm3.py
Normal file
@ -0,0 +1,78 @@
|
||||
model_name = "ChatGLM3"
|
||||
cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
|
||||
|
||||
|
||||
from transformers import AutoModel, AutoTokenizer
|
||||
from toolbox import get_conf, ProxyNetworkActivate
|
||||
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM
|
||||
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 Local Model
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
@SingletonLocalLLM
class GetONNXGLMHandle(LocalLLMHandle):
    """Local-model handle that loads ChatGLM3 and streams replies from it.

    The class name is kept as-is because the module-level call to
    ``get_local_llm_predict_fns`` references it.
    """

    def load_model_info(self):
        """Record the display name and the install hint for this model."""
        # Runs in the child process.
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        """Load the ChatGLM3 tokenizer and model and return them.

        The checkpoint is selected from the ``LOCAL_MODEL_QUANT`` setting and
        the placement from ``LOCAL_MODEL_DEVICE``; both are read via
        ``get_conf``.

        Returns:
            The ``(model, tokenizer)`` pair, which is also stored on
            ``self._model`` and ``self._tokenizer``.
        """
        # Runs in the child process.
        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')

        if LOCAL_MODEL_QUANT == "INT4":  # INT4
            _model_name_ = "THUDM/chatglm3-6b-int4"
        elif LOCAL_MODEL_QUANT == "INT8":  # INT8
            _model_name_ = "THUDM/chatglm3-6b-int8"
        else:
            _model_name_ = "THUDM/chatglm3-6b"  # FP16

        # The download/load happens inside the project's proxy context.
        with ProxyNetworkActivate('Download_LLM'):
            chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
            if device == 'cpu':
                # On CPU the weights are converted to float32 via .float().
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cpu').float()
            else:
                # NOTE(review): any non-'cpu' setting is mapped to plain 'cuda';
                # confirm whether values such as 'cuda:1' should be honoured.
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cuda')
            chatglm_model = chatglm_model.eval()

        self._model = chatglm_model
        self._tokenizer = chatglm_tokenizer
        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        """Yield the reply text produced so far for one query.

        Required keyword arguments: ``query``, ``max_length``, ``top_p``,
        ``temperature`` and ``history``. A missing key raises ``KeyError``.

        Yields:
            The ``response`` value of each item produced by the model's
            ``stream_chat``.
        """
        # Runs in the child process.
        query = kwargs['query']
        max_length = kwargs['max_length']
        top_p = kwargs['top_p']
        temperature = kwargs['temperature']
        history = kwargs['history']

        # stream_chat yields (response, history) pairs; only the response is
        # forwarded, so the updated history is deliberately discarded.
        for response, _ in self._model.stream_chat(
            self._tokenizer,
            query,
            history,
            max_length=max_length,
            top_p=top_p,
            temperature=temperature,
        ):
            yield response

    def try_to_import_special_deps(self, **kwargs):
        """Raise ImportError early if a required package is not installed.

        Import something that fails when the user has not installed the
        packages named in ``cmd_to_install``.
        """
        # Runs in the main process.
        import importlib
        # This bridge loads the model through `transformers`; the previous
        # probe for `modelscope` checked a package this module never uses.
        importlib.import_module('transformers')
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 GPT-Academic Interface
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name, history_format='chatglm3')
|
@ -114,7 +114,7 @@ class LocalLLMHandle(Process):
|
||||
|
||||
|
||||
|
||||
def get_local_llm_predict_fns(LLMSingletonClass, model_name):
|
||||
def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='classic'):
|
||||
load_message = f"{model_name}尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,{model_name}消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
|
||||
|
||||
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
|
||||
@ -126,11 +126,30 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
|
||||
if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + _llm_handle.info
|
||||
if not _llm_handle.running: raise RuntimeError(_llm_handle.info)
|
||||
|
||||
# chatglm 没有 sys_prompt 接口,因此把prompt加入 history
|
||||
history_feedin = []
|
||||
history_feedin.append([sys_prompt, "Certainly!"])
|
||||
for i in range(len(history)//2):
|
||||
history_feedin.append([history[2*i], history[2*i+1]] )
|
||||
if history_format == 'classic':
|
||||
# 没有 sys_prompt 接口,因此把prompt加入 history
|
||||
history_feedin = []
|
||||
history_feedin.append([sys_prompt, "Certainly!"])
|
||||
for i in range(len(history)//2):
|
||||
history_feedin.append([history[2*i], history[2*i+1]] )
|
||||
elif history_format == 'chatglm3':
|
||||
# 有 sys_prompt 接口
|
||||
conversation_cnt = len(history) // 2
|
||||
history_feedin = [{"role": "system", "content": sys_prompt}]
|
||||
if conversation_cnt:
|
||||
for index in range(0, 2*conversation_cnt, 2):
|
||||
what_i_have_asked = {}
|
||||
what_i_have_asked["role"] = "user"
|
||||
what_i_have_asked["content"] = history[index]
|
||||
what_gpt_answer = {}
|
||||
what_gpt_answer["role"] = "assistant"
|
||||
what_gpt_answer["content"] = history[index+1]
|
||||
if what_i_have_asked["content"] != "":
|
||||
if what_gpt_answer["content"] == "": continue
|
||||
history_feedin.append(what_i_have_asked)
|
||||
history_feedin.append(what_gpt_answer)
|
||||
else:
|
||||
history_feedin[-1]['content'] = what_gpt_answer['content']
|
||||
|
||||
watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
|
||||
response = ""
|
||||
@ -160,10 +179,30 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
|
||||
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
|
||||
|
||||
# 处理历史信息
|
||||
history_feedin = []
|
||||
history_feedin.append([system_prompt, "Certainly!"])
|
||||
for i in range(len(history)//2):
|
||||
history_feedin.append([history[2*i], history[2*i+1]] )
|
||||
if history_format == 'classic':
|
||||
# 没有 sys_prompt 接口,因此把prompt加入 history
|
||||
history_feedin = []
|
||||
history_feedin.append([system_prompt, "Certainly!"])
|
||||
for i in range(len(history)//2):
|
||||
history_feedin.append([history[2*i], history[2*i+1]] )
|
||||
elif history_format == 'chatglm3':
|
||||
# 有 sys_prompt 接口
|
||||
conversation_cnt = len(history) // 2
|
||||
history_feedin = [{"role": "system", "content": system_prompt}]
|
||||
if conversation_cnt:
|
||||
for index in range(0, 2*conversation_cnt, 2):
|
||||
what_i_have_asked = {}
|
||||
what_i_have_asked["role"] = "user"
|
||||
what_i_have_asked["content"] = history[index]
|
||||
what_gpt_answer = {}
|
||||
what_gpt_answer["role"] = "assistant"
|
||||
what_gpt_answer["content"] = history[index+1]
|
||||
if what_i_have_asked["content"] != "":
|
||||
if what_gpt_answer["content"] == "": continue
|
||||
history_feedin.append(what_i_have_asked)
|
||||
history_feedin.append(what_gpt_answer)
|
||||
else:
|
||||
history_feedin[-1]['content'] = what_gpt_answer['content']
|
||||
|
||||
# 开始接收回复
|
||||
response = f"[Local Message] 等待{model_name}响应中 ..."
|
||||
|
@ -18,7 +18,8 @@ if __name__ == "__main__":
|
||||
# from request_llms.bridge_internlm import predict_no_ui_long_connection
|
||||
# from request_llms.bridge_qwen import predict_no_ui_long_connection
|
||||
# from request_llms.bridge_spark import predict_no_ui_long_connection
|
||||
from request_llms.bridge_zhipu import predict_no_ui_long_connection
|
||||
# from request_llms.bridge_zhipu import predict_no_ui_long_connection
|
||||
from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
|
||||
|
||||
llm_kwargs = {
|
||||
'max_length': 4096,
|
||||
|
Loading…
x
Reference in New Issue
Block a user