diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index b484d1f..27b91c2 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -19,8 +19,8 @@ from .bridge_chatgpt import predict as chatgpt_ui
 from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
 from .bridge_chatglm import predict as chatglm_ui
 
-from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
-from .bridge_chatglm import predict as chatglm_ui
+from .bridge_chatglm3 import predict_no_ui_long_connection as chatglm3_noui
+from .bridge_chatglm3 import predict as chatglm3_ui
 
 from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui
 from .bridge_qianfan import predict as qianfan_ui
@@ -208,6 +208,14 @@ model_info = {
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
+    "chatglm3": {
+        "fn_with_ui": chatglm3_ui,
+        "fn_without_ui": chatglm3_noui,
+        "endpoint": None,
+        "max_token": 8192,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
     "qianfan": {
         "fn_with_ui": qianfan_ui,
         "fn_without_ui": qianfan_noui,
diff --git a/request_llms/bridge_chatglm3.py b/request_llms/bridge_chatglm3.py
new file mode 100644
index 0000000..5f1ec54
--- /dev/null
+++ b/request_llms/bridge_chatglm3.py
@@ -0,0 +1,78 @@
+model_name = "ChatGLM3"
+cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
+
+
+from transformers import AutoModel, AutoTokenizer
+from toolbox import get_conf, ProxyNetworkActivate
+from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM
+
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 Local Model
+# ------------------------------------------------------------------------------------------------------------------------
+@SingletonLocalLLM
+class GetONNXGLMHandle(LocalLLMHandle):
+
+    def load_model_info(self):
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
+        self.model_name = model_name
+        self.cmd_to_install = cmd_to_install
+
+    def load_model_and_tokenizer(self):
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
+        import os, glob
+        import os
+        import platform
+        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')
+
+        if LOCAL_MODEL_QUANT == "INT4":         # INT4
+            _model_name_ = "THUDM/chatglm3-6b-int4"
+        elif LOCAL_MODEL_QUANT == "INT8":       # INT8
+            _model_name_ = "THUDM/chatglm3-6b-int8"
+        else:
+            _model_name_ = "THUDM/chatglm3-6b"  # FP16
+        with ProxyNetworkActivate('Download_LLM'):
+            chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
+            if device=='cpu':
+                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cpu').float()
+            else:
+                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cuda')
+            chatglm_model = chatglm_model.eval()
+
+        self._model = chatglm_model
+        self._tokenizer = chatglm_tokenizer
+        return self._model, self._tokenizer
+
+    def llm_stream_generator(self, **kwargs):
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
+        def adaptor(kwargs):
+            query = kwargs['query']
+            max_length = kwargs['max_length']
+            top_p = kwargs['top_p']
+            temperature = kwargs['temperature']
+            history = kwargs['history']
+            return query, max_length, top_p, temperature, history
+
+        query, max_length, top_p, temperature, history = adaptor(kwargs)
+
+        for response, history in self._model.stream_chat(self._tokenizer,
+                                                          query,
+                                                          history,
+                                                          max_length=max_length,
+                                                          top_p=top_p,
+                                                          temperature=temperature,
+                                                          ):
+            yield response
+
+    def try_to_import_special_deps(self, **kwargs):
+        # import something that will raise error if the user does not install requirement_*.txt
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行
+        import importlib
+        importlib.import_module('modelscope')
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 GPT-Academic Interface
+# ------------------------------------------------------------------------------------------------------------------------
+predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name, history_format='chatglm3')
\ No newline at end of file
diff --git a/request_llms/local_llm_class.py b/request_llms/local_llm_class.py
index a421ddf..626db91 100644
--- a/request_llms/local_llm_class.py
+++ b/request_llms/local_llm_class.py
@@ -114,7 +114,7 @@ class LocalLLMHandle(Process):
 
 
 
-def get_local_llm_predict_fns(LLMSingletonClass, model_name):
+def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='classic'):
     load_message = f"{model_name}尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,{model_name}消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
 
     def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
@@ -126,11 +126,30 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
         if len(observe_window) >= 1:
             observe_window[0] = load_message + "\n\n" + _llm_handle.info
         if not _llm_handle.running: raise RuntimeError(_llm_handle.info)
-        # chatglm 没有 sys_prompt 接口,因此把prompt加入 history
-        history_feedin = []
-        history_feedin.append([sys_prompt, "Certainly!"])
-        for i in range(len(history)//2):
-            history_feedin.append([history[2*i], history[2*i+1]] )
+        if history_format == 'classic':
+            # 没有 sys_prompt 接口,因此把prompt加入 history
+            history_feedin = []
+            history_feedin.append([sys_prompt, "Certainly!"])
+            for i in range(len(history)//2):
+                history_feedin.append([history[2*i], history[2*i+1]] )
+        elif history_format == 'chatglm3':
+            # 有 sys_prompt 接口
+            conversation_cnt = len(history) // 2
+            history_feedin = [{"role": "system", "content": sys_prompt}]
+            if conversation_cnt:
+                for index in range(0, 2*conversation_cnt, 2):
+                    what_i_have_asked = {}
+                    what_i_have_asked["role"] = "user"
+                    what_i_have_asked["content"] = history[index]
+                    what_gpt_answer = {}
+                    what_gpt_answer["role"] = "assistant"
+                    what_gpt_answer["content"] = history[index+1]
+                    if what_i_have_asked["content"] != "":
+                        if what_gpt_answer["content"] == "": continue
+                        history_feedin.append(what_i_have_asked)
+                        history_feedin.append(what_gpt_answer)
+                    else:
+                        history_feedin[-1]['content'] = what_gpt_answer['content']
 
         watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
         response = ""
@@ -160,10 +179,30 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
             inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
 
         # 处理历史信息
-        history_feedin = []
-        history_feedin.append([system_prompt, "Certainly!"])
-        for i in range(len(history)//2):
-            history_feedin.append([history[2*i], history[2*i+1]] )
+        if history_format == 'classic':
+            # 没有 sys_prompt 接口,因此把prompt加入 history
+            history_feedin = []
+            history_feedin.append([system_prompt, "Certainly!"])
+            for i in range(len(history)//2):
+                history_feedin.append([history[2*i], history[2*i+1]] )
+        elif history_format == 'chatglm3':
+            # 有 sys_prompt 接口
+            conversation_cnt = len(history) // 2
+            history_feedin = [{"role": "system", "content": system_prompt}]
+            if conversation_cnt:
+                for index in range(0, 2*conversation_cnt, 2):
+                    what_i_have_asked = {}
+                    what_i_have_asked["role"] = "user"
+                    what_i_have_asked["content"] = history[index]
+                    what_gpt_answer = {}
+                    what_gpt_answer["role"] = "assistant"
+                    what_gpt_answer["content"] = history[index+1]
+                    if what_i_have_asked["content"] != "":
+                        if what_gpt_answer["content"] == "": continue
+                        history_feedin.append(what_i_have_asked)
+                        history_feedin.append(what_gpt_answer)
+                    else:
+                        history_feedin[-1]['content'] = what_gpt_answer['content']
 
         # 开始接收回复
         response = f"[Local Message] 等待{model_name}响应中 ..."
diff --git a/tests/test_llms.py b/tests/test_llms.py
index f43f368..5c5d2f6 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -18,7 +18,8 @@ if __name__ == "__main__":
     # from request_llms.bridge_internlm import predict_no_ui_long_connection
     # from request_llms.bridge_qwen import predict_no_ui_long_connection
     # from request_llms.bridge_spark import predict_no_ui_long_connection
-    from request_llms.bridge_zhipu import predict_no_ui_long_connection
+    # from request_llms.bridge_zhipu import predict_no_ui_long_connection
+    from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
 
     llm_kwargs = {
         'max_length': 4096,
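Note: the sketch below is illustrative only and not part of the patch. It mirrors the new history_format='chatglm3' branch added to get_local_llm_predict_fns, showing how GPT-Academic's flat [question, answer, question, answer, ...] history plus a system prompt becomes the role-based message list passed to ChatGLM3; the helper name and sample strings are hypothetical.

# Illustrative sketch (not part of the patch): replicates the history_format='chatglm3'
# conversion introduced in request_llms/local_llm_class.py. Helper name is hypothetical.
def build_chatglm3_history(history, sys_prompt):
    conversation_cnt = len(history) // 2
    history_feedin = [{"role": "system", "content": sys_prompt}]
    for index in range(0, 2 * conversation_cnt, 2):
        what_i_have_asked = {"role": "user", "content": history[index]}
        what_gpt_answer = {"role": "assistant", "content": history[index + 1]}
        if what_i_have_asked["content"] != "":
            if what_gpt_answer["content"] == "":
                continue  # skip pairs whose answer is empty
            history_feedin.append(what_i_have_asked)
            history_feedin.append(what_gpt_answer)
        else:
            # empty question: the answer overwrites the previous message's content
            history_feedin[-1]["content"] = what_gpt_answer["content"]
    return history_feedin

# Example:
# build_chatglm3_history(["你好", "Hello, how can I help?"], "You are a helpful assistant")
# -> [{"role": "system", "content": "You are a helpful assistant"},
#     {"role": "user", "content": "你好"},
#     {"role": "assistant", "content": "Hello, how can I help?"}]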