Support chatglm3

binary-husky 2023-10-31 03:08:50 +08:00
parent 9a1aff5bb6
commit 08f036aafd
4 changed files with 139 additions and 13 deletions

View File

@@ -19,8 +19,8 @@ from .bridge_chatgpt import predict as chatgpt_ui
from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
from .bridge_chatglm import predict as chatglm_ui
from .bridge_chatglm3 import predict_no_ui_long_connection as chatglm3_noui
from .bridge_chatglm3 import predict as chatglm3_ui
from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui
from .bridge_qianfan import predict as qianfan_ui
@@ -208,6 +208,14 @@ model_info = {
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "chatglm3": {
        "fn_with_ui": chatglm3_ui,
        "fn_without_ui": chatglm3_noui,
        "endpoint": None,
        "max_token": 8192,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "qianfan": {
        "fn_with_ui": qianfan_ui,
        "fn_without_ui": qianfan_noui,

View File

@@ -0,0 +1,78 @@
model_name = "ChatGLM3"
cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"

from transformers import AutoModel, AutoTokenizer
from toolbox import get_conf, ProxyNetworkActivate
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns, SingletonLocalLLM

# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
@SingletonLocalLLM
class GetONNXGLMHandle(LocalLLMHandle):

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the subprocess
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the subprocess
        import os, glob
        import platform
        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')

        if LOCAL_MODEL_QUANT == "INT4":         # INT4
            _model_name_ = "THUDM/chatglm3-6b-int4"
        elif LOCAL_MODEL_QUANT == "INT8":       # INT8
            _model_name_ = "THUDM/chatglm3-6b-int8"
        else:
            _model_name_ = "THUDM/chatglm3-6b"  # FP16

        with ProxyNetworkActivate('Download_LLM'):
            chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
            if device == 'cpu':
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cpu').float()
            else:
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cuda')
            chatglm_model = chatglm_model.eval()

        self._model = chatglm_model
        self._tokenizer = chatglm_tokenizer
        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the subprocess
        def adaptor(kwargs):
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)

        for response, history in self._model.stream_chat(self._tokenizer,
                                                          query,
                                                          history,
                                                          max_length=max_length,
                                                          top_p=top_p,
                                                          temperature=temperature,
                                                          ):
            yield response

    def try_to_import_special_deps(self, **kwargs):
        # import something that will raise an error if the user has not installed requirement_*.txt
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        import importlib
        importlib.import_module('modelscope')


# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name, history_format='chatglm3')
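Below is a hedged usage sketch of the two functions exported above, modeled on the test-file change at the bottom of this commit. The 'top_p' and 'temperature' values are assumptions (only 'max_length' appears in the test diff); all three keys are the ones unpacked by adaptor() in llm_stream_generator, and LOCAL_MODEL_QUANT / LOCAL_MODEL_DEVICE are read from config.py when the model is first loaded.

```python
# Illustrative sketch, not part of the commit: call the ChatGLM3 bridge directly.
from request_llms.bridge_chatglm3 import predict_no_ui_long_connection

llm_kwargs = {
    'max_length': 4096,   # mirrors the test file in this commit
    'top_p': 1,           # assumed value
    'temperature': 1,     # assumed value
}
result = predict_no_ui_long_connection(
    inputs="What is 1+1?",
    llm_kwargs=llm_kwargs,
    history=[],
    sys_prompt="You are a concise assistant.",
)
print(result)
```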

View File

@@ -114,7 +114,7 @@ class LocalLLMHandle(Process):
def get_local_llm_predict_fns(LLMSingletonClass, model_name):
def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='classic'):
    load_message = f"{model_name} has not been loaded yet; loading will take a while. Note that, depending on the settings in `config.py`, {model_name} consumes a lot of memory (CPU) or VRAM (GPU), which may freeze a low-spec machine ..."

    def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
@@ -126,11 +126,30 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
        if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + _llm_handle.info
        if not _llm_handle.running: raise RuntimeError(_llm_handle.info)

        # chatglm has no sys_prompt interface, so the prompt is put into the history instead
        history_feedin = []
        history_feedin.append([sys_prompt, "Certainly!"])
        for i in range(len(history)//2):
            history_feedin.append([history[2*i], history[2*i+1]])

        if history_format == 'classic':
            # no sys_prompt interface, so the prompt is put into the history instead
            history_feedin = []
            history_feedin.append([sys_prompt, "Certainly!"])
            for i in range(len(history)//2):
                history_feedin.append([history[2*i], history[2*i+1]])
        elif history_format == 'chatglm3':
            # there is a proper sys_prompt interface
            conversation_cnt = len(history) // 2
            history_feedin = [{"role": "system", "content": sys_prompt}]
            if conversation_cnt:
                for index in range(0, 2*conversation_cnt, 2):
                    what_i_have_asked = {}
                    what_i_have_asked["role"] = "user"
                    what_i_have_asked["content"] = history[index]
                    what_gpt_answer = {}
                    what_gpt_answer["role"] = "assistant"
                    what_gpt_answer["content"] = history[index+1]
                    if what_i_have_asked["content"] != "":
                        if what_gpt_answer["content"] == "": continue
                        history_feedin.append(what_i_have_asked)
                        history_feedin.append(what_gpt_answer)
                    else:
                        history_feedin[-1]['content'] = what_gpt_answer['content']

        watch_dog_patience = 5  # watchdog patience: 5 seconds is enough
        response = ""
@@ -160,10 +179,30 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
            inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

        # process the conversation history
        history_feedin = []
        history_feedin.append([system_prompt, "Certainly!"])
        for i in range(len(history)//2):
            history_feedin.append([history[2*i], history[2*i+1]])

        if history_format == 'classic':
            # no sys_prompt interface, so the prompt is put into the history instead
            history_feedin = []
            history_feedin.append([system_prompt, "Certainly!"])
            for i in range(len(history)//2):
                history_feedin.append([history[2*i], history[2*i+1]])
        elif history_format == 'chatglm3':
            # there is a proper sys_prompt interface
            conversation_cnt = len(history) // 2
            history_feedin = [{"role": "system", "content": system_prompt}]
            if conversation_cnt:
                for index in range(0, 2*conversation_cnt, 2):
                    what_i_have_asked = {}
                    what_i_have_asked["role"] = "user"
                    what_i_have_asked["content"] = history[index]
                    what_gpt_answer = {}
                    what_gpt_answer["role"] = "assistant"
                    what_gpt_answer["content"] = history[index+1]
                    if what_i_have_asked["content"] != "":
                        if what_gpt_answer["content"] == "": continue
                        history_feedin.append(what_i_have_asked)
                        history_feedin.append(what_gpt_answer)
                    else:
                        history_feedin[-1]['content'] = what_gpt_answer['content']

        # start receiving the reply
        response = f"[Local Message] Waiting for {model_name} to respond ..."

View File

@@ -18,7 +18,8 @@ if __name__ == "__main__":
    # from request_llms.bridge_internlm import predict_no_ui_long_connection
    # from request_llms.bridge_qwen import predict_no_ui_long_connection
    # from request_llms.bridge_spark import predict_no_ui_long_connection
    from request_llms.bridge_zhipu import predict_no_ui_long_connection
    # from request_llms.bridge_zhipu import predict_no_ui_long_connection
    from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
    llm_kwargs = {
        'max_length': 4096,