Integrate deepseek-coder

qingxu fu 2023-11-24 02:35:44 +08:00
parent e533ed6d12
commit 9916f59753
5 changed files with 129 additions and 8 deletions

View File: config.py

@@ -91,8 +91,8 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview",
                     "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
                     "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
                     "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4",
-                    "chatglm3", "moss", "newbing", "claude-2"]
-# P.S. Other available models also include ["zhipuai", "qianfan", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
+                    "chatglm3", "moss", "claude-2"]
+# P.S. Other available models also include ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
 #                   "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]
@@ -271,11 +271,27 @@ NUM_CUSTOM_BASIC_BTN = 4
 BAIDU_CLOUD_API_KEY
 BAIDU_CLOUD_SECRET_KEY
 "newbing" (the Newbing interface is no longer stable and is not recommended)
 NEWBING_STYLE
 NEWBING_COOKIES
+Local LLM diagram
+├── "chatglm3"
+├── "chatglm"
+├── "chatglm_onnx"
+├── "chatglmft"
+├── "internlm"
+├── "moss"
+├── "jittorllms_pangualpha"
+├── "jittorllms_llama"
+├── "deepseekcoder"
+├── "qwen"
+├── RWKV (see the Wiki for support)
+└── "llama2"
 GUI layout dependency diagram
 CHATBOT_HEIGHT (height of the chat window)
@@ -286,7 +302,7 @@ NUM_CUSTOM_BASIC_BTN = 4
 THEME (color theme)
 AUTO_CLEAR_TXT (whether to clear the input box automatically on submit)
 ADD_WAIFU (add a live2d decoration)
 ALLOW_RESET_CONFIG (whether to allow modifying this page's configuration via natural-language description; this feature carries some risk)
 Plugin online-service configuration dependency diagram
@@ -298,7 +314,7 @@ NUM_CUSTOM_BASIC_BTN = 4
 ALIYUN_ACCESSKEY
 ALIYUN_SECRET
 Precise PDF document parsing
 GROBID_URLS
 """

View File: request_llms/bridge_all.py

@@ -543,6 +543,22 @@ if "zhipuai" in AVAIL_LLM_MODELS:    # zhipuai
         })
     except:
         print(trimmed_format_exc())
+if "deepseekcoder" in AVAIL_LLM_MODELS:    # deepseekcoder
+    try:
+        from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui
+        from .bridge_deepseekcoder import predict as deepseekcoder_ui
+        model_info.update({
+            "deepseekcoder": {
+                "fn_with_ui": deepseekcoder_ui,
+                "fn_without_ui": deepseekcoder_noui,
+                "endpoint": None,          # local model, no HTTP endpoint
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
 # <-- used to define and switch between multiple azure models -->
 AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY")
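Once registered, the rest of bridge_all.py can route requests through the model_info table alone. A sketch of that consumer-side lookup (assumed shape, based on the fields registered above):

    fn_noui = model_info["deepseekcoder"]["fn_without_ui"]   # -> deepseekcoder_noui
    budget  = model_info["deepseekcoder"]["max_token"]       # 4096-token context budget
    # token accounting reuses the GPT-3.5 tokenizer registered above
    n = model_info["deepseekcoder"]["token_cnt"]("def quicksort(a): ...")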

View File: request_llms/bridge_deepseekcoder.py (new file)

@@ -0,0 +1,88 @@
model_name = "deepseek-coder-6.7b-instruct"
cmd_to_install = "unknown"  # e.g. "`pip install -r request_llms/requirements_qwen.txt`"

import os
from toolbox import ProxyNetworkActivate
from toolbox import get_conf
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
from threading import Thread

def download_huggingface_model(model_name, max_retry, local_dir):
    from huggingface_hub import snapshot_download
    for i in range(1, max_retry + 1):
        try:
            snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
            break
        except Exception as e:
            print(f'\n\nDownload failed ({e}), retry {i}/{max_retry}...\n\n')
    return local_dir
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
class GetONNXGLMHandle(LocalLLMHandle):

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        with ProxyNetworkActivate('Download_LLM'):
            from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
            model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
            # local_dir = f"~/.cache/{model_name}"
            # if not os.path.exists(local_dir):
            #     tokenizer = download_huggingface_model(model_name, max_retry=128, local_dir=local_dir)
            tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
            self._streamer = TextIteratorStreamer(tokenizer)
            model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
            if get_conf('LOCAL_MODEL_DEVICE') != 'cpu':
                model = model.cuda()
        return model, tokenizer
    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        def adaptor(kwargs):
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)
        history.append({'role': 'user', 'content': query})
        messages = history
        inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt").to(self._model.device)
        generation_kwargs = dict(
            inputs=inputs,
            max_new_tokens=max_length,
            do_sample=False,      # greedy decoding; transformers ignores top_p/temperature when sampling is off
            top_p=top_p,
            streamer=self._streamer,
            top_k=50,
            temperature=temperature,
            num_return_sequences=1,
            eos_token_id=32021,   # the model's <|EOT|> token
        )
        # generate in a background thread; TextIteratorStreamer yields text as it is produced
        thread = Thread(target=self._model.generate, kwargs=generation_kwargs, daemon=True)
        thread.start()
        generated_text = ""
        for new_text in self._streamer:
            generated_text += new_text
            yield generated_text
    def try_to_import_special_deps(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        # import something that will raise an error if the user has not installed requirements_*.txt
        # import importlib
        # importlib.import_module('modelscope')
        pass

# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name, history_format='chatglm3')
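The final line above produces both entry points that bridge_all.py imports. A hypothetical smoke test (the llm_kwargs keys are the ones read by the adaptor above; the exact signature is defined by get_local_llm_predict_fns and is assumed here):

    from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection

    llm_kwargs = {'max_length': 512, 'top_p': 0.8, 'temperature': 0.2}
    reply = predict_no_ui_long_connection(
        "Write a Python function that reverses a string.",
        llm_kwargs=llm_kwargs, history=[], sys_prompt="")
    print(reply)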

View File: request_llms/local_llm_class.py

@@ -198,7 +198,7 @@ class LocalLLMHandle(Process):
                 if res.startswith(self.std_tag):
                     new_output = res[len(self.std_tag):]
                     std_out = std_out[:std_out_clip_len]
-                    # print(new_output, end='')
+                    print(new_output, end='')
                     std_out = new_output + std_out
                     yield self.std_tag + '\n```\n' + std_out + '\n```\n'
             elif res == '[Finish]':

View File: tests/test_llms.py

@@ -15,7 +15,8 @@ if __name__ == "__main__":
     # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
     # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
     # from request_llms.bridge_claude import predict_no_ui_long_connection
-    from request_llms.bridge_internlm import predict_no_ui_long_connection
+    # from request_llms.bridge_internlm import predict_no_ui_long_connection
+    from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
     # from request_llms.bridge_qwen import predict_no_ui_long_connection
     # from request_llms.bridge_spark import predict_no_ui_long_connection
     # from request_llms.bridge_zhipu import predict_no_ui_long_connection
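The harness selects a backend by keeping exactly one of these imports active, so pointing the smoke test at the new bridge is a one-line swap; running the file directly (python tests/test_llms.py, path assumed) then drives predict_no_ui_long_connection against the local DeepSeek-Coder model.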