From 9916f59753827e84c6dab7c11d77389062db3a29 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Fri, 24 Nov 2023 02:35:44 +0800
Subject: [PATCH] Integrate deepseek-coder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py                            | 28 +++++++--
 request_llms/bridge_all.py           | 16 +++++
 request_llms/bridge_deepseekcoder.py | 88 ++++++++++++++++++++++++++++
 request_llms/local_llm_class.py      |  2 +-
 tests/test_llms.py                   |  3 +-
 5 files changed, 129 insertions(+), 8 deletions(-)
 create mode 100644 request_llms/bridge_deepseekcoder.py

diff --git a/config.py b/config.py
index ea603c3..f5788be 100644
--- a/config.py
+++ b/config.py
@@ -91,8 +91,8 @@
 AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview",
                     "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
                     "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4",
-                    "chatglm3", "moss", "newbing", "claude-2"]
-# P.S. Other available models include ["zhipuai", "qianfan", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
+                    "chatglm3", "moss", "claude-2"]
+# P.S. Other available models include ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
 #      "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]
@@ -271,11 +271,27 @@ NUM_CUSTOM_BASIC_BTN = 4
 │   ├── BAIDU_CLOUD_API_KEY
 │   └── BAIDU_CLOUD_SECRET_KEY
 │
-├── "newbing" the Newbing interface is no longer stable and is not recommended
+└── "newbing" the Newbing interface is no longer stable and is not recommended
     ├── NEWBING_STYLE
     └── NEWBING_COOKIES
 
+Local LLM dependency diagram
+│
+├── "chatglm3"
+├── "chatglm"
+├── "chatglm_onnx"
+├── "chatglmft"
+├── "internlm"
+├── "moss"
+├── "jittorllms_pangualpha"
+├── "jittorllms_llama"
+├── "deepseekcoder"
+├── "qwen"
+├── RWKV support: see the Wiki
+└── "llama2"
+
+
 GUI layout dependency diagram
 │
 ├── CHATBOT_HEIGHT height of the chat window
@@ -286,7 +302,7 @@ NUM_CUSTOM_BASIC_BTN = 4
 ├── THEME color theme
 ├── AUTO_CLEAR_TXT whether to clear the input box automatically on submit
 ├── ADD_WAIFU add a live2d decoration
-├── ALLOW_RESET_CONFIG whether to allow changing this page's configuration via natural-language commands; this feature carries some risk
+└── ALLOW_RESET_CONFIG whether to allow changing this page's configuration via natural-language commands; this feature carries some risk
 
 
 Plugin online-service configuration dependency diagram
@@ -298,7 +314,7 @@ NUM_CUSTOM_BASIC_BTN = 4
 │   ├── ALIYUN_ACCESSKEY
 │   └── ALIYUN_SECRET
 │
-├── Precise PDF parsing
-│   └── GROBID_URLS
+└── Precise PDF parsing
+    └── GROBID_URLS
 
 """
diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index 88848a9..8dece54 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -543,6 +543,22 @@ if "zhipuai" in AVAIL_LLM_MODELS:   # zhipuai
         })
     except:
         print(trimmed_format_exc())
+if "deepseekcoder" in AVAIL_LLM_MODELS:   # deepseekcoder
+    try:
+        from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui
+        from .bridge_deepseekcoder import predict as deepseekcoder_ui
+        model_info.update({
+            "deepseekcoder": {
+                "fn_with_ui": deepseekcoder_ui,
+                "fn_without_ui": deepseekcoder_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
 
 # <-- for defining and switching among multiple azure models -->
 AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY")
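
Note on the hunk above: bridge_all.py keeps a registry dict, model_info, mapping each model name to its UI/non-UI predict functions plus tokenizer metadata, and the rest of the program dispatches by name. The sketch below shows how such an entry can be exercised; it assumes only the keys registered above, and the route() helper and the llm_kwargs values are illustrative, not part of this patch:

    # Illustrative dispatch through the model_info registry (route() is hypothetical).
    def route(model: str, prompt: str):
        entry = model_info[model]                        # e.g. model_info["deepseekcoder"]
        llm_kwargs = {'llm_model': model,                # assumed keys, shown for illustration
                      'max_length': entry['max_token'],
                      'top_p': 1, 'temperature': 1}
        # non-UI predict function registered by the hunk above
        return entry['fn_without_ui'](prompt, llm_kwargs, history=[], sys_prompt="")

    # Token budgeting reuses the registered gpt-3.5 tokenizer helper:
    n_tokens = model_info["deepseekcoder"]["token_cnt"]("hello world")
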
diff --git a/request_llms/bridge_deepseekcoder.py b/request_llms/bridge_deepseekcoder.py
new file mode 100644
index 0000000..9791940
--- /dev/null
+++ b/request_llms/bridge_deepseekcoder.py
@@ -0,0 +1,88 @@
+model_name = "deepseek-coder-6.7b-instruct"
+cmd_to_install = "unknown" # e.g. "`pip install -r request_llms/requirements_qwen.txt`"
+
+import os
+from threading import Thread
+from toolbox import ProxyNetworkActivate, get_conf
+from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
+
+def download_huggingface_model(model_name, max_retry, local_dir):
+    from huggingface_hub import snapshot_download
+    for i in range(1, max_retry + 1):
+        try:
+            snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
+            break
+        except Exception as e:
+            print(f'\n\nDownload failed ({e}), retrying {i}/{max_retry}...\n\n')
+    return local_dir
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 Local Model
+# ------------------------------------------------------------------------------------------------------------------------
+class GetDeepseekCoderHandle(LocalLLMHandle):
+
+    def load_model_info(self):
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
+        self.model_name = model_name
+        self.cmd_to_install = cmd_to_install
+
+    def load_model_and_tokenizer(self):
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
+        with ProxyNetworkActivate('Download_LLM'):
+            from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+            model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
+            # local_dir = f"~/.cache/{model_name}"
+            # if not os.path.exists(local_dir):
+            #     tokenizer = download_huggingface_model(model_name, max_retry=128, local_dir=local_dir)
+            tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+            self._streamer = TextIteratorStreamer(tokenizer)
+            model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+            if get_conf('LOCAL_MODEL_DEVICE') != 'cpu':
+                model = model.cuda()
+        return model, tokenizer
+
+    def llm_stream_generator(self, **kwargs):
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
+        def adaptor(kwargs):
+            query = kwargs['query']
+            max_length = kwargs['max_length']
+            top_p = kwargs['top_p']
+            temperature = kwargs['temperature']
+            history = kwargs['history']
+            return query, max_length, top_p, temperature, history
+
+        query, max_length, top_p, temperature, history = adaptor(kwargs)
+        history.append({'role': 'user', 'content': query})
+        messages = history
+        inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt").to(self._model.device)
+        generation_kwargs = dict(
+            inputs=inputs,
+            max_new_tokens=max_length,
+            do_sample=False,  # greedy decoding; transformers ignores top_p/temperature below while sampling is off
+            top_p=top_p,
+            streamer=self._streamer,
+            top_k=50,
+            temperature=temperature,
+            num_return_sequences=1,
+            eos_token_id=32021,  # id of the <|EOT|> token in the deepseek-coder tokenizer
+        )
+        # generate() blocks, so run it on a daemon thread and stream partial text back
+        thread = Thread(target=self._model.generate, kwargs=generation_kwargs, daemon=True)
+        thread.start()
+        generated_text = ""
+        for new_text in self._streamer:
+            generated_text += new_text
+            # print(generated_text)
+            yield generated_text
+
+    def try_to_import_special_deps(self, **kwargs): pass
+        # import something that will raise an error if the user has not installed requirement_*.txt
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
+        # import importlib
+        # importlib.import_module('modelscope')
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 GPT-Academic Interface
+# ------------------------------------------------------------------------------------------------------------------------
+predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetDeepseekCoderHandle, model_name, history_format='chatglm3')
\ No newline at end of file
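
Note on llm_stream_generator above: it uses the standard transformers streaming idiom, where the blocking model.generate() call runs on a daemon thread while TextIteratorStreamer yields decoded text incrementally to the consumer. A self-contained sketch of the same idiom, with a small stand-in model so it stays cheap to run (the model choice and prompt are placeholders, not part of this patch):

    from threading import Thread
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")    # stand-in model for the sketch
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

    enc = tokenizer(["def quicksort(arr):"], return_tensors="pt")
    # generate() blocks until completion, so it runs in the background...
    Thread(target=model.generate, daemon=True,
           kwargs=dict(**enc, max_new_tokens=64, streamer=streamer)).start()
    # ...while this thread consumes decoded chunks as they arrive.
    text = ""
    for chunk in streamer:
        text += chunk
        print(chunk, end="", flush=True)
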
diff --git a/request_llms/local_llm_class.py b/request_llms/local_llm_class.py
index 091707a..413df03 100644
--- a/request_llms/local_llm_class.py
+++ b/request_llms/local_llm_class.py
@@ -198,7 +198,7 @@ class LocalLLMHandle(Process):
             if res.startswith(self.std_tag):
                 new_output = res[len(self.std_tag):]
                 std_out = std_out[:std_out_clip_len]
-                # print(new_output, end='')
+                print(new_output, end='')
                 std_out = new_output + std_out
                 yield self.std_tag + '\n```\n' + std_out + '\n```\n'
             elif res == '[Finish]':
diff --git a/tests/test_llms.py b/tests/test_llms.py
index 6285f03..8b68597 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -15,7 +15,8 @@ if __name__ == "__main__":
     # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
     # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
     # from request_llms.bridge_claude import predict_no_ui_long_connection
-    from request_llms.bridge_internlm import predict_no_ui_long_connection
+    # from request_llms.bridge_internlm import predict_no_ui_long_connection
+    from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
     # from request_llms.bridge_qwen import predict_no_ui_long_connection
     # from request_llms.bridge_spark import predict_no_ui_long_connection
     # from request_llms.bridge_zhipu import predict_no_ui_long_connection
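
With the patch applied, the new bridge can be smoke-tested the same way tests/test_llms.py exercises the other local bridges. A sketch of such a run, assuming the repository root as the working directory and enough GPU memory for the 6.7B weights (the prompt and llm_kwargs values are illustrative):

    # The first call downloads deepseek-ai/deepseek-coder-6.7b-instruct from Hugging Face.
    from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection

    llm_kwargs = {'max_length': 4096, 'top_p': 1, 'temperature': 1}  # illustrative values
    result = predict_no_ui_long_connection(
        inputs="Write a quicksort function in Python.",
        llm_kwargs=llm_kwargs, history=[], sys_prompt="")
    print(result)
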