Integrate deepseek-coder

qingxu fu 2023-11-24 02:35:44 +08:00
parent e533ed6d12
commit 9916f59753
5 changed files with 129 additions and 8 deletions

View File: config.py

@@ -91,8 +91,8 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview",
                     "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
                     "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
                     "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4",
-                    "chatglm3", "moss", "newbing", "claude-2"]
-# P.S. Other available models also include ["zhipuai", "qianfan", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
+                    "chatglm3", "moss", "claude-2"]
+# P.S. Other available models also include ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
 #                   "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]
@@ -271,11 +271,27 @@ NUM_CUSTOM_BASIC_BTN = 4
 BAIDU_CLOUD_API_KEY
 BAIDU_CLOUD_SECRET_KEY
 "newbing" (the Newbing interface is no longer stable and is not recommended)
 NEWBING_STYLE
 NEWBING_COOKIES
+Local LLM diagram
+├── "chatglm3"
+├── "chatglm"
+├── "chatglm_onnx"
+├── "chatglmft"
+├── "internlm"
+├── "moss"
+├── "jittorllms_pangualpha"
+├── "jittorllms_llama"
+├── "deepseekcoder"
+├── "qwen"
+├── RWKV (see the Wiki for support)
+└── "llama2"
 GUI layout dependency diagram
 CHATBOT_HEIGHT (height of the chat window)
@@ -286,7 +302,7 @@ NUM_CUSTOM_BASIC_BTN = 4
 THEME (color theme)
 AUTO_CLEAR_TXT (whether to clear the input box automatically on submit)
 ADD_WAIFU (add a live2d decoration)
 ALLOW_RESET_CONFIG (whether to allow modifying this page's configuration via natural-language description; this feature carries some risk)
 Plugin online-service configuration dependency diagram
@@ -298,7 +314,7 @@ NUM_CUSTOM_BASIC_BTN = 4
 ALIYUN_ACCESSKEY
 ALIYUN_SECRET
 Precise PDF document parsing
 GROBID_URLS
 """

View File: request_llms/bridge_all.py

@@ -543,6 +543,22 @@ if "zhipuai" in AVAIL_LLM_MODELS:    # zhipuai
         })
     except:
         print(trimmed_format_exc())
+if "deepseekcoder" in AVAIL_LLM_MODELS:    # deepseekcoder
+    try:
+        from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui
+        from .bridge_deepseekcoder import predict as deepseekcoder_ui
+        model_info.update({
+            "deepseekcoder": {
+                "fn_with_ui": deepseekcoder_ui,
+                "fn_without_ui": deepseekcoder_noui,
+                "endpoint": None,          # local model, no HTTP endpoint
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
 # <-- used to define and switch between multiple azure models -->
 AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY")
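Once registered, the rest of bridge_all.py can route requests through the model_info table alone. A sketch of that consumer-side lookup (assumed shape, based on the fields registered above):

    fn_noui = model_info["deepseekcoder"]["fn_without_ui"]   # -> deepseekcoder_noui
    budget  = model_info["deepseekcoder"]["max_token"]       # 4096-token context budget
    # token accounting reuses the GPT-3.5 tokenizer registered above
    n = model_info["deepseekcoder"]["token_cnt"]("def quicksort(a): ...")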

View File: request_llms/bridge_deepseekcoder.py (new file)

@@ -0,0 +1,88 @@
model_name = "deepseek-coder-6.7b-instruct"
cmd_to_install = "unknown"  # e.g. "`pip install -r request_llms/requirements_qwen.txt`"

import os
from toolbox import ProxyNetworkActivate
from toolbox import get_conf
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
from threading import Thread

def download_huggingface_model(model_name, max_retry, local_dir):
    from huggingface_hub import snapshot_download
    for i in range(1, max_retry + 1):
        try:
            snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
            break
        except Exception as e:
            print(f'\n\nDownload failed ({e}), retry {i}/{max_retry}...\n\n')
    return local_dir
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
class GetONNXGLMHandle(LocalLLMHandle):

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        with ProxyNetworkActivate('Download_LLM'):
            from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
            model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
            # local_dir = f"~/.cache/{model_name}"
            # if not os.path.exists(local_dir):
            #     tokenizer = download_huggingface_model(model_name, max_retry=128, local_dir=local_dir)
            tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
            self._streamer = TextIteratorStreamer(tokenizer)
            model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
            if get_conf('LOCAL_MODEL_DEVICE') != 'cpu':
                model = model.cuda()
        return model, tokenizer
    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        def adaptor(kwargs):
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)
        history.append({'role': 'user', 'content': query})
        messages = history
        inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt").to(self._model.device)
        generation_kwargs = dict(
            inputs=inputs,
            max_new_tokens=max_length,
            do_sample=False,      # greedy decoding; transformers ignores top_p/temperature when sampling is off
            top_p=top_p,
            streamer=self._streamer,
            top_k=50,
            temperature=temperature,
            num_return_sequences=1,
            eos_token_id=32021,   # the model's <|EOT|> token
        )
        # generate in a background thread; TextIteratorStreamer yields text as it is produced
        thread = Thread(target=self._model.generate, kwargs=generation_kwargs, daemon=True)
        thread.start()
        generated_text = ""
        for new_text in self._streamer:
            generated_text += new_text
            yield generated_text
    def try_to_import_special_deps(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        # import something that will raise an error if the user has not installed requirements_*.txt
        # import importlib
        # importlib.import_module('modelscope')
        pass

# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetONNXGLMHandle, model_name, history_format='chatglm3')
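The final line above produces both entry points that bridge_all.py imports. A hypothetical smoke test (the llm_kwargs keys are the ones read by the adaptor above; the exact signature is defined by get_local_llm_predict_fns and is assumed here):

    from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection

    llm_kwargs = {'max_length': 512, 'top_p': 0.8, 'temperature': 0.2}
    reply = predict_no_ui_long_connection(
        "Write a Python function that reverses a string.",
        llm_kwargs=llm_kwargs, history=[], sys_prompt="")
    print(reply)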

View File: request_llms/local_llm_class.py

@@ -198,7 +198,7 @@ class LocalLLMHandle(Process):
                 if res.startswith(self.std_tag):
                     new_output = res[len(self.std_tag):]
                     std_out = std_out[:std_out_clip_len]
-                    # print(new_output, end='')
+                    print(new_output, end='')
                     std_out = new_output + std_out
                     yield self.std_tag + '\n```\n' + std_out + '\n```\n'
             elif res == '[Finish]':

View File: tests/test_llms.py

@@ -15,7 +15,8 @@ if __name__ == "__main__":
     # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
     # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
     # from request_llms.bridge_claude import predict_no_ui_long_connection
-    from request_llms.bridge_internlm import predict_no_ui_long_connection
+    # from request_llms.bridge_internlm import predict_no_ui_long_connection
+    from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
     # from request_llms.bridge_qwen import predict_no_ui_long_connection
     # from request_llms.bridge_spark import predict_no_ui_long_connection
     # from request_llms.bridge_zhipu import predict_no_ui_long_connection
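The harness selects a backend by keeping exactly one of these imports active, so pointing the smoke test at the new bridge is a one-line swap; running the file directly (python tests/test_llms.py, path assumed) then drives predict_no_ui_long_connection against the local DeepSeek-Coder model.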