I am a large-scale language model from DAMO Academy. My name is Tongyi Qianwen (Qwen).

binary-husky 2023-08-07 01:58:35 +08:00
parent 4d70b3786f
commit c17fc2a9b5
3 changed files with 22 additions and 9 deletions

View File

@@ -71,7 +71,7 @@ MAX_RETRY = 2
 # Model selection (note: LLM_MODEL is the default model and *must* be included in the AVAIL_LLM_MODELS list)
 LLM_MODEL = "gpt-3.5-turbo" # options ↓↓↓
 AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
-# P.S. Other available models also include ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
+# P.S. Other available models also include ["qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
 # ChatGLM(2) Finetune Model Path: if using a fine-tuned ChatGLM2 model, add "chatglmft" to AVAIL_LLM_MODELS
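
Note that this hunk only extends the comment listing additional models; to actually route requests to the new backend, "qwen" still has to be added to AVAIL_LLM_MODELS (the registration in the next file is guarded by that check). A minimal sketch of the resulting config entry, where only the appended "qwen" item differs from the list shown above:

```python
# Sketch of the relevant config lines -- enabling the local Qwen backend.
# The bridge code only registers "qwen" if it appears in this list.
LLM_MODEL = "gpt-3.5-turbo"   # default model; must itself be in AVAIL_LLM_MODELS
AVAIL_LLM_MODELS = [
    "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo",
    "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude",
    "qwen",  # added: dispatches to bridge_qwen (see the next two files)
]
```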

View File

@@ -335,6 +335,22 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS:
         })
     except:
         print(trimmed_format_exc())
+if "qwen" in AVAIL_LLM_MODELS:
+    try:
+        from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
+        from .bridge_qwen import predict as qwen_ui
+        model_info.update({
+            "qwen": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
 
 def LLM_CATCH_EXCEPTION(f):
     """

View File

@@ -32,15 +32,13 @@ class GetONNXGLMHandle(LocalLLMHandle):
         model_id = 'qwen/Qwen-7B-Chat'
         revision = 'v1.0.1'
-        tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
+        self._tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
         # use fp16
-        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision,
-                                                     trust_remote_code=True, fp16=True).eval()
-        model.generation_config = GenerationConfig.from_pretrained(model_id,
-                                                                   trust_remote_code=True)  # generation length, top_p, and other hyperparameters can be customized
+        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision, trust_remote_code=True, fp16=True).eval()
+        model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # generation length, top_p, and other hyperparameters can be customized
         self._model = model
-        return self._model, None
+        return self._model, self._tokenizer
 
     def llm_stream_generator(self, **kwargs):
         # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
@@ -54,8 +52,7 @@ class GetONNXGLMHandle(LocalLLMHandle):
         query, max_length, top_p, temperature, history = adaptor(kwargs)
 
-        prompt = chat_template(history, query)
-        for response in model.chat(tokenizer, query, history=history, stream=True):
+        for response in self._model.chat(self._tokenizer, query, history=history, stream=True):
             yield response
 
     def try_to_import_special_deps(self, **kwargs):
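
This change stores the tokenizer on the handle and streams through the model's own chat() with stream=True instead of building a prompt with chat_template(). For reference, a standalone sketch of the same load-and-stream pattern, assuming the ModelScope distribution of Qwen-7B-Chat (the lowercase 'qwen/' namespace and the revision argument suggest ModelScope; the Hugging Face "Qwen/Qwen-7B-Chat" checkpoint exposes the same remote-code interface):

```python
# Standalone sketch of the pattern used in the bridge code above.
# Assumption: imports come from the ModelScope compatibility layer; swap in
# `transformers` and the "Qwen/Qwen-7B-Chat" id for the Hugging Face variant.
from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

model_id = 'qwen/Qwen-7B-Chat'
revision = 'v1.0.1'

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", revision=revision, trust_remote_code=True, fp16=True
).eval()
# Generation length, top_p, and other hyperparameters can be overridden here.
model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)

history = []   # list of (query, reply) pairs from earlier turns
query = "你好"
final = ""
# With stream=True, chat() yields the partially generated reply as it grows.
for response in model.chat(tokenizer, query, history=history, stream=True):
    final = response
print(final)   # full reply once streaming finishes
```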