From c17fc2a9b55b1c7447718a06a3eac4378828bb22 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Mon, 7 Aug 2023 01:58:35 +0800
Subject: [PATCH] I am a large-scale language model from DAMO Academy, and my name is Tongyi Qianwen (通义千问).
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py                  |  2 +-
 request_llm/bridge_all.py  | 16 ++++++++++++++++
 request_llm/bridge_qwen.py | 13 +++++--------
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/config.py b/config.py
index 1d43dd4..bfa4a3a 100644
--- a/config.py
+++ b/config.py
@@ -71,7 +71,7 @@ MAX_RETRY = 2
 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
 LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
 AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
-# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
+# P.S. 其他可用的模型还包括 ["qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
 
 
 # ChatGLM(2) Finetune Model Path (如果使用ChatGLM2微调模型,需要把"chatglmft"加入AVAIL_LLM_MODELS中)
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index f38711d..1f8a1dc 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -335,6 +335,22 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS:
         })
     except:
         print(trimmed_format_exc())
+if "qwen" in AVAIL_LLM_MODELS:
+    try:
+        from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
+        from .bridge_qwen import predict as qwen_ui
+        model_info.update({
+            "qwen": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
 
 def LLM_CATCH_EXCEPTION(f):
     """
diff --git a/request_llm/bridge_qwen.py b/request_llm/bridge_qwen.py
index 3ca36ab..cd437e4 100644
--- a/request_llm/bridge_qwen.py
+++ b/request_llm/bridge_qwen.py
@@ -32,15 +32,13 @@ class GetONNXGLMHandle(LocalLLMHandle):
 
         model_id = 'qwen/Qwen-7B-Chat'
         revision = 'v1.0.1'
-        tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
+        self._tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
         # use fp16
-        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision,
-                                                     trust_remote_code=True, fp16=True).eval()
-        model.generation_config = GenerationConfig.from_pretrained(model_id,
-                                                                   trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
+        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision, trust_remote_code=True, fp16=True).eval()
+        model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
         self._model = model
 
-        return self._model, None
+        return self._model, self._tokenizer
 
     def llm_stream_generator(self, **kwargs):
         # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
@@ -54,8 +52,7 @@ class GetONNXGLMHandle(LocalLLMHandle):
 
         query, max_length, top_p, temperature, history = adaptor(kwargs)
 
-        prompt = chat_template(history, query)
-        for response in model.chat(tokenizer, query, history=history, stream=True):
+        for response in self._model.chat(self._tokenizer, query, history=history, stream=True):
             yield response
 
     def try_to_import_special_deps(self, **kwargs):
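
Usage note (illustrative, not part of the patch): with the hunks above applied, Qwen is switched on from config.py alone, because the registration block added to request_llm/bridge_all.py only runs when "qwen" appears in AVAIL_LLM_MODELS. A minimal sketch with an assumed, illustrative model list; only the "qwen" entry is required by this patch:

    # config.py -- illustrative values; "qwen" must be present for the
    # registration block added in request_llm/bridge_all.py to execute.
    LLM_MODEL = "gpt-3.5-turbo"                            # default model; must stay in AVAIL_LLM_MODELS
    AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-4", "qwen"]  # adding "qwen" enables the new bridge

On first use, bridge_qwen.py then downloads qwen/Qwen-7B-Chat (revision v1.0.1) from ModelScope in fp16, keeps the tokenizer on the handle, and streams replies via self._model.chat(self._tokenizer, query, history=history, stream=True), as in the last hunk.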