diff --git a/config.py b/config.py
index 3d80962..17dac34 100644
--- a/config.py
+++ b/config.py
@@ -92,8 +92,9 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview","gpt-4-vision-prev
                     "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
                     "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4",
                     "chatglm3", "moss", "claude-2"]
-# P.S. 其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
-# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]
+# P.S. 其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen-local", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
+# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"
+# "qwen-turbo", "qwen-plus", "qwen-max"]


 # 定义界面上“询问多个GPT模型”插件应该使用哪些模型,请从AVAIL_LLM_MODELS中选择,并在不同模型之间用`&`间隔,例如"gpt-3.5-turbo&chatglm3&azure-gpt-4"
@@ -103,7 +104,11 @@ MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3"
 # 选择本地模型变体(只有当AVAIL_LLM_MODELS包含了对应本地模型时,才会起作用)
 # 如果你选择Qwen系列的模型,那么请在下面的QWEN_MODEL_SELECTION中指定具体的模型
 # 也可以是具体的模型路径
-QWEN_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
+QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
+
+
+# 接入通义千问在线大模型 https://dashscope.console.aliyun.com/
+DASHSCOPE_API_KEY = "此处填阿里灵积云API秘钥" # 阿里灵积云API_KEY


 # 百度千帆(LLM_MODEL="qianfan")
@@ -284,6 +289,9 @@ NUM_CUSTOM_BASIC_BTN = 4
 │   ├── ZHIPUAI_API_KEY
 │   └── ZHIPUAI_MODEL
 │
+├── "qwen-turbo" 等通义千问大模型
+│   └── DASHSCOPE_API_KEY
+│
 └── "newbing" Newbing接口不再稳定,不推荐使用
     ├── NEWBING_STYLE
     └── NEWBING_COOKIES
@@ -300,7 +308,7 @@ NUM_CUSTOM_BASIC_BTN = 4
 ├── "jittorllms_pangualpha"
 ├── "jittorllms_llama"
 ├── "deepseekcoder"
-├── "qwen"
+├── "qwen-local"
 ├── RWKV的支持见Wiki
 └── "llama2"
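For context, a minimal way to switch a deployment onto the new entries could look like the sketch below. The option names (`AVAIL_LLM_MODELS`, `DASHSCOPE_API_KEY`, `QWEN_LOCAL_MODEL_SELECTION`) and the model identifiers come from the hunks above; the particular model list, the `LLM_MODEL` default and the placeholder key value are illustrative and not part of this patch.

```python
# Hypothetical excerpt of a user's config overrides (e.g. config_private.py).
LLM_MODEL = "qwen-turbo"                                   # default model shown in the UI
AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "qwen-turbo", "qwen-plus", "qwen-max", "qwen-local"]
DASHSCOPE_API_KEY = "sk-..."                               # key issued at https://dashscope.console.aliyun.com/
QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"    # only read by the "qwen-local" bridge
```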
diff --git a/docs/translate_english.json b/docs/translate_english.json
index 3920e1f..c48ec6b 100644
--- a/docs/translate_english.json
+++ b/docs/translate_english.json
@@ -2932,7 +2932,7 @@
     "3. 输入修改需求": "3. Enter modification requirements",
     "刷新界面 由于请求gpt需要一段时间": "Refreshing the interface takes some time due to the request for gpt",
     "随机小游戏": "Random mini game",
-    "那么请在下面的QWEN_MODEL_SELECTION中指定具体的模型": "So please specify the specific model in QWEN_MODEL_SELECTION below",
+    "那么请在下面的QWEN_LOCAL_MODEL_SELECTION中指定具体的模型": "So please specify the specific model in QWEN_LOCAL_MODEL_SELECTION below",
     "表值": "Table value",
     "我画你猜": "I draw, you guess",
     "狗": "Dog",
diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index dcfeba9..689b1f9 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -431,16 +431,48 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS:
         })
     except:
         print(trimmed_format_exc())
-if "qwen" in AVAIL_LLM_MODELS:
+if "qwen-local" in AVAIL_LLM_MODELS:
+    try:
+        from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui
+        from .bridge_qwen_local import predict as qwen_local_ui
+        model_info.update({
+            "qwen-local": {
+                "fn_with_ui": qwen_local_ui,
+                "fn_without_ui": qwen_local_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
+if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS:   # zhipuai
     try:
         from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
         from .bridge_qwen import predict as qwen_ui
         model_info.update({
-            "qwen": {
+            "qwen-turbo": {
                 "fn_with_ui": qwen_ui,
                 "fn_without_ui": qwen_noui,
                 "endpoint": None,
-                "max_token": 4096,
+                "max_token": 6144,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "qwen-plus": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "endpoint": None,
+                "max_token": 30720,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "qwen-max": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "endpoint": None,
+                "max_token": 28672,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             }
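The `model_info` entries registered above form the dispatch table the rest of the project reads. Below is a rough, simplified sketch of how one entry might be consumed; the helper name `pick_and_call` and the call sequence are illustrative, only the dictionary keys (`fn_without_ui`, `max_token`, `token_cnt`) are taken from the diff.

```python
# Illustrative sketch only - not the project's actual dispatcher.
def pick_and_call(model_info, llm_model, inputs, llm_kwargs, history, sys_prompt):
    entry = model_info[llm_model]                      # e.g. model_info["qwen-plus"]
    if entry["token_cnt"](inputs) > entry["max_token"]:
        raise ValueError("prompt longer than the model's max_token budget")
    # fn_without_ui is the registered no-UI entry point,
    # e.g. bridge_qwen.predict_no_ui_long_connection for the online Qwen models
    return entry["fn_without_ui"](inputs, llm_kwargs, history=history, sys_prompt=sys_prompt)
```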
diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py
index 940c41d..583def8 100644
--- a/request_llms/bridge_qwen.py
+++ b/request_llms/bridge_qwen.py
@@ -1,59 +1,66 @@
-model_name = "Qwen"
-cmd_to_install = "`pip install -r request_llms/requirements_qwen.txt`"
+import time
+import os
+from toolbox import update_ui, get_conf, update_ui_lastest_msg
+from toolbox import check_packages, report_exception
-from toolbox import ProxyNetworkActivate, get_conf
-from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
+model_name = 'Qwen'
+
+def validate_key():
+    DASHSCOPE_API_KEY = get_conf("DASHSCOPE_API_KEY")
+    if DASHSCOPE_API_KEY == '': return False
+    return True
+
+if not validate_key():
+    raise RuntimeError('请配置DASHSCOPE_API_KEY')
+os.environ['DASHSCOPE_API_KEY'] = get_conf("DASHSCOPE_API_KEY")
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
+    """
+    ⭐多线程方法
+    函数的说明请见 request_llms/bridge_all.py
+    """
+    watch_dog_patience = 5
+    response = ""
-# ------------------------------------------------------------------------------------------------------------------------
-# 🔌💻 Local Model
-# ------------------------------------------------------------------------------------------------------------------------
-class GetQwenLMHandle(LocalLLMHandle):
+    from .com_qwenapi import QwenRequestInstance
+    sri = QwenRequestInstance()
+    for response in sri.generate(inputs, llm_kwargs, history, sys_prompt):
+        if len(observe_window) >= 1:
+            observe_window[0] = response
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
+    return response
-
-    def load_model_info(self):
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
-        self.model_name = model_name
-        self.cmd_to_install = cmd_to_install
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+    ⭐单线程方法
+    函数的说明请见 request_llms/bridge_all.py
+    """
+    chatbot.append((inputs, ""))
+    yield from update_ui(chatbot=chatbot, history=history)
+
-    def load_model_and_tokenizer(self):
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
-        # from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
-        from transformers import AutoModelForCausalLM, AutoTokenizer
-        from transformers.generation import GenerationConfig
-        with ProxyNetworkActivate('Download_LLM'):
-            model_id = get_conf('QWEN_MODEL_SELECTION')
-            self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True)
-            # use fp16
-            model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
-            model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # 可指定不同的生成长度、top_p等相关超参
-            self._model = model
+    # 尝试导入依赖,如果缺少依赖,则给出安装建议
+    try:
+        check_packages(["dashscope"])
+    except:
+        yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。",
+                                         chatbot=chatbot, history=history, delay=0)
+        return
+
-        return self._model, self._tokenizer
+    if additional_fn is not None:
+        from core_functional import handle_core_functionality
+        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
+
-    def llm_stream_generator(self, **kwargs):
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
-        def adaptor(kwargs):
-            query = kwargs['query']
-            max_length = kwargs['max_length']
-            top_p = kwargs['top_p']
-            temperature = kwargs['temperature']
-            history = kwargs['history']
-            return query, max_length, top_p, temperature, history
+    # 开始接收回复
+    from .com_qwenapi import QwenRequestInstance
+    sri = QwenRequestInstance()
+    for response in sri.generate(inputs, llm_kwargs, history, system_prompt):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)
+
-        query, max_length, top_p, temperature, history = adaptor(kwargs)
-
-        for response in self._model.chat_stream(self._tokenizer, query, history=history):
-            yield response
-
-    def try_to_import_special_deps(self, **kwargs):
-        # import something that will raise error if the user does not install requirement_*.txt
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行
-        import importlib
-        importlib.import_module('modelscope')
-
-
-# ------------------------------------------------------------------------------------------------------------------------
-# 🔌💻 GPT-Academic Interface
-# ------------------------------------------------------------------------------------------------------------------------
-predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
\ No newline at end of file
+    # 总结输出
+    if response == f"[Local Message] 等待{model_name}响应中 ...":
+        response = f"[Local Message] {model_name}响应异常 ..."
+    history.extend([inputs, response])
+    yield from update_ui(chatbot=chatbot, history=history)
\ No newline at end of file
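A note on the `observe_window` convention used by `predict_no_ui_long_connection` above: slot 0 carries the partial reply for the caller to display, slot 1 a heartbeat timestamp that the caller keeps refreshing; if it goes stale for longer than `watch_dog_patience` (5 s) the bridge raises and the worker thread stops. The caller-side sketch below is hypothetical and assumes a valid `DASHSCOPE_API_KEY` is configured (the module raises at import time otherwise); the `llm_kwargs` values are placeholders.

```python
import threading
import time

from request_llms.bridge_qwen import predict_no_ui_long_connection  # raises if DASHSCOPE_API_KEY is unset

llm_kwargs = {"llm_model": "qwen-turbo", "top_p": 0.8, "temperature": 1.0}  # placeholder values
observe_window = ["", time.time()]   # [latest partial reply, last heartbeat]

worker = threading.Thread(target=lambda: print(predict_no_ui_long_connection(
    "你好", llm_kwargs, history=[], sys_prompt="", observe_window=observe_window)))
worker.start()
while worker.is_alive():
    print("partial:", observe_window[0][-40:])
    observe_window[1] = time.time()   # heartbeat: stop refreshing and the bridge aborts after ~5 s
    time.sleep(1)
```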
diff --git a/request_llms/bridge_qwen_local.py b/request_llms/bridge_qwen_local.py
new file mode 100644
index 0000000..4a0fa69
--- /dev/null
+++ b/request_llms/bridge_qwen_local.py
@@ -0,0 +1,59 @@
+model_name = "Qwen_local"
+cmd_to_install = "`pip install -r request_llms/requirements_qwen_local.txt`"
+
+from toolbox import ProxyNetworkActivate, get_conf
+from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
+
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 Local Model
+# ------------------------------------------------------------------------------------------------------------------------
+class GetQwenLMHandle(LocalLLMHandle):
+
+    def load_model_info(self):
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
+        self.model_name = model_name
+        self.cmd_to_install = cmd_to_install
+
+    def load_model_and_tokenizer(self):
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
+        # from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        from transformers.generation import GenerationConfig
+        with ProxyNetworkActivate('Download_LLM'):
+            model_id = get_conf('QWEN_LOCAL_MODEL_SELECTION')
+            self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True)
+            # use fp16
+            model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
+            model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # 可指定不同的生成长度、top_p等相关超参
+            self._model = model
+
+        return self._model, self._tokenizer
+
+    def llm_stream_generator(self, **kwargs):
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
+        def adaptor(kwargs):
+            query = kwargs['query']
+            max_length = kwargs['max_length']
+            top_p = kwargs['top_p']
+            temperature = kwargs['temperature']
+            history = kwargs['history']
+            return query, max_length, top_p, temperature, history
+
+        query, max_length, top_p, temperature, history = adaptor(kwargs)
+
+        for response in self._model.chat_stream(self._tokenizer, query, history=history):
+            yield response
+
+    def try_to_import_special_deps(self, **kwargs):
+        # import something that will raise error if the user does not install requirement_*.txt
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行
+        import importlib
+        importlib.import_module('modelscope')
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 GPT-Academic Interface
+# ------------------------------------------------------------------------------------------------------------------------
+predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
\ No newline at end of file
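Stripped of the `LocalLLMHandle` subprocess plumbing, the loading and streaming path of this local bridge boils down to roughly the following. This is a standalone sketch assuming the `requirements_qwen_local.txt` stack is installed and that the chosen checkpoint ships the remote-code `chat_stream()` interface the bridge relies on.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

model_id = "Qwen/Qwen-1_8B-Chat-Int8"  # or whatever QWEN_LOCAL_MODEL_SELECTION points at
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)

for partial in model.chat_stream(tokenizer, "你好", history=[]):
    print(partial)  # each yield is the accumulated reply so far
```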
diff --git a/request_llms/com_qwenapi.py b/request_llms/com_qwenapi.py
new file mode 100644
index 0000000..63ebdea
--- /dev/null
+++ b/request_llms/com_qwenapi.py
@@ -0,0 +1,85 @@
+from http import HTTPStatus
+from toolbox import get_conf
+import threading
+import logging
+
+timeout_bot_msg = '[Local Message] Request timeout. Network error.'
+
+class QwenRequestInstance():
+    def __init__(self):
+
+        self.time_to_yield_event = threading.Event()
+        self.time_to_exit_event = threading.Event()
+
+        self.result_buf = ""
+
+    def generate(self, inputs, llm_kwargs, history, system_prompt):
+        # import _thread as thread
+        from dashscope import Generation
+        QWEN_MODEL = {
+            'qwen-turbo': Generation.Models.qwen_turbo,
+            'qwen-plus': Generation.Models.qwen_plus,
+            'qwen-max': Generation.Models.qwen_max,
+        }[llm_kwargs['llm_model']]
+        top_p = llm_kwargs.get('top_p', 0.8)
+        if top_p == 0: top_p += 1e-5
+        if top_p == 1: top_p -= 1e-5
+
+        self.result_buf = ""
+        responses = Generation.call(
+            model=QWEN_MODEL,
+            messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
+            top_p=top_p,
+            temperature=llm_kwargs.get('temperature', 1.0),
+            result_format='message',
+            stream=True,
+            incremental_output=True
+        )
+
+        for response in responses:
+            if response.status_code == HTTPStatus.OK:
+                if response.output.choices[0].finish_reason == 'stop':
+                    yield self.result_buf
+                    break
+                elif response.output.choices[0].finish_reason == 'length':
+                    self.result_buf += "[Local Message] 生成长度过长,后续输出被截断"
+                    yield self.result_buf
+                    break
+                else:
+                    self.result_buf += response.output.choices[0].message.content
+                    yield self.result_buf
+            else:
+                self.result_buf += f"[Local Message] 请求错误:状态码:{response.status_code},错误码:{response.code},消息:{response.message}"
+                yield self.result_buf
+                break
+        logging.info(f'[raw_input] {inputs}')
+        logging.info(f'[response] {self.result_buf}')
+        return self.result_buf
+
+
+def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
+    conversation_cnt = len(history) // 2
+    if system_prompt == '': system_prompt = 'Hello!'
+    messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}]
+    if conversation_cnt:
+        for index in range(0, 2*conversation_cnt, 2):
+            what_i_have_asked = {}
+            what_i_have_asked["role"] = "user"
+            what_i_have_asked["content"] = history[index]
+            what_gpt_answer = {}
+            what_gpt_answer["role"] = "assistant"
+            what_gpt_answer["content"] = history[index+1]
+            if what_i_have_asked["content"] != "":
+                if what_gpt_answer["content"] == "":
+                    continue
+                if what_gpt_answer["content"] == timeout_bot_msg:
+                    continue
+                messages.append(what_i_have_asked)
+                messages.append(what_gpt_answer)
+            else:
+                messages[-1]['content'] = what_gpt_answer['content']
+    what_i_ask_now = {}
+    what_i_ask_now["role"] = "user"
+    what_i_ask_now["content"] = inputs
+    messages.append(what_i_ask_now)
+    return messages
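To make the payload shape concrete, here is what `generate_message_payload()` above returns for a one-turn history; the strings are example values only. Note that the system prompt is injected as a leading user/assistant pair rather than as a dedicated system message.

```python
messages = generate_message_payload(
    inputs="第二个问题",
    llm_kwargs={},                           # not consulted by this helper
    history=["第一个问题", "第一个回答"],     # flat [user, assistant, user, assistant, ...] list
    system_prompt="You are a helpful assistant.",
)
# messages ==
# [{"role": "user",      "content": "You are a helpful assistant."},
#  {"role": "assistant", "content": "Certainly!"},
#  {"role": "user",      "content": "第一个问题"},
#  {"role": "assistant", "content": "第一个回答"},
#  {"role": "user",      "content": "第二个问题"}]
```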
diff --git a/request_llms/requirements_qwen.txt b/request_llms/requirements_qwen.txt
index ea65dee..5899464 100644
--- a/request_llms/requirements_qwen.txt
+++ b/request_llms/requirements_qwen.txt
@@ -1,4 +1 @@
-modelscope
-transformers_stream_generator
-auto-gptq
-optimum
\ No newline at end of file
+dashscope
\ No newline at end of file
diff --git a/request_llms/requirements_qwen_local.txt b/request_llms/requirements_qwen_local.txt
new file mode 100644
index 0000000..ea65dee
--- /dev/null
+++ b/request_llms/requirements_qwen_local.txt
@@ -0,0 +1,4 @@
+modelscope
+transformers_stream_generator
+auto-gptq
+optimum
\ No newline at end of file
diff --git a/tests/test_llms.py b/tests/test_llms.py
index bdb622b..347c6b9 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -18,7 +18,7 @@ if __name__ == "__main__":
     # from request_llms.bridge_internlm import predict_no_ui_long_connection
     # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
     # from request_llms.bridge_qwen_7B import predict_no_ui_long_connection
-    from request_llms.bridge_qwen import predict_no_ui_long_connection
+    from request_llms.bridge_qwen_local import predict_no_ui_long_connection
     # from request_llms.bridge_spark import predict_no_ui_long_connection
     # from request_llms.bridge_zhipu import predict_no_ui_long_connection
    # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
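With `tests/test_llms.py` now pointing at the renamed local bridge, a quick smoke test looks roughly like the hypothetical snippet below. It assumes the `requirements_qwen_local.txt` stack is installed and that the checkpoint named by `QWEN_LOCAL_MODEL_SELECTION` can be downloaded on first use; the exact `llm_kwargs` keys shown are a guess at the minimum consumed by `llm_stream_generator()` via its adaptor, not the project's canonical test fixture.

```python
# Run from the repository root, in the spirit of tests/test_llms.py.
from request_llms.bridge_qwen_local import predict_no_ui_long_connection

llm_kwargs = {"llm_model": "qwen-local", "max_length": 4096,
              "top_p": 0.8, "temperature": 1.0}   # placeholder values
print(predict_no_ui_long_connection("请写出一个有趣的笑话", llm_kwargs, history=[], sys_prompt=""))
```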