Add support for Aliyun Qwen online models.

Rename model tag "qwen" to "qwen-local"
Add model tags "qwen-turbo", "qwen-plus", "qwen-max"
Add corresponding model interfaces in request_llms/bridge_all.py
Add configuration variable "DASHSCOPE_API_KEY"
Rename request_llms/bridge_qwen.py to bridge_qwen_local.py to distinguish it from the online model interface
leike0813 2023-12-20 07:37:26 +08:00
parent 9479dd984c
commit ac3d4cf073
9 changed files with 255 additions and 63 deletions
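
After this change, a typical configuration enabling both the online and the local Qwen models looks roughly like the sketch below. This is an illustrative excerpt, assuming the variables shown in the diffs further down live in the project's main configuration file; the key value is a placeholder.

AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106", "qwen-turbo", "qwen-plus", "qwen-max", "qwen-local"]
DASHSCOPE_API_KEY = "sk-..."  # key issued at https://dashscope.console.aliyun.com/
QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"  # only consulted by the "qwen-local" tag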

View File

@@ -92,8 +92,9 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview","gpt-4-vision-prev
"api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
"gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4",
"chatglm3", "moss", "claude-2"]
# P.S. 其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]
# P.S. 其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen-local", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"
# "qwen-turbo", "qwen-plus", "qwen-max"]
# 定义界面上“询问多个GPT模型”插件应该使用哪些模型请从AVAIL_LLM_MODELS中选择并在不同模型之间用`&`间隔,例如"gpt-3.5-turbo&chatglm3&azure-gpt-4"
@@ -103,7 +104,11 @@ MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3"
# 选择本地模型变体只有当AVAIL_LLM_MODELS包含了对应本地模型时才会起作用
# 如果你选择Qwen系列的模型那么请在下面的QWEN_MODEL_SELECTION中指定具体的模型
# 也可以是具体的模型路径
QWEN_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
# 接入通义千问在线大模型 https://dashscope.console.aliyun.com/
DASHSCOPE_API_KEY = "此处填阿里灵积云API秘钥" # 阿里灵积云API_KEY
# 百度千帆LLM_MODEL="qianfan"
@@ -284,6 +289,9 @@ NUM_CUSTOM_BASIC_BTN = 4
ZHIPUAI_API_KEY
ZHIPUAI_MODEL
"qwen-turbo" 等通义千问大模型
DASHSCOPE_API_KEY
"newbing" Newbing接口不再稳定不推荐使用
NEWBING_STYLE
NEWBING_COOKIES
@@ -300,7 +308,7 @@ NUM_CUSTOM_BASIC_BTN = 4
"jittorllms_pangualpha"
"jittorllms_llama"
"deepseekcoder"
"qwen"
"qwen-local"
RWKV的支持见Wiki
"llama2"

View File

@@ -2932,7 +2932,7 @@
"3. 输入修改需求": "3. Enter modification requirements",
"刷新界面 由于请求gpt需要一段时间": "Refreshing the interface takes some time due to the request for gpt",
"随机小游戏": "Random mini game",
"那么请在下面的QWEN_MODEL_SELECTION中指定具体的模型": "So please specify the specific model in QWEN_MODEL_SELECTION below",
"那么请在下面的QWEN_LOCAL_MODEL_SELECTION中指定具体的模型": "So please specify the specific model in QWEN_LOCAL_MODEL_SELECTION below",
"表值": "Table value",
"我画你猜": "I draw, you guess",
"狗": "Dog",

View File

@@ -431,16 +431,48 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS:
})
except:
print(trimmed_format_exc())
if "qwen" in AVAIL_LLM_MODELS:
if "qwen-local" in AVAIL_LLM_MODELS:
try:
from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui
from .bridge_qwen_local import predict as qwen_local_ui
model_info.update({
"qwen-local": {
"fn_with_ui": qwen_local_ui,
"fn_without_ui": qwen_local_noui,
"endpoint": None,
"max_token": 4096,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
})
except:
print(trimmed_format_exc())
if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai
try:
from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
from .bridge_qwen import predict as qwen_ui
model_info.update({
"qwen": {
"qwen-turbo": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"endpoint": None,
"max_token": 4096,
"max_token": 6144,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-plus": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"endpoint": None,
"max_token": 30720,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-max": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"endpoint": None,
"max_token": 28672,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
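
For orientation, each entry registered above binds a model tag to its two entry points, so the rest of bridge_all.py can dispatch purely by tag. The lookup below is an illustrative sketch of that dispatch, not the project's exact code:

handler = model_info["qwen-max"]["fn_without_ui"]  # resolves to bridge_qwen.predict_no_ui_long_connection
llm_kwargs = {'llm_model': 'qwen-max', 'top_p': 0.8, 'temperature': 1.0}
reply = handler("你好", llm_kwargs, history=[], sys_prompt="You are a helpful assistant.")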

View File

@@ -1,59 +1,66 @@
-model_name = "Qwen"
-cmd_to_install = "`pip install -r request_llms/requirements_qwen.txt`"
-
-from toolbox import ProxyNetworkActivate, get_conf
-from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
-
-
-# ------------------------------------------------------------------------------------------------------------------------
-# 🔌💻 Local Model
-# ------------------------------------------------------------------------------------------------------------------------
-class GetQwenLMHandle(LocalLLMHandle):
-
-    def load_model_info(self):
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
-        self.model_name = model_name
-        self.cmd_to_install = cmd_to_install
-
-    def load_model_and_tokenizer(self):
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
-        # from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
-        from transformers import AutoModelForCausalLM, AutoTokenizer
-        from transformers.generation import GenerationConfig
-        with ProxyNetworkActivate('Download_LLM'):
-            model_id = get_conf('QWEN_MODEL_SELECTION')
-            self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True)
-            # use fp16
-            model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
-            model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
-            self._model = model
-
-        return self._model, self._tokenizer
-
-    def llm_stream_generator(self, **kwargs):
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
-        def adaptor(kwargs):
-            query = kwargs['query']
-            max_length = kwargs['max_length']
-            top_p = kwargs['top_p']
-            temperature = kwargs['temperature']
-            history = kwargs['history']
-            return query, max_length, top_p, temperature, history
-
-        query, max_length, top_p, temperature, history = adaptor(kwargs)
-
-        for response in self._model.chat_stream(self._tokenizer, query, history=history):
-            yield response
-
-    def try_to_import_special_deps(self, **kwargs):
-        # import something that will raise error if the user does not install requirement_*.txt
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行
-        import importlib
-        importlib.import_module('modelscope')
-
-
-# ------------------------------------------------------------------------------------------------------------------------
-# 🔌💻 GPT-Academic Interface
-# ------------------------------------------------------------------------------------------------------------------------
-predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
+import time
+import os
+from toolbox import update_ui, get_conf, update_ui_lastest_msg
+from toolbox import check_packages, report_exception
+
+model_name = 'Qwen'
+
+def validate_key():
+    DASHSCOPE_API_KEY = get_conf("DASHSCOPE_API_KEY")
+    if DASHSCOPE_API_KEY == '': return False
+    return True
+
+if not validate_key():
+    raise RuntimeError('请配置DASHSCOPE_API_KEY')
+
+os.environ['DASHSCOPE_API_KEY'] = get_conf("DASHSCOPE_API_KEY")
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
+    """
+    多线程方法
+    函数的说明请见 request_llms/bridge_all.py
+    """
+    watch_dog_patience = 5
+    response = ""
+
+    from .com_qwenapi import QwenRequestInstance
+    sri = QwenRequestInstance()
+    for response in sri.generate(inputs, llm_kwargs, history, sys_prompt):
+        if len(observe_window) >= 1:
+            observe_window[0] = response
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
+    return response
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+    单线程方法
+    函数的说明请见 request_llms/bridge_all.py
+    """
+    chatbot.append((inputs, ""))
+    yield from update_ui(chatbot=chatbot, history=history)
+
+    # 尝试导入依赖,如果缺少依赖,则给出安装建议
+    try:
+        check_packages(["dashscope"])
+    except:
+        yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。",
+                                         chatbot=chatbot, history=history, delay=0)
+        return
+
+    if additional_fn is not None:
+        from core_functional import handle_core_functionality
+        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
+
+    # 开始接收回复
+    from .com_qwenapi import QwenRequestInstance
+    sri = QwenRequestInstance()
+    for response in sri.generate(inputs, llm_kwargs, history, system_prompt):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)

+    # 总结输出
+    if response == f"[Local Message] 等待{model_name}响应中 ...":
+        response = f"[Local Message] {model_name}响应异常 ..."
+    history.extend([inputs, response])
+    yield from update_ui(chatbot=chatbot, history=history)
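
The rewritten bridge keeps the usual bridge contract. A minimal caller-side sketch of the non-UI entry point and its observe_window watchdog (assumed usage, not part of this commit): slot 0 receives the streamed text, slot 1 carries the caller's last heartbeat, and the bridge raises if the heartbeat is older than watch_dog_patience seconds.

import time
from request_llms.bridge_qwen import predict_no_ui_long_connection

observe_window = ["", time.time()]  # [latest streamed text, last heartbeat timestamp]
reply = predict_no_ui_long_connection(
    inputs="用一句话介绍通义千问",
    llm_kwargs={'llm_model': 'qwen-turbo', 'top_p': 0.8, 'temperature': 1.0},
    history=[], sys_prompt="", observe_window=observe_window)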

View File

@@ -0,0 +1,59 @@
model_name = "Qwen_local"
cmd_to_install = "`pip install -r request_llms/requirements_qwen_local.txt`"
from toolbox import ProxyNetworkActivate, get_conf
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
class GetQwenLMHandle(LocalLLMHandle):
def load_model_info(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
self.model_name = model_name
self.cmd_to_install = cmd_to_install
def load_model_and_tokenizer(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
# from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
with ProxyNetworkActivate('Download_LLM'):
model_id = get_conf('QWEN_LOCAL_MODEL_SELECTION')
self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True)
# use fp16
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
self._model = model
return self._model, self._tokenizer
def llm_stream_generator(self, **kwargs):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
def adaptor(kwargs):
query = kwargs['query']
max_length = kwargs['max_length']
top_p = kwargs['top_p']
temperature = kwargs['temperature']
history = kwargs['history']
return query, max_length, top_p, temperature, history
query, max_length, top_p, temperature, history = adaptor(kwargs)
for response in self._model.chat_stream(self._tokenizer, query, history=history):
yield response
def try_to_import_special_deps(self, **kwargs):
# import something that will raise error if the user does not install requirement_*.txt
# 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行
import importlib
importlib.import_module('modelscope')
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)

View File

@@ -0,0 +1,85 @@
from http import HTTPStatus
from toolbox import get_conf
import threading
import logging
timeout_bot_msg = '[Local Message] Request timeout. Network error.'
class QwenRequestInstance():
def __init__(self):
self.time_to_yield_event = threading.Event()
self.time_to_exit_event = threading.Event()
self.result_buf = ""
def generate(self, inputs, llm_kwargs, history, system_prompt):
# import _thread as thread
from dashscope import Generation
QWEN_MODEL = {
'qwen-turbo': Generation.Models.qwen_turbo,
'qwen-plus': Generation.Models.qwen_plus,
'qwen-max': Generation.Models.qwen_max,
}[llm_kwargs['llm_model']]
top_p = llm_kwargs.get('top_p', 0.8)
if top_p == 0: top_p += 1e-5
if top_p == 1: top_p -= 1e-5
self.result_buf = ""
responses = Generation.call(
model=QWEN_MODEL,
messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
top_p=top_p,
temperature=llm_kwargs.get('temperature', 1.0),
result_format='message',
stream=True,
incremental_output=True
)
for response in responses:
if response.status_code == HTTPStatus.OK:
if response.output.choices[0].finish_reason == 'stop':
yield self.result_buf
break
elif response.output.choices[0].finish_reason == 'length':
self.result_buf += "[Local Message] 生成长度过长,后续输出被截断"
yield self.result_buf
break
else:
self.result_buf += response.output.choices[0].message.content
yield self.result_buf
else:
self.result_buf += f"[Local Message] 请求错误:状态码:{response.status_code},错误码:{response.code},消息:{response.message}"
yield self.result_buf
break
logging.info(f'[raw_input] {inputs}')
logging.info(f'[response] {self.result_buf}')
return self.result_buf
def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
conversation_cnt = len(history) // 2
if system_prompt == '': system_prompt = 'Hello!'
messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}]
if conversation_cnt:
for index in range(0, 2*conversation_cnt, 2):
what_i_have_asked = {}
what_i_have_asked["role"] = "user"
what_i_have_asked["content"] = history[index]
what_gpt_answer = {}
what_gpt_answer["role"] = "assistant"
what_gpt_answer["content"] = history[index+1]
if what_i_have_asked["content"] != "":
if what_gpt_answer["content"] == "":
continue
if what_gpt_answer["content"] == timeout_bot_msg:
continue
messages.append(what_i_have_asked)
messages.append(what_gpt_answer)
else:
messages[-1]['content'] = what_gpt_answer['content']
what_i_ask_now = {}
what_i_ask_now["role"] = "user"
what_i_ask_now["content"] = inputs
messages.append(what_i_ask_now)
return messages
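
A worked example of the payload generate_message_payload builds: the system prompt is sent as an opening user turn answered by a canned "Certainly!", the flat history list is folded into alternating user/assistant messages (turns whose answer is empty or is the timeout message are skipped), and the current input goes last. With system_prompt="You are a helpful assistant.", history=["你好", "你好!有什么可以帮您?"] and inputs="今天天气如何?", the function returns:

[{"role": "user", "content": "You are a helpful assistant."},
 {"role": "assistant", "content": "Certainly!"},
 {"role": "user", "content": "你好"},
 {"role": "assistant", "content": "你好!有什么可以帮您?"},
 {"role": "user", "content": "今天天气如何?"}]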

View File

@@ -1,4 +1 @@
modelscope
transformers_stream_generator
auto-gptq
optimum
dashscope

View File

@@ -0,0 +1,4 @@
modelscope
transformers_stream_generator
auto-gptq
optimum

View File

@@ -18,7 +18,7 @@ if __name__ == "__main__":
# from request_llms.bridge_internlm import predict_no_ui_long_connection
# from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
# from request_llms.bridge_qwen_7B import predict_no_ui_long_connection
from request_llms.bridge_qwen import predict_no_ui_long_connection
from request_llms.bridge_qwen_local import predict_no_ui_long_connection
# from request_llms.bridge_spark import predict_no_ui_long_connection
# from request_llms.bridge_zhipu import predict_no_ui_long_connection
# from request_llms.bridge_chatglm3 import predict_no_ui_long_connection