Merge branch 'frontier' of https://github.com/leike0813/gpt_academic into leike0813-frontier
This commit is contained in:
commit 4f0dcc431c

config.py (16 changes)
@@ -92,8 +92,9 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview","gpt-4-vision-prev
                     "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
                     "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4",
                     "chatglm3", "moss", "claude-2"]
-# P.S. Other available models also include ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613",  "gpt-3.5-random"
-# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]
+# P.S. Other available models also include ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen-local", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613",  "gpt-3.5-random"
+# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"
+# “qwen-turbo", "qwen-plus", "qwen-max"]


 # Define which models the "Ask multiple GPT models" plugin in the UI should use; choose from AVAIL_LLM_MODELS and separate them with `&`, e.g. "gpt-3.5-turbo&chatglm3&azure-gpt-4"
@@ -103,7 +104,11 @@ MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3"
 # Select the local model variant (only takes effect when AVAIL_LLM_MODELS contains the corresponding local model)
 # If you choose a Qwen-family model, specify the concrete model in QWEN_MODEL_SELECTION below
 # It can also be a concrete model path
-QWEN_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
+QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"


+# Connect to the Tongyi Qianwen online models https://dashscope.console.aliyun.com/
+DASHSCOPE_API_KEY = "此处填阿里灵积云API秘钥" # Alibaba DashScope API_KEY
+
+
 # Baidu Qianfan (LLM_MODEL="qianfan")
@@ -284,6 +289,9 @@ NUM_CUSTOM_BASIC_BTN = 4
 │   ├── ZHIPUAI_API_KEY
 │   └── ZHIPUAI_MODEL
 │
+├── "qwen-turbo" and other Tongyi Qianwen (Qwen) online models
+│   └──  DASHSCOPE_API_KEY
+│
 └── "newbing" The Newbing interface is no longer stable; not recommended
     ├── NEWBING_STYLE
     └── NEWBING_COOKIES
@@ -300,7 +308,7 @@ NUM_CUSTOM_BASIC_BTN = 4
 ├── "jittorllms_pangualpha"
 ├── "jittorllms_llama"
 ├── "deepseekcoder"
-├── "qwen"
+├── "qwen-local"
 ├──  RWKV support: see the Wiki
 └── "llama2"

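Taken together, the config.py hunks above split the old "qwen" option into a local variant ("qwen-local", configured through QWEN_LOCAL_MODEL_SELECTION) and the DashScope-hosted online models ("qwen-turbo", "qwen-plus", "qwen-max", authenticated through DASHSCOPE_API_KEY). A minimal configuration sketch based only on the keys shown in this diff; the key value and the model list are placeholders:

# Minimal config.py sketch for the new Qwen options (placeholder values).
AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "qwen-turbo", "qwen-local"]

# Online Tongyi Qianwen models are served through DashScope; the key comes from
# https://dashscope.console.aliyun.com/ (placeholder shown here).
DASHSCOPE_API_KEY = "sk-xxxxxxxxxxxxxxxx"

# Only consulted when "qwen-local" is enabled; a model id or a local path.
QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"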
@@ -431,16 +431,48 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS:
         })
     except:
         print(trimmed_format_exc())
-if "qwen" in AVAIL_LLM_MODELS:
+if "qwen-local" in AVAIL_LLM_MODELS:
+    try:
+        from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui
+        from .bridge_qwen_local import predict as qwen_local_ui
+        model_info.update({
+            "qwen-local": {
+                "fn_with_ui": qwen_local_ui,
+                "fn_without_ui": qwen_local_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
+if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS:   # zhipuai
     try:
         from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
         from .bridge_qwen import predict as qwen_ui
         model_info.update({
-            "qwen": {
+            "qwen-turbo": {
                 "fn_with_ui": qwen_ui,
                 "fn_without_ui": qwen_noui,
                 "endpoint": None,
-                "max_token": 4096,
+                "max_token": 6144,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "qwen-plus": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "endpoint": None,
+                "max_token": 30720,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "qwen-max": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "endpoint": None,
+                "max_token": 28672,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             }
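The hunk above (apparently the model registry used by the bridge layer, judging from the AVAIL_LLM_MODELS context line) registers one model_info entry per model name, each carrying both entry points plus token accounting. A simplified sketch of how such a registry is typically consumed; demo_dispatch is a hypothetical helper, not code from this commit:

# Hypothetical illustration of the model_info registry pattern (not project code).
def demo_dispatch(model_info, llm_model, inputs, llm_kwargs):
    if llm_model not in model_info:
        raise KeyError(f"{llm_model} is not registered in model_info")
    entry = model_info[llm_model]
    # fn_without_ui is the blocking multi-threaded entry point;
    # fn_with_ui is the generator that streams updates to the web UI.
    predict_no_ui = entry["fn_without_ui"]
    return predict_no_ui(inputs, llm_kwargs, history=[], sys_prompt="")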
@@ -1,59 +1,66 @@
-model_name = "Qwen"
-cmd_to_install = "`pip install -r request_llms/requirements_qwen.txt`"
-
-from toolbox import ProxyNetworkActivate, get_conf
-from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
-
-
-
-# ------------------------------------------------------------------------------------------------------------------------
-# 🔌💻 Local Model
-# ------------------------------------------------------------------------------------------------------------------------
-class GetQwenLMHandle(LocalLLMHandle):
-
-    def load_model_info(self):
-        # 🏃♂️🏃♂️🏃♂️ runs in the child process
-        self.model_name = model_name
-        self.cmd_to_install = cmd_to_install
-
-    def load_model_and_tokenizer(self):
-        # 🏃♂️🏃♂️🏃♂️ runs in the child process
-        # from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
-        from transformers import AutoModelForCausalLM, AutoTokenizer
-        from transformers.generation import GenerationConfig
-        with ProxyNetworkActivate('Download_LLM'):
-            model_id = get_conf('QWEN_MODEL_SELECTION')
-            self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True)
-            # use fp16
-            model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
-            model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # generation length, top_p and other hyper-parameters can be set here
-            self._model = model
-
-        return self._model, self._tokenizer
-
-    def llm_stream_generator(self, **kwargs):
-        # 🏃♂️🏃♂️🏃♂️ runs in the child process
-        def adaptor(kwargs):
-            query = kwargs['query']
-            max_length = kwargs['max_length']
-            top_p = kwargs['top_p']
-            temperature = kwargs['temperature']
-            history = kwargs['history']
-            return query, max_length, top_p, temperature, history
-
-        query, max_length, top_p, temperature, history = adaptor(kwargs)
-
-        for response in self._model.chat_stream(self._tokenizer, query, history=history):
-            yield response
-
-    def try_to_import_special_deps(self, **kwargs):
-        # import something that will raise error if the user does not install requirement_*.txt
-        # 🏃♂️🏃♂️🏃♂️ runs in the main process
-        import importlib
-        importlib.import_module('modelscope')
-
-
-# ------------------------------------------------------------------------------------------------------------------------
-# 🔌💻 GPT-Academic Interface
-# ------------------------------------------------------------------------------------------------------------------------
-predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
+import time
+import os
+from toolbox import update_ui, get_conf, update_ui_lastest_msg
+from toolbox import check_packages, report_exception
+
+model_name = 'Qwen'
+
+def validate_key():
+    DASHSCOPE_API_KEY = get_conf("DASHSCOPE_API_KEY")
+    if DASHSCOPE_API_KEY == '': return False
+    return True
+
+if not validate_key():
+    raise RuntimeError('请配置DASHSCOPE_API_KEY')
+os.environ['DASHSCOPE_API_KEY'] = get_conf("DASHSCOPE_API_KEY")
+
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
+    """
+        ⭐ Multi-threaded method
+        See request_llms/bridge_all.py for documentation of this function
+    """
+    watch_dog_patience = 5
+    response = ""
+
+    from .com_qwenapi import QwenRequestInstance
+    sri = QwenRequestInstance()
+    for response in sri.generate(inputs, llm_kwargs, history, sys_prompt):
+        if len(observe_window) >= 1:
+            observe_window[0] = response
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
+    return response
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+        ⭐ Single-threaded method
+        See request_llms/bridge_all.py for documentation of this function
+    """
+    chatbot.append((inputs, ""))
+    yield from update_ui(chatbot=chatbot, history=history)
+
+    # Try to import the dependency; if it is missing, suggest how to install it
+    try:
+        check_packages(["dashscope"])
+    except:
+        yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。",
+                                         chatbot=chatbot, history=history, delay=0)
+        return
+
+    if additional_fn is not None:
+        from core_functional import handle_core_functionality
+        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
+
+    # Start receiving the reply
+    from .com_qwenapi import QwenRequestInstance
+    sri = QwenRequestInstance()
+    for response in sri.generate(inputs, llm_kwargs, history, system_prompt):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)
+
+    # Wrap up the output
+    if response == f"[Local Message] 等待{model_name}响应中 ...":
+        response = f"[Local Message] {model_name}响应异常 ..."
+    history.extend([inputs, response])
+    yield from update_ui(chatbot=chatbot, history=history)
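The rewrite above turns this bridge into a thin DashScope client: the API key is validated at import time and both entry points stream through QwenRequestInstance, while the old local-inference code moves to the new bridge_qwen_local.py below. A hedged usage sketch of the multi-threaded entry point; the prompt strings are illustrative and the llm_kwargs keys are the ones read by com_qwenapi.py:

# Illustrative call into the rewritten bridge (DASHSCOPE_API_KEY must be set,
# otherwise the module raises RuntimeError at import time).
from request_llms.bridge_qwen import predict_no_ui_long_connection

llm_kwargs = {
    "llm_model": "qwen-turbo",   # or "qwen-plus" / "qwen-max"
    "top_p": 0.8,
    "temperature": 1.0,
}
answer = predict_no_ui_long_connection(
    inputs="Summarize the following abstract in one sentence: ...",
    llm_kwargs=llm_kwargs,
    history=[],
    sys_prompt="You are a helpful academic assistant.",
)
print(answer)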
							
								
								
									
request_llms/bridge_qwen_local.py (new file, 59 lines)

@@ -0,0 +1,59 @@
+model_name = "Qwen_local"
+cmd_to_install = "`pip install -r request_llms/requirements_qwen_local.txt`"
+
+from toolbox import ProxyNetworkActivate, get_conf
+from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
+
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 Local Model
+# ------------------------------------------------------------------------------------------------------------------------
+class GetQwenLMHandle(LocalLLMHandle):
+
+    def load_model_info(self):
+        # 🏃♂️🏃♂️🏃♂️ runs in the child process
+        self.model_name = model_name
+        self.cmd_to_install = cmd_to_install
+
+    def load_model_and_tokenizer(self):
+        # 🏃♂️🏃♂️🏃♂️ runs in the child process
+        # from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        from transformers.generation import GenerationConfig
+        with ProxyNetworkActivate('Download_LLM'):
+            model_id = get_conf('QWEN_LOCAL_MODEL_SELECTION')
+            self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True)
+            # use fp16
+            model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
+            model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # generation length, top_p and other hyper-parameters can be set here
+            self._model = model
+
+        return self._model, self._tokenizer
+
+    def llm_stream_generator(self, **kwargs):
+        # 🏃♂️🏃♂️🏃♂️ runs in the child process
+        def adaptor(kwargs):
+            query = kwargs['query']
+            max_length = kwargs['max_length']
+            top_p = kwargs['top_p']
+            temperature = kwargs['temperature']
+            history = kwargs['history']
+            return query, max_length, top_p, temperature, history
+
+        query, max_length, top_p, temperature, history = adaptor(kwargs)
+
+        for response in self._model.chat_stream(self._tokenizer, query, history=history):
+            yield response
+
+    def try_to_import_special_deps(self, **kwargs):
+        # import something that will raise error if the user does not install requirement_*.txt
+        # 🏃♂️🏃♂️🏃♂️ runs in the main process
+        import importlib
+        importlib.import_module('modelscope')
+
+
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 GPT-Academic Interface
+# ------------------------------------------------------------------------------------------------------------------------
+predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
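bridge_qwen_local.py keeps the previous local-inference path alive under the new "qwen-local" name: LocalLLMHandle runs the heavy lifting in a child process, loading the checkpoint named by QWEN_LOCAL_MODEL_SELECTION and streaming partial replies through chat_stream. Roughly what that child process does, condensed into a standalone sketch; it assumes the Qwen remote-code checkpoint exposes chat_stream exactly as used in the diff:

# Condensed standalone sketch of the child-process logic above (assumption:
# the trust_remote_code checkpoint provides chat_stream as used in the diff).
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

model_id = "Qwen/Qwen-1_8B-Chat-Int8"   # default QWEN_LOCAL_MODEL_SELECTION
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto",
                                             trust_remote_code=True).eval()
model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)

for partial in model.chat_stream(tokenizer, "Hello", history=[]):
    print(partial)   # each item is the response accumulated so far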
							
								
								
									
request_llms/com_qwenapi.py (new file, 85 lines)

@@ -0,0 +1,85 @@
+from http import HTTPStatus
+from toolbox import get_conf
+import threading
+import logging
+
+timeout_bot_msg = '[Local Message] Request timeout. Network error.'
+
+class QwenRequestInstance():
+    def __init__(self):
+
+        self.time_to_yield_event = threading.Event()
+        self.time_to_exit_event = threading.Event()
+
+        self.result_buf = ""
+
+    def generate(self, inputs, llm_kwargs, history, system_prompt):
+        # import _thread as thread
+        from dashscope import Generation
+        QWEN_MODEL = {
+            'qwen-turbo': Generation.Models.qwen_turbo,
+            'qwen-plus': Generation.Models.qwen_plus,
+            'qwen-max': Generation.Models.qwen_max,
+        }[llm_kwargs['llm_model']]
+        top_p = llm_kwargs.get('top_p', 0.8)
+        if top_p == 0: top_p += 1e-5
+        if top_p == 1: top_p -= 1e-5
+
+        self.result_buf = ""
+        responses = Generation.call(
+            model=QWEN_MODEL,
+            messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
+            top_p=top_p,
+            temperature=llm_kwargs.get('temperature', 1.0),
+            result_format='message',
+            stream=True,
+            incremental_output=True
+        )
+
+        for response in responses:
+            if response.status_code == HTTPStatus.OK:
+                if response.output.choices[0].finish_reason == 'stop':
+                    yield self.result_buf
+                    break
+                elif response.output.choices[0].finish_reason == 'length':
+                    self.result_buf += "[Local Message] 生成长度过长,后续输出被截断"
+                    yield self.result_buf
+                    break
+                else:
+                    self.result_buf += response.output.choices[0].message.content
+                    yield self.result_buf
+            else:
+                self.result_buf += f"[Local Message] 请求错误:状态码:{response.status_code},错误码:{response.code},消息:{response.message}"
+                yield self.result_buf
+                break
+        logging.info(f'[raw_input] {inputs}')
+        logging.info(f'[response] {self.result_buf}')
+        return self.result_buf
+
+
+def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
+    conversation_cnt = len(history) // 2
+    if system_prompt == '': system_prompt = 'Hello!'
+    messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}]
+    if conversation_cnt:
+        for index in range(0, 2*conversation_cnt, 2):
+            what_i_have_asked = {}
+            what_i_have_asked["role"] = "user"
+            what_i_have_asked["content"] = history[index]
+            what_gpt_answer = {}
+            what_gpt_answer["role"] = "assistant"
+            what_gpt_answer["content"] = history[index+1]
+            if what_i_have_asked["content"] != "":
+                if what_gpt_answer["content"] == "":
+                    continue
+                if what_gpt_answer["content"] == timeout_bot_msg:
+                    continue
+                messages.append(what_i_have_asked)
+                messages.append(what_gpt_answer)
+            else:
+                messages[-1]['content'] = what_gpt_answer['content']
+    what_i_ask_now = {}
+    what_i_ask_now["role"] = "user"
+    what_i_ask_now["content"] = inputs
+    messages.append(what_i_ask_now)
+    return messages
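generate_message_payload flattens the project's history list (alternating user and assistant strings) into the role/content message list that dashscope's Generation.call expects, seeding the conversation with the system prompt as a user turn answered by "Certainly!". A small worked example; the strings are made up:

# Worked example for generate_message_payload (illustrative values only).
from request_llms.com_qwenapi import generate_message_payload

history = ["What is attention?", "It weights pairwise token interactions."]
messages = generate_message_payload(
    inputs="Now give a one-sentence summary.",
    llm_kwargs={},
    history=history,
    system_prompt="You are a concise assistant.",
)
# messages ==
# [{'role': 'user',      'content': 'You are a concise assistant.'},
#  {'role': 'assistant', 'content': 'Certainly!'},
#  {'role': 'user',      'content': 'What is attention?'},
#  {'role': 'assistant', 'content': 'It weights pairwise token interactions.'},
#  {'role': 'user',      'content': 'Now give a one-sentence summary.'}]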
@@ -1,4 +1 @@
-modelscope
-transformers_stream_generator
-auto-gptq
-optimum
+dashscope
							
								
								
									
request_llms/requirements_qwen_local.txt (new file, 4 lines)

@@ -0,0 +1,4 @@
+modelscope
+transformers_stream_generator
+auto-gptq
+optimum
@@ -18,7 +18,7 @@ if __name__ == "__main__":
     # from request_llms.bridge_internlm import predict_no_ui_long_connection
     # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
     # from request_llms.bridge_qwen_7B import predict_no_ui_long_connection
-    from request_llms.bridge_qwen import predict_no_ui_long_connection
+    from request_llms.bridge_qwen_local import predict_no_ui_long_connection
     # from request_llms.bridge_spark import predict_no_ui_long_connection
     # from request_llms.bridge_zhipu import predict_no_ui_long_connection
     # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection