I am a large-scale language model from DAMO Academy; my name is Tongyi Qianwen (通义千问 / Qwen).
parent 4d70b3786f
commit c17fc2a9b5
@@ -71,7 +71,7 @@ MAX_RETRY = 2
 # Model selection (note: LLM_MODEL is the model selected by default; it *must* be included in the AVAIL_LLM_MODELS list)
 LLM_MODEL = "gpt-3.5-turbo" # options ↓↓↓
 AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
-# P.S. Other available models also include ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
+# P.S. Other available models also include ["qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
 
 
 # ChatGLM(2) Finetune Model Path (if you use a ChatGLM2 fine-tuned model, add "chatglmft" to AVAIL_LLM_MODELS)
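Net effect of this hunk: "qwen" is now listed among the extra models that can be appended to AVAIL_LLM_MODELS. A minimal sketch of the resulting selection in the project's configuration file, with the model list abbreviated for illustration:

# Sketch of the relevant config lines once Qwen is enabled (list shortened; only these two variables matter here)
LLM_MODEL = "gpt-3.5-turbo"   # default model; must itself appear in AVAIL_LLM_MODELS
AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "gpt-4",
                    "chatglm", "moss", "newbing", "stack-claude",
                    "qwen"]   # append "qwen" here to make the new bridge selectable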
@@ -335,6 +335,22 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS:
         })
     except:
         print(trimmed_format_exc())
+if "qwen" in AVAIL_LLM_MODELS:
+    try:
+        from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
+        from .bridge_qwen import predict as qwen_ui
+        model_info.update({
+            "qwen": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
 
 def LLM_CATCH_EXCEPTION(f):
     """
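The entry registered above follows the same shape as the other local-model entries in model_info: a streaming function for the web UI, a blocking function for plugins, no HTTP endpoint, a 4096-token budget, and the GPT-3.5 tokenizer reused for approximate token counting. A self-contained sketch of how such an entry is consumed; the dummy functions and the dispatch call are illustrative assumptions, not this project's actual code:

# Self-contained sketch of the model_info registry pattern (dummy bridge functions for illustration)
def dummy_qwen_ui(inputs, llm_kwargs, *args, **kwargs):
    # stands in for bridge_qwen.predict: a generator that streams partial replies to the UI
    yield f"[qwen/ui] {inputs}"

def dummy_qwen_noui(inputs, llm_kwargs, history, sys_prompt, observe_window=None):
    # stands in for bridge_qwen.predict_no_ui_long_connection: blocking call used by plugins
    return f"[qwen/noui] {inputs}"

model_info = {}
model_info.update({
    "qwen": {
        "fn_with_ui": dummy_qwen_ui,
        "fn_without_ui": dummy_qwen_noui,
        "endpoint": None,          # local model, no remote endpoint
        "max_token": 4096,
        "tokenizer": None,         # the real entry reuses tokenizer_gpt35 as an approximation
        "token_cnt": len,          # the real entry reuses get_token_num_gpt35
    }
})

# dispatch by model name, roughly as the project does internally
print(model_info["qwen"]["fn_without_ui"]("hello", llm_kwargs={}, history=[], sys_prompt=""))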
@@ -32,15 +32,13 @@ class GetONNXGLMHandle(LocalLLMHandle):
 
         model_id = 'qwen/Qwen-7B-Chat'
         revision = 'v1.0.1'
-        tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
+        self._tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
         # use fp16
-        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision,
-                                                    trust_remote_code=True, fp16=True).eval()
-        model.generation_config = GenerationConfig.from_pretrained(model_id,
-                                                                trust_remote_code=True)  # generation length, top_p and other hyperparameters can be adjusted here
+        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision, trust_remote_code=True, fp16=True).eval()
+        model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # generation length, top_p and other hyperparameters can be adjusted here
         self._model = model
 
-        return self._model, None
+        return self._model, self._tokenizer
 
     def llm_stream_generator(self, **kwargs):
         # 🏃♂️🏃♂️🏃♂️ runs in the child process
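The refactor above stores the tokenizer on the handle (self._tokenizer) and returns it alongside the model instead of None, so later methods no longer depend on a local variable from load time. A standalone sketch of the same loading path, runnable outside the handle class; it assumes the imports come from modelscope (whose AutoTokenizer / AutoModelForCausalLM / GenerationConfig mirror the transformers API and resolve the 'qwen/Qwen-7B-Chat' id and 'v1.0.1' revision) and that a GPU with enough memory for fp16 weights is available:

# Standalone sketch of loading Qwen-7B-Chat the way the handle does (import source is an assumption)
from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

model_id = 'qwen/Qwen-7B-Chat'
revision = 'v1.0.1'
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
# fp16 weights, automatically placed across the available devices
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision,
                                             trust_remote_code=True, fp16=True).eval()
# generation length, top_p and other sampling hyperparameters come from the model's own config
model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)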
@@ -54,8 +52,7 @@ class GetONNXGLMHandle(LocalLLMHandle):
 
         query, max_length, top_p, temperature, history = adaptor(kwargs)
 
-        prompt = chat_template(history, query)
-        for response in model.chat(tokenizer, query, history=history, stream=True):
+        for response in self._model.chat(self._tokenizer, query, history=history, stream=True):
             yield response
 
     def try_to_import_special_deps(self, **kwargs):
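With the tokenizer kept on the handle, the streaming generator can call the model's own chat() helper directly and drop the manual chat_template prompt construction. A minimal sketch of consuming that stream, reusing the model and tokenizer from the loading sketch above; chat() is provided by the model's trust_remote_code implementation, and the assumption here is that each yielded chunk is the cumulative reply so far, matching how the hunk simply yields it:

# Sketch: stream a reply the same way llm_stream_generator does
history = []                                  # prior (user, assistant) turns, empty for a fresh chat
query = "Introduce yourself in one sentence."
last = ""
for response in model.chat(tokenizer, query, history=history, stream=True):
    print(response[len(last):], end="", flush=True)   # print only the newly generated suffix (assumes cumulative chunks)
    last = response
print()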