Merge pull request #979 from fenglui/master
Add a ChatGLM INT4 configuration option so that ChatGLM can also be selected on machines with limited VRAM
commit 1732127a28
@@ -80,6 +80,7 @@ ChatGLM_PTUNING_CHECKPOINT = "" # e.g. "/home/hmp/ChatGLM2-6B/ptuning/output/6b
 # Execution mode for local LLMs such as ChatGLM: CPU/GPU
 LOCAL_MODEL_DEVICE = "cpu" # alternative: "cuda"
+LOCAL_MODEL_QUANT = "FP16" # default "FP16"; "INT4" enables the quantized INT4 build; "INT8" enables the quantized INT8 build


 # Number of parallel gradio threads (no need to modify)
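For anyone trying this out, a minimal usage sketch of the new option in config.py (the INT4/INT8 repo names come from the diff below; the exact VRAM savings depend on hardware and are not quantified in this PR):

    # config.py -- pick the quantized INT4 build of ChatGLM2-6B instead of
    # the FP16 default; useful on GPUs with limited VRAM.
    LOCAL_MODEL_DEVICE = "cuda"   # run the local model on the GPU ("cpu" also works)
    LOCAL_MODEL_QUANT = "INT4"    # accepted values: "FP16" (default), "INT4", "INT8"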
@@ -37,15 +37,23 @@ class GetGLMHandle(Process):
         # Executed in the child process
         # First run: load the parameters
         retry = 0
+        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')
+
+        if LOCAL_MODEL_QUANT == "INT4":         # INT4
+            _model_name_ = "THUDM/chatglm2-6b-int4"
+        elif LOCAL_MODEL_QUANT == "INT8":       # INT8
+            _model_name_ = "THUDM/chatglm2-6b-int8"
+        else:
+            _model_name_ = "THUDM/chatglm2-6b"  # FP16
+
         while True:
             try:
                 if self.chatglm_model is None:
-                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
+                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
-                    device, = get_conf('LOCAL_MODEL_DEVICE')
                     if device=='cpu':
-                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
+                        self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).float()
                     else:
-                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
+                        self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).half().cuda()
                     self.chatglm_model = self.chatglm_model.eval()
                     break
                 else:
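The hunk above does two things: it maps LOCAL_MODEL_QUANT to a Hugging Face repo id once, before the retry loop, and it drops the per-iteration device read, since get_conf now returns both values in a single call. The selection logic, as a standalone sketch (pick_chatglm_repo is a hypothetical helper name for illustration, not part of the PR):

    def pick_chatglm_repo(local_model_quant: str) -> str:
        # Same mapping as the diff: anything other than "INT4"/"INT8"
        # falls back to the FP16 build.
        if local_model_quant == "INT4":
            return "THUDM/chatglm2-6b-int4"
        elif local_model_quant == "INT8":
            return "THUDM/chatglm2-6b-int8"
        return "THUDM/chatglm2-6b"  # FP16 default

    assert pick_chatglm_repo("INT4") == "THUDM/chatglm2-6b-int4"
    assert pick_chatglm_repo("anything-else") == "THUDM/chatglm2-6b"

Note that the quantized repos ship pre-quantized weights, so the existing .float() / .half().cuda() loading paths are reused unchanged for all three variants.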
@@ -681,6 +681,7 @@ def read_single_conf_with_lru_cache(arg):
         else:
             print亮红("[API_KEY] Your API_KEY does not match any known key format. Please correct the API key in the config file before running again.")
     if arg == 'proxies':
+        if not read_single_conf_with_lru_cache('USE_PROXY'): r = None   # check USE_PROXY so that proxies cannot take effect on its own
         if r is None:
             print亮红('[PROXY] Network proxy status: not configured. Without a proxy, the OpenAI family of models is most likely unreachable. Suggestion: check whether the USE_PROXY option has been changed.')
         else:
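This last hunk guards against a stale proxies entry: when USE_PROXY is false, the proxies value is forced to None so it cannot take effect on its own. A toy sketch of that behavior (CONF, read_conf, and the proxy URL are illustrative stand-ins; the real read_single_conf_with_lru_cache reads the project config and is wrapped in lru_cache):

    CONF = {
        'USE_PROXY': False,
        'proxies': {'http': 'socks5h://localhost:11284'},  # hypothetical leftover entry
    }

    def read_conf(arg):
        r = CONF[arg]
        if arg == 'proxies' and not read_conf('USE_PROXY'):
            r = None  # USE_PROXY=False overrides any configured proxies
        return r

    print(read_conf('proxies'))  # -> None, because USE_PROXY is False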