Add ChatGLM INT4 config support, so ChatGLM can also be selected on machines with limited VRAM

fenglui 2023-07-22 08:29:15 +08:00
parent a393edfaa4
commit 5813d65e52
2 changed files with 8 additions and 3 deletions

config.py

@@ -80,6 +80,7 @@ ChatGLM_PTUNING_CHECKPOINT = "" # e.g. "/home/hmp/ChatGLM2-6B/ptuning/output/6b
 # Execution device for local LLM models such as ChatGLM: CPU/GPU
 LOCAL_MODEL_DEVICE = "cpu" # optional: "cuda"
+LOCAL_MODEL_QUANT = "INT4" # default ""; "INT4" enables the INT4-quantized version, "INT8" the INT8-quantized version
 # Number of parallel threads for gradio (no need to modify)
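Note that although the config comment mentions both "INT4" and "INT8", only the INT4 value is actually wired up in the bridge change below. A minimal sketch of the mapping this option drives is shown here; the helper name select_chatglm_checkpoint is hypothetical, for illustration only:

# Minimal sketch of how LOCAL_MODEL_QUANT selects a checkpoint.
# The helper name select_chatglm_checkpoint is hypothetical; only the
# INT4 mapping is implemented in this commit.
def select_chatglm_checkpoint(local_model_quant: str) -> str:
    """Map the LOCAL_MODEL_QUANT config value to a Hugging Face repo id."""
    if local_model_quant == "INT4":
        return "THUDM/chatglm2-6b-int4"  # quantized build, lower VRAM footprint
    return "THUDM/chatglm2-6b"           # default full-/half-precision build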

request_llm/bridge_chatglm.py

@@ -37,15 +37,19 @@ class GetGLMHandle(Process):
         # Runs in the child process
         # On first run, load the parameters
         retry = 0
+        pretrained_model_name_or_path = "THUDM/chatglm2-6b"
+        LOCAL_MODEL_QUANT = get_conf('LOCAL_MODEL_QUANT')
+        if LOCAL_MODEL_QUANT and len(LOCAL_MODEL_QUANT) > 0 and LOCAL_MODEL_QUANT[0] == "INT4":
+            pretrained_model_name_or_path = "THUDM/chatglm2-6b-int4"
         while True:
             try:
                 if self.chatglm_model is None:
-                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
+                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True)
                     device, = get_conf('LOCAL_MODEL_DEVICE')
                     if device=='cpu':
-                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
+                        self.chatglm_model = AutoModel.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True).float()
                     else:
-                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
+                        self.chatglm_model = AutoModel.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True).half().cuda()
                     self.chatglm_model = self.chatglm_model.eval()
                     break
                 else:
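In this project get_conf returns a tuple of the requested values (the `device, = get_conf('LOCAL_MODEL_DEVICE')` unpacking above relies on this), so `LOCAL_MODEL_QUANT[0]` reads the first requested config value rather than the first character of a string. Standalone, the new loading path behaves roughly like the sketch below; the repo ids and the CPU/GPU split are taken from the diff, everything else is an assumption for illustration:

# Standalone sketch of the loading logic above, assuming the transformers
# AutoTokenizer/AutoModel API with trust_remote_code=True (ChatGLM ships its
# own modeling code on the Hugging Face Hub).
from transformers import AutoModel, AutoTokenizer

pretrained = "THUDM/chatglm2-6b-int4"  # chosen when LOCAL_MODEL_QUANT == "INT4"
device = "cpu"                         # mirrors LOCAL_MODEL_DEVICE

tokenizer = AutoTokenizer.from_pretrained(pretrained, trust_remote_code=True)
if device == "cpu":
    # float32 on CPU
    model = AutoModel.from_pretrained(pretrained, trust_remote_code=True).float()
else:
    # half precision on GPU
    model = AutoModel.from_pretrained(pretrained, trust_remote_code=True).half().cuda()
model = model.eval()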