From e93b6fa3a6e1ded5ba8228da18ab8e417eee71bf Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Mon, 24 Jul 2023 18:19:57 +0800
Subject: [PATCH] Add GLM INT8

---
 config.py                     |  2 +-
 request_llm/bridge_chatglm.py | 20 ++++++++++++--------
 toolbox.py                    |  1 +
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/config.py b/config.py
index f39fef3..70507b5 100644
--- a/config.py
+++ b/config.py
@@ -80,7 +80,7 @@ ChatGLM_PTUNING_CHECKPOINT = "" # e.g. "/home/hmp/ChatGLM2-6B/ptuning/output/6b
 
 # How local LLMs such as ChatGLM are executed: CPU/GPU
 LOCAL_MODEL_DEVICE = "cpu" # optionally "cuda"
-LOCAL_MODEL_QUANT = "INT4" # default ""; "INT4" enables the INT4-quantized variant; "INT8" enables the INT8-quantized variant
+LOCAL_MODEL_QUANT = "FP16" # default "FP16"; "INT4" enables the INT4-quantized variant; "INT8" enables the INT8-quantized variant
 
 
 # Number of parallel gradio threads (no need to change)
diff --git a/request_llm/bridge_chatglm.py b/request_llm/bridge_chatglm.py
index c7ec42b..d761c65 100644
--- a/request_llm/bridge_chatglm.py
+++ b/request_llm/bridge_chatglm.py
@@ -37,19 +37,23 @@ class GetGLMHandle(Process):
         # executed in the child process
         # first run: load the model weights
         retry = 0
-        pretrained_model_name_or_path = "THUDM/chatglm2-6b"
-        LOCAL_MODEL_QUANT = get_conf('LOCAL_MODEL_QUANT')
-        if LOCAL_MODEL_QUANT and len(LOCAL_MODEL_QUANT) > 0 and LOCAL_MODEL_QUANT[0] == "INT4":
-            pretrained_model_name_or_path = "THUDM/chatglm2-6b-int4"
+        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')
+
+        if LOCAL_MODEL_QUANT == "INT4":   # INT4
+            _model_name_ = "THUDM/chatglm2-6b-int4"
+        elif LOCAL_MODEL_QUANT == "INT8": # INT8
+            _model_name_ = "THUDM/chatglm2-6b-int8"
+        else:
+            _model_name_ = "THUDM/chatglm2-6b" # FP16
+
         while True:
             try:
                 if self.chatglm_model is None:
-                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True)
-                    device, = get_conf('LOCAL_MODEL_DEVICE')
+                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
                     if device=='cpu':
-                        self.chatglm_model = AutoModel.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True).float()
+                        self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).float()
                     else:
-                        self.chatglm_model = AutoModel.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True).half().cuda()
+                        self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).half().cuda()
                     self.chatglm_model = self.chatglm_model.eval()
                     break
                 else:
diff --git a/toolbox.py b/toolbox.py
index 89a46fb..901b61c 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -681,6 +681,7 @@ def read_single_conf_with_lru_cache(arg):
         else:
             print亮红( "[API_KEY] Your API_KEY does not match any known key format; please correct the API key in the config file and run again.")
     if arg == 'proxies':
+        if not read_single_conf_with_lru_cache('USE_PROXY'): r = None # check USE_PROXY so proxies cannot take effect on its own
         if r is None:
             print亮红('[PROXY] Proxy status: not configured. Without a proxy, the OpenAI family of models is most likely unreachable. Suggestion: check whether the USE_PROXY option has been changed.')
         else:
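
For reference, the model-selection logic added in request_llm/bridge_chatglm.py boils down to the following standalone sketch. This is a minimal illustration, not the patch itself: get_conf() is replaced by plain variables, while the THUDM/chatglm2-6b-int4 and THUDM/chatglm2-6b-int8 checkpoint names are taken directly from the hunk above.

# Minimal sketch of the new LOCAL_MODEL_QUANT -> checkpoint mapping.
# LOCAL_MODEL_QUANT and device stand in for values normally read via get_conf().
from transformers import AutoModel, AutoTokenizer

LOCAL_MODEL_QUANT = "INT8"  # "FP16" (default), "INT4", or "INT8"
device = "cuda"             # "cpu" or "cuda"

if LOCAL_MODEL_QUANT == "INT4":
    _model_name_ = "THUDM/chatglm2-6b-int4"
elif LOCAL_MODEL_QUANT == "INT8":
    _model_name_ = "THUDM/chatglm2-6b-int8"
else:
    _model_name_ = "THUDM/chatglm2-6b"  # FP16

tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
if device == 'cpu':
    # CPU inference needs full-precision weights
    model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).float()
else:
    # GPU inference runs in half precision
    model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).half().cuda()
model = model.eval()

A side effect of the refactor is that LOCAL_MODEL_DEVICE is now read once, together with LOCAL_MODEL_QUANT, rather than inside the retry loop as before.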
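
The one-line change in toolbox.py guards the cached proxies value: without it, a proxies dict left in config.py could take effect even when USE_PROXY is disabled. A hedged sketch of the intended behavior follows, with hypothetical stand-in values for USE_PROXY and proxies (the real values come from config.py via read_single_conf_with_lru_cache).

# Sketch of the proxy guard. USE_PROXY and proxies are hypothetical stand-ins
# for the values that read_single_conf_with_lru_cache() returns from config.py.
USE_PROXY = False
proxies = {"http": "socks5h://localhost:11284"}  # hypothetical example value

r = proxies
if not USE_PROXY:
    r = None  # proxies must not take effect unless USE_PROXY is enabled

print(r)  # None -> the "[PROXY] not configured" warning branch is taken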