From f75e39dc2734c62d7590e137c37c8504fa0eedbb Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Sat, 11 Nov 2023 21:11:55 +0800
Subject: [PATCH] Fix the loading bug of local models on Windows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 request_llms/bridge_chatgpt.py         |  3 +--
 request_llms/bridge_chatgpt_website.py |  3 +--
 request_llms/bridge_claude.py          |  2 +-
 request_llms/bridge_internlm.py        | 17 +++++++++--------
 request_llms/bridge_qwen.py            | 15 ++++++++-------
 request_llms/local_llm_class.py        |  2 +-
 tests/test_llms.py                     |  4 ++--
 version                                |  4 ++--
 8 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/request_llms/bridge_chatgpt.py b/request_llms/bridge_chatgpt.py
index 292de0a..e55ad37 100644
--- a/request_llms/bridge_chatgpt.py
+++ b/request_llms/bridge_chatgpt.py
@@ -7,8 +7,7 @@
     1. predict: used for normal conversation; full interactive features; cannot run in multiple threads

    Functions that can be called from multiple threads
-    2. predict_no_ui: called by advanced experimental feature modules; output is not shown in the UI in real time; simple parameters; can run in parallel threads, which makes complex feature logic easy to implement
-    3. predict_no_ui_long_connection: during experiments the connection to openai tended to drop when predict_no_ui handled long documents; this function fixes that with streaming and also supports multithreading
+    2. predict_no_ui_long_connection: supports multithreading
 """

 import json
diff --git a/request_llms/bridge_chatgpt_website.py b/request_llms/bridge_chatgpt_website.py
index 7f3147b..f2f0709 100644
--- a/request_llms/bridge_chatgpt_website.py
+++ b/request_llms/bridge_chatgpt_website.py
@@ -7,8 +7,7 @@
     1. predict: used for normal conversation; full interactive features; cannot run in multiple threads

    Functions that can be called from multiple threads
-    2. predict_no_ui: called by advanced experimental feature modules; output is not shown in the UI in real time; simple parameters; can run in parallel threads, which makes complex feature logic easy to implement
-    3. predict_no_ui_long_connection: during experiments the connection to openai tended to drop when predict_no_ui handled long documents; this function fixes that with streaming and also supports multithreading
+    2. predict_no_ui_long_connection: supports multithreading
 """

 import json
diff --git a/request_llms/bridge_claude.py b/request_llms/bridge_claude.py
index 6084b1f..42b7505 100644
--- a/request_llms/bridge_claude.py
+++ b/request_llms/bridge_claude.py
@@ -7,7 +7,7 @@
     1. predict: used for normal conversation; full interactive features; cannot run in multiple threads

    Functions that can be called from multiple threads
-    2. predict_no_ui_long_connection: during experiments the connection to openai tended to drop when predict_no_ui handled long documents; this function fixes that with streaming and also supports multithreading
+    2. predict_no_ui_long_connection: supports multithreading
 """

 import os
diff --git a/request_llms/bridge_internlm.py b/request_llms/bridge_internlm.py
index b831dc5..20b53b4 100644
--- a/request_llms/bridge_internlm.py
+++ b/request_llms/bridge_internlm.py
@@ -5,7 +5,7 @@ from transformers import AutoModel, AutoTokenizer
 import time
 import threading
 import importlib
-from toolbox import update_ui, get_conf
+from toolbox import update_ui, get_conf, ProxyNetworkActivate
 from multiprocessing import Process, Pipe
 from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns

@@ -52,14 +52,15 @@ class GetInternlmHandle(LocalLLMHandle):
         import torch
         from transformers import AutoModelForCausalLM, AutoTokenizer
         device = get_conf('LOCAL_MODEL_DEVICE')
-        if self._model is None:
-            tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
-            if device=='cpu':
-                model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
-            else:
-                model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()
+        with ProxyNetworkActivate('Download_LLM'):
+            if self._model is None:
+                tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
+                if device=='cpu':
+                    model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
+                else:
+                    model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()

-        model = model.eval()
+            model = model.eval()
         return model, tokenizer

     def llm_stream_generator(self, **kwargs):
diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py
index 0b226df..afd886b 100644
--- a/request_llms/bridge_qwen.py
+++ b/request_llms/bridge_qwen.py
@@ -6,7 +6,7 @@ from transformers import AutoModel, AutoTokenizer
 import time
 import threading
 import importlib
-from toolbox import update_ui, get_conf
+from toolbox import update_ui, get_conf, ProxyNetworkActivate
 from multiprocessing import Process, Pipe
 from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns

@@ -29,12 +29,13 @@ class GetONNXGLMHandle(LocalLLMHandle):
         import platform
         from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

-        model_id = 'qwen/Qwen-7B-Chat'
-        self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True, resume_download=True)
-        # use fp16
-        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
-        model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # different generation lengths, top_p and other hyperparameters can be specified here
-        self._model = model
+        with ProxyNetworkActivate('Download_LLM'):
+            model_id = 'qwen/Qwen-7B-Chat'
+            self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True, resume_download=True)
+            # use fp16
+            model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
+            model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # different generation lengths, top_p and other hyperparameters can be specified here
+            self._model = model

         return self._model, self._tokenizer

diff --git a/request_llms/local_llm_class.py b/request_llms/local_llm_class.py
index fe6be96..38fcfc9 100644
--- a/request_llms/local_llm_class.py
+++ b/request_llms/local_llm_class.py
@@ -201,7 +201,7 @@ class LocalLLMHandle(Process):
                 if res.startswith(self.std_tag):
                     new_output = res[len(self.std_tag):]
                     std_out = std_out[:std_out_clip_len]
-                    # print(new_output, end='')
+                    print(new_output, end='')
                     std_out = new_output + std_out
                     yield self.std_tag + '\n```\n' + std_out + '\n```\n'
                 elif res == '[Finish]':
diff --git a/tests/test_llms.py b/tests/test_llms.py
index 5c5d2f6..6285f03 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -15,11 +15,11 @@ if __name__ == "__main__":
     # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
     # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
     # from request_llms.bridge_claude import predict_no_ui_long_connection
-    # from request_llms.bridge_internlm import predict_no_ui_long_connection
+    from request_llms.bridge_internlm import predict_no_ui_long_connection
     # from request_llms.bridge_qwen import predict_no_ui_long_connection
     # from request_llms.bridge_spark import predict_no_ui_long_connection
     # from request_llms.bridge_zhipu import predict_no_ui_long_connection
-    from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
+    # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection

     llm_kwargs = {
         'max_length': 4096,
diff --git a/version b/version
index 5e4fb7d..69a871e 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 3.57,
+  "version": 3.58,
   "show_feature": true,
-  "new_feature": "Support ERNIE Bot v4 and Spark v3 <-> Support GLM3 and the Zhipu API <-> Fix the local-model concurrency bug <-> Support dynamically adding basic function buttons <-> New report-PDF summary page <-> Recompile Gradio to improve the user experience"
+  "new_feature": "Fix the loading bug of local models on Windows <-> Support ERNIE Bot v4 and Spark v3 <-> Support GLM3 and the Zhipu API <-> Fix the local-model concurrency bug <-> Support dynamically adding basic function buttons <-> New report-PDF summary page <-> Recompile Gradio to improve the user experience"
 }
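Note: ProxyNetworkActivate is imported from the project's toolbox module and is not defined in this patch. As a rough, hypothetical sketch of the pattern the patched loaders rely on (a context manager that applies proxy settings only while the model weights are downloaded, so ordinary chat traffic is unaffected), something along these lines would work; the function name, the handling of the 'Download_LLM' task label, and the proxy address are illustrative placeholders, not the project's actual configuration:

import os
from contextlib import contextmanager

@contextmanager
def proxy_network_activate(task="Download_LLM", proxy="http://127.0.0.1:7890"):
    # 'task' mirrors the real call signature; it is unused in this simplified sketch.
    # Remember the current proxy environment, switch the proxy on for the
    # duration of the block, and restore it afterwards so later requests go direct.
    saved = {k: os.environ.get(k) for k in ("HTTP_PROXY", "HTTPS_PROXY")}
    os.environ["HTTP_PROXY"] = proxy
    os.environ["HTTPS_PROXY"] = proxy
    try:
        yield
    finally:
        for k, v in saved.items():
            if v is None:
                os.environ.pop(k, None)
            else:
                os.environ[k] = v

# Usage mirroring the patched bridges (only the download runs behind the proxy):
# with proxy_network_activate("Download_LLM"):
#     tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)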