From f75e39dc2734c62d7590e137c37c8504fa0eedbb Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Sat, 11 Nov 2023 21:11:55 +0800
Subject: [PATCH] Fix the loading bug of local models on Windows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 request_llms/bridge_chatgpt.py         |  3 +--
 request_llms/bridge_chatgpt_website.py |  3 +--
 request_llms/bridge_claude.py          |  2 +-
 request_llms/bridge_internlm.py        | 17 +++++++++--------
 request_llms/bridge_qwen.py            | 15 ++++++++-------
 request_llms/local_llm_class.py        |  2 +-
 tests/test_llms.py                     |  4 ++--
 version                                |  4 ++--
 8 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/request_llms/bridge_chatgpt.py b/request_llms/bridge_chatgpt.py
index 292de0a..e55ad37 100644
--- a/request_llms/bridge_chatgpt.py
+++ b/request_llms/bridge_chatgpt.py
@@ -7,8 +7,7 @@
     1. predict: used for normal conversation; full interactive features; cannot run in multiple threads

    Functions that can be called from multiple threads
-    2. predict_no_ui: called by advanced experimental feature modules; output is not shown in the UI in real time; simple parameters; can run in parallel threads, which makes complex feature logic easy to implement
-    3. predict_no_ui_long_connection: during experiments the connection to openai tended to drop when predict_no_ui handled long documents; this function fixes that with streaming and also supports multithreading
+    2. predict_no_ui_long_connection: supports multithreading
 """

 import json
diff --git a/request_llms/bridge_chatgpt_website.py b/request_llms/bridge_chatgpt_website.py
index 7f3147b..f2f0709 100644
--- a/request_llms/bridge_chatgpt_website.py
+++ b/request_llms/bridge_chatgpt_website.py
@@ -7,8 +7,7 @@
     1. predict: used for normal conversation; full interactive features; cannot run in multiple threads

    Functions that can be called from multiple threads
-    2. predict_no_ui: called by advanced experimental feature modules; output is not shown in the UI in real time; simple parameters; can run in parallel threads, which makes complex feature logic easy to implement
-    3. predict_no_ui_long_connection: during experiments the connection to openai tended to drop when predict_no_ui handled long documents; this function fixes that with streaming and also supports multithreading
+    2. predict_no_ui_long_connection: supports multithreading
 """

 import json
diff --git a/request_llms/bridge_claude.py b/request_llms/bridge_claude.py
index 6084b1f..42b7505 100644
--- a/request_llms/bridge_claude.py
+++ b/request_llms/bridge_claude.py
@@ -7,7 +7,7 @@
     1. predict: used for normal conversation; full interactive features; cannot run in multiple threads

    Functions that can be called from multiple threads
-    2. predict_no_ui_long_connection: during experiments the connection to openai tended to drop when predict_no_ui handled long documents; this function fixes that with streaming and also supports multithreading
+    2. predict_no_ui_long_connection: supports multithreading
 """

 import os
diff --git a/request_llms/bridge_internlm.py b/request_llms/bridge_internlm.py
index b831dc5..20b53b4 100644
--- a/request_llms/bridge_internlm.py
+++ b/request_llms/bridge_internlm.py
@@ -5,7 +5,7 @@ from transformers import AutoModel, AutoTokenizer
 import time
 import threading
 import importlib
-from toolbox import update_ui, get_conf
+from toolbox import update_ui, get_conf, ProxyNetworkActivate
 from multiprocessing import Process, Pipe
 from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns

@@ -52,14 +52,15 @@ class GetInternlmHandle(LocalLLMHandle):
         import torch
         from transformers import AutoModelForCausalLM, AutoTokenizer
         device = get_conf('LOCAL_MODEL_DEVICE')
-        if self._model is None:
-            tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
-            if device=='cpu':
-                model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
-            else:
-                model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()
+        with ProxyNetworkActivate('Download_LLM'):
+            if self._model is None:
+                tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
+                if device=='cpu':
+                    model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
+                else:
+                    model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()

-        model = model.eval()
+            model = model.eval()
         return model, tokenizer

     def llm_stream_generator(self, **kwargs):
diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py
index 0b226df..afd886b 100644
--- a/request_llms/bridge_qwen.py
+++ b/request_llms/bridge_qwen.py
@@ -6,7 +6,7 @@ from transformers import AutoModel, AutoTokenizer
 import time
 import threading
 import importlib
-from toolbox import update_ui, get_conf
+from toolbox import update_ui, get_conf, ProxyNetworkActivate
 from multiprocessing import Process, Pipe
 from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns

@@ -29,12 +29,13 @@ class GetONNXGLMHandle(LocalLLMHandle):
         import platform
         from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

-        model_id = 'qwen/Qwen-7B-Chat'
-        self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True, resume_download=True)
-        # use fp16
-        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
-        model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # different generation lengths, top_p and other hyperparameters can be specified here
-        self._model = model
+        with ProxyNetworkActivate('Download_LLM'):
+            model_id = 'qwen/Qwen-7B-Chat'
+            self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True, resume_download=True)
+            # use fp16
+            model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
+            model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # different generation lengths, top_p and other hyperparameters can be specified here
+            self._model = model

         return self._model, self._tokenizer

diff --git a/request_llms/local_llm_class.py b/request_llms/local_llm_class.py
index fe6be96..38fcfc9 100644
--- a/request_llms/local_llm_class.py
+++ b/request_llms/local_llm_class.py
@@ -201,7 +201,7 @@ class LocalLLMHandle(Process):
                 if res.startswith(self.std_tag):
                     new_output = res[len(self.std_tag):]
                     std_out = std_out[:std_out_clip_len]
-                    # print(new_output, end='')
+                    print(new_output, end='')
                     std_out = new_output + std_out
                     yield self.std_tag + '\n```\n' + std_out + '\n```\n'
                 elif res == '[Finish]':
diff --git a/tests/test_llms.py b/tests/test_llms.py
index 5c5d2f6..6285f03 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -15,11 +15,11 @@ if __name__ == "__main__":
     # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
     # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
     # from request_llms.bridge_claude import predict_no_ui_long_connection
-    # from request_llms.bridge_internlm import predict_no_ui_long_connection
+    from request_llms.bridge_internlm import predict_no_ui_long_connection
     # from request_llms.bridge_qwen import predict_no_ui_long_connection
     # from request_llms.bridge_spark import predict_no_ui_long_connection
     # from request_llms.bridge_zhipu import predict_no_ui_long_connection
-    from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
+    # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection

     llm_kwargs = {
         'max_length': 4096,
diff --git a/version b/version
index 5e4fb7d..69a871e 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 3.57,
+  "version": 3.58,
   "show_feature": true,
-  "new_feature": "Support ERNIE Bot v4 and Spark v3 <-> Support GLM3 and the Zhipu API <-> Fix the local-model concurrency bug <-> Support dynamically adding basic function buttons <-> New report-PDF summary page <-> Recompile Gradio to improve the user experience"
+  "new_feature": "Fix the loading bug of local models on Windows <-> Support ERNIE Bot v4 and Spark v3 <-> Support GLM3 and the Zhipu API <-> Fix the local-model concurrency bug <-> Support dynamically adding basic function buttons <-> New report-PDF summary page <-> Recompile Gradio to improve the user experience"
 }
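Note: ProxyNetworkActivate is imported from the project's toolbox module and is not defined in this patch. As a rough, hypothetical sketch of the pattern the patched loaders rely on (a context manager that applies proxy settings only while the model weights are downloaded, so ordinary chat traffic is unaffected), something along these lines would work; the function name, the handling of the 'Download_LLM' task label, and the proxy address are illustrative placeholders, not the project's actual configuration:

import os
from contextlib import contextmanager

@contextmanager
def proxy_network_activate(task="Download_LLM", proxy="http://127.0.0.1:7890"):
    # 'task' mirrors the real call signature; it is unused in this simplified sketch.
    # Remember the current proxy environment, switch the proxy on for the
    # duration of the block, and restore it afterwards so later requests go direct.
    saved = {k: os.environ.get(k) for k in ("HTTP_PROXY", "HTTPS_PROXY")}
    os.environ["HTTP_PROXY"] = proxy
    os.environ["HTTPS_PROXY"] = proxy
    try:
        yield
    finally:
        for k, v in saved.items():
            if v is None:
                os.environ.pop(k, None)
            else:
                os.environ[k] = v

# Usage mirroring the patched bridges (only the download runs behind the proxy):
# with proxy_network_activate("Download_LLM"):
#     tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)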