diff --git a/.gitignore b/.gitignore
index efe469e..213f3e7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,7 +55,6 @@ coverage.xml
 *.pot
 github
 .github
-.idea/
 TEMP
 TRASH
diff --git a/Dockerfile+ChatGLM b/Dockerfile+ChatGLM
new file mode 100644
index 0000000..f99f2a6
--- /dev/null
+++ b/Dockerfile+ChatGLM
@@ -0,0 +1,50 @@
+# How to build | 如何构建: docker build -t gpt-academic --network=host -f Dockerfile+ChatGLM .
+# How to run | 如何运行 (1) 直接运行: docker run --rm -it --net=host --gpus=all gpt-academic
+# How to run | 如何运行 (2) 我想运行之前进容器做一些调整: docker run --rm -it --net=host --gpus=all gpt-academic bash
+
+# 从NVIDIA源,从而支持显卡运行(检查宿主的nvidia-smi中的cuda版本必须>=11.3)
+FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04
+ARG useProxyNetwork=''
+RUN apt-get update
+RUN apt-get install -y curl proxychains curl
+RUN apt-get install -y git python python3 python-dev python3-dev --fix-missing
+
+# 配置代理网络(构建Docker镜像时使用)
+# # comment out below if you do not need proxy network | 如果不需要翻墙 - 从此行向下删除
+RUN $useProxyNetwork curl cip.cc
+RUN sed -i '$ d' /etc/proxychains.conf
+RUN sed -i '$ d' /etc/proxychains.conf
+RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf
+ARG useProxyNetwork=proxychains
+# # comment out above if you do not need proxy network | 如果不需要翻墙 - 从此行向上删除
+
+
+# use python3 as the system default python
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
+
+# 下载分支
+WORKDIR /gpt
+RUN $useProxyNetwork git clone https://github.com/binary-husky/chatgpt_academic.git -b v3.0
+WORKDIR /gpt/chatgpt_academic
+RUN $useProxyNetwork python3 -m pip install -r requirements.txt
+RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_chatglm.txt
+RUN $useProxyNetwork python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu113
+
+# 预热CHATGLM参数(非必要 可选步骤)
+RUN echo ' \n\
+from transformers import AutoModel, AutoTokenizer \n\
+chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) \n\
+chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float() ' >> warm_up_chatglm.py
+RUN python3 -u warm_up_chatglm.py
+RUN $useProxyNetwork git pull
+
+# 为chatgpt-academic配置代理和API-KEY (非必要 可选步骤)
+RUN echo ' \n\
+API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \n\
+USE_PROXY = True \n\
+LLM_MODEL = "chatglm" \n\
+LOCAL_MODEL_DEVICE = "cuda" \n\
+proxies = { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' >> config_private.py
+
+# 启动
+CMD ["python3", "-u", "main.py"]
diff --git a/config.py b/config.py
index 658de99..8db5d46 100644
--- a/config.py
+++ b/config.py
@@ -45,7 +45,10 @@ WEB_PORT = -1
 MAX_RETRY = 2
 
 # OpenAI模型选择是(gpt4现在只对申请成功的人开放)
-LLM_MODEL = "gpt-3.5-turbo"
+LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm", "tgui:anymodel@localhost:7865"
+
+# 本地LLM模型如ChatGLM的执行方式 CPU/GPU
+LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
 
 # OpenAI的API_URL
 API_URL = "https://api.openai.com/v1/chat/completions"
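Aside (not part of the patch): the two new knobs above are normally overridden in a `config_private.py`, which is exactly what the last `RUN echo` step of the Dockerfile writes. A minimal sketch of such an override, with a placeholder API key and only the options shown in this diff:

```python
# config_private.py -- illustrative override of config.py; values are placeholders.
API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"   # placeholder, not a real key
USE_PROXY = False
LLM_MODEL = "chatglm"            # or "gpt-3.5-turbo", "tgui:anymodel@localhost:7865"
LOCAL_MODEL_DEVICE = "cuda"      # "cpu" if no CUDA >= 11.3 GPU is available
```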
"解析整个C++项目头文件": { "Color": "stop", # 按钮颜色 + "AsButton": False, # 加入下拉菜单中 "Function": HotReload(解析一个C项目的头文件) }, "解析整个C++项目(.cpp/.hpp/.c/.h)": { diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 189d948..cc43b53 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -61,7 +61,7 @@ def request_gpt_model_in_new_thread_with_ui_alive( """ import time from concurrent.futures import ThreadPoolExecutor - from request_llm.bridge_chatgpt import predict_no_ui_long_connection + from request_llm.bridge_all import predict_no_ui_long_connection # 用户反馈 chatbot.append([inputs_show_user, ""]) yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面 @@ -167,13 +167,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( """ import time, random from concurrent.futures import ThreadPoolExecutor - from request_llm.bridge_chatgpt import predict_no_ui_long_connection + from request_llm.bridge_all import predict_no_ui_long_connection assert len(inputs_array) == len(history_array) assert len(inputs_array) == len(sys_prompt_array) if max_workers == -1: # 读取配置文件 try: max_workers, = get_conf('DEFAULT_WORKER_NUM') except: max_workers = 8 if max_workers <= 0 or max_workers >= 20: max_workers = 8 + # 屏蔽掉 chatglm的多线程,可能会导致严重卡顿 + if not llm_kwargs['llm_model'].startswith('gpt-'): + max_workers = 1 + executor = ThreadPoolExecutor(max_workers=max_workers) n_frag = len(inputs_array) # 用户反馈 diff --git a/crazy_functions/代码重写为全英文_多线程.py b/crazy_functions/代码重写为全英文_多线程.py index 40bc45d..a09a3f5 100644 --- a/crazy_functions/代码重写为全英文_多线程.py +++ b/crazy_functions/代码重写为全英文_多线程.py @@ -1,5 +1,5 @@ import threading -from request_llm.bridge_chatgpt import predict_no_ui_long_connection +from request_llm.bridge_all import predict_no_ui_long_connection from toolbox import update_ui from toolbox import CatchException, write_results_to_file, report_execption from .crazy_utils import breakdown_txt_to_satisfy_token_limit diff --git a/crazy_functions/解析项目源代码.py b/crazy_functions/解析项目源代码.py index 3af7497..9b476cb 100644 --- a/crazy_functions/解析项目源代码.py +++ b/crazy_functions/解析项目源代码.py @@ -12,7 +12,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, sys_prompt_array = [] report_part_1 = [] - assert len(file_manifest) <= 1024, "源文件太多(超过1024个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。" + assert len(file_manifest) <= 512, "源文件太多(超过512个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。" ############################## <第一步,逐个文件分析,多线程> ################################## for index, fp in enumerate(file_manifest): # 读取文件 diff --git a/crazy_functions/询问多个大语言模型.py b/crazy_functions/询问多个大语言模型.py new file mode 100644 index 0000000..a3c98c1 --- /dev/null +++ b/crazy_functions/询问多个大语言模型.py @@ -0,0 +1,28 @@ +from toolbox import CatchException, update_ui +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +import datetime +@CatchException +def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + """ + txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 + llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行 + plugin_kwargs 插件模型的参数,如温度和top_p等,一般原样传递下去就行 + chatbot 聊天显示框的句柄,用于显示给用户 + history 聊天历史,前情提要 + system_prompt 给gpt的静默提醒 + web_port 当前软件运行的端口号 + """ + history = [] # 清空历史,以免输入溢出 + chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 + + llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo' + gpt_say = yield 
diff --git a/crazy_functions/询问多个大语言模型.py b/crazy_functions/询问多个大语言模型.py
new file mode 100644
index 0000000..a3c98c1
--- /dev/null
+++ b/crazy_functions/询问多个大语言模型.py
@@ -0,0 +1,28 @@
+from toolbox import CatchException, update_ui
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+import datetime
+@CatchException
+def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
+    llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
+    plugin_kwargs   插件模型的参数,如温度和top_p等,一般原样传递下去就行
+    chatbot         聊天显示框的句柄,用于显示给用户
+    history         聊天历史,前情提要
+    system_prompt   给gpt的静默提醒
+    web_port        当前软件运行的端口号
+    """
+    history = []    # 清空历史,以免输入溢出
+    chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
+
+    llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo'
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=txt, inputs_show_user=txt,
+        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+        sys_prompt=system_prompt
+    )
+
+    history.append(txt)
+    history.append(gpt_say)
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
\ No newline at end of file
diff --git a/main.py b/main.py
index 2e4e275..d8257a2 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,6 @@
 import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
 import gradio as gr
-from request_llm.bridge_chatgpt import predict
+from request_llm.bridge_all import predict
 from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
 # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
 
@@ -97,7 +97,10 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
                     system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
                     top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
                     temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
+                    max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="MaxLength",)
                     checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
+                    md_dropdown = gr.Dropdown(["gpt-3.5-turbo", "chatglm"], value=LLM_MODEL, label="").style(container=False)
+
                     gr.Markdown(description)
                 with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary:
                     with gr.Row():
@@ -118,7 +121,7 @@ with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=
             return ret
         checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2] )
         # 整理反复出现的控件句柄组合
-        input_combo = [cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
+        input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt]
         output_combo = [cookies, chatbot, history, status]
         predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=input_combo, outputs=output_combo)
         # 提交按钮、重置按钮
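Aside (not part of the patch): the two new controls (`max_length_sl`, `md_dropdown`) are inserted into `input_combo` right after `cookies`, which is why `ArgsGeneralWrapper.decorated` in the toolbox.py hunk further below now takes `max_length` and `llm_model` in those same positions; Gradio passes the listed input components to the wrapped function positionally. A minimal sketch of that positional contract, using simplified names:

```python
# Positional contract between the UI input list and the wrapped predict function.
input_combo = ["cookies", "max_length_sl", "md_dropdown", "txt", "txt2",
               "top_p", "temperature", "chatbot", "history", "system_prompt"]

def decorated(cookies, max_length, llm_model, txt, txt2,
              top_p, temperature, chatbot, history, system_prompt, *args):
    # Each parameter receives the value of the control at the same index above.
    return {'llm_model': llm_model, 'max_length': max_length,
            'top_p': top_p, 'temperature': temperature}

print(decorated({}, 512, "chatglm", "hi", "", 1.0, 1.0, [], [], "prompt"))
# {'llm_model': 'chatglm', 'max_length': 512, 'top_p': 1.0, 'temperature': 1.0}
```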
diff --git a/request_llm/README.md b/request_llm/README.md
index 3277e4a..973adea 100644
--- a/request_llm/README.md
+++ b/request_llm/README.md
@@ -1,35 +1,53 @@
 # 如何使用其他大语言模型(v3.0分支测试中)
 
-## 1. 先运行text-generation
+## ChatGLM
+
+- 安装依赖 `pip install -r request_llm/requirements_chatglm.txt`
+- 修改配置,在config.py中将LLM_MODEL的值改为"chatglm"
+
 ``` sh
-# 下载模型( text-generation 这么牛的项目,别忘了给人家star )
+LLM_MODEL = "chatglm"
+```
+- 运行!
+``` sh
+python main.py
+```
+
+
+---
+## Text-Generation-UI (TGUI)
+
+### 1. 部署TGUI
+``` sh
+# 1 下载模型
 git clone https://github.com/oobabooga/text-generation-webui.git
-
-# 安装text-generation的额外依赖
-pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
-
-# 切换路径
+# 2 这个仓库的最新代码有问题,回滚到几周之前
+git reset --hard fcda3f87767e642d1c0411776e549e1d3894843d
+# 3 切换路径
 cd text-generation-webui
-
-# 下载模型
+# 4 安装text-generation的额外依赖
+pip install accelerate bitsandbytes flexgen gradio llamacpp markdown numpy peft requests rwkv safetensors sentencepiece tqdm datasets git+https://github.com/huggingface/transformers
+# 5 下载模型
 python download-model.py facebook/galactica-1.3b
 # 其他可选如 facebook/opt-1.3b
+#           facebook/galactica-1.3b
 #           facebook/galactica-6.7b
 #           facebook/galactica-120b
 #           facebook/pygmalion-1.3b 等
 # 详情见 https://github.com/oobabooga/text-generation-webui
 
-# 启动text-generation,注意把模型的斜杠改成下划线
-python server.py --cpu --listen --listen-port 7860 --model facebook_galactica-1.3b
+# 6 启动text-generation
+python server.py --cpu --listen --listen-port 7865 --model facebook_galactica-1.3b
 ```
 
-## 2. 修改config.py
+### 2. 修改config.py
+
 ``` sh
-# LLM_MODEL格式较复杂 TGUI:[模型]@[ws地址]:[ws端口] , 端口要和上面给定的端口一致
-LLM_MODEL = "TGUI:galactica-1.3b@localhost:7860"
+# LLM_MODEL格式: tgui:[模型]@[ws地址]:[ws端口] , 端口要和上面给定的端口一致
+LLM_MODEL = "tgui:galactica-1.3b@localhost:7865"
 ```
 
-## 3. 运行!
+### 3. 运行!
 ``` sh
 cd chatgpt-academic
 python main.py
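Aside (not part of the patch): both the README and `bridge_tgui.py` rely on the `tgui:[模型]@[ws地址]:[ws端口]` convention. A minimal sketch of how such a string splits apart, mirroring the `split('@')` / `split(':')` calls used in the bridge (the helper name is illustrative):

```python
def parse_tgui_model(llm_model: str):
    """'tgui:galactica-1.3b@localhost:7865' -> ('tgui:galactica-1.3b', 'localhost', '7865')"""
    model_name, addr_port = llm_model.split('@')
    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_model
    addr, port = addr_port.split(':')
    return model_name, addr, port

print(parse_tgui_model("tgui:galactica-1.3b@localhost:7865"))
```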
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
new file mode 100644
index 0000000..f02a1c8
--- /dev/null
+++ b/request_llm/bridge_all.py
@@ -0,0 +1,135 @@
+
+"""
+    该文件中主要包含2个函数
+
+    不具备多线程能力的函数:
+    1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
+
+    具备多线程调用能力的函数
+    2. predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程
+"""
+
+from concurrent.futures import ThreadPoolExecutor
+
+from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
+from .bridge_chatgpt import predict as chatgpt_ui
+
+from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
+from .bridge_chatglm import predict as chatglm_ui
+
+from .bridge_tgui import predict_no_ui_long_connection as tgui_noui
+from .bridge_tgui import predict as tgui_ui
+
+methods = {
+    "openai-no-ui": chatgpt_noui,
+    "openai-ui": chatgpt_ui,
+
+    "chatglm-no-ui": chatglm_noui,
+    "chatglm-ui": chatglm_ui,
+
+    "tgui-no-ui": tgui_noui,
+    "tgui-ui": tgui_ui,
+}
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
+    """
+    发送至LLM,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。
+    inputs:
+        是本次问询的输入
+    sys_prompt:
+        系统静默prompt
+    llm_kwargs:
+        LLM的内部调优参数
+    history:
+        是之前的对话列表
+    observe_window = None:
+        用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗
+    """
+    import threading, time, copy
+
+    model = llm_kwargs['llm_model']
+    n_model = 1
+    if '&' not in model:
+        assert not model.startswith("tgui"), "TGUI不支持函数插件的实现"
+
+        # 如果只询问1个大语言模型:
+        if model.startswith('gpt'):
+            method = methods['openai-no-ui']
+        elif model == 'chatglm':
+            method = methods['chatglm-no-ui']
+        elif model.startswith('tgui'):
+            method = methods['tgui-no-ui']
+        return method(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience)
+    else:
+        # 如果同时询问多个大语言模型:
+        executor = ThreadPoolExecutor(max_workers=16)
+        models = model.split('&')
+        n_model = len(models)
+
+        window_len = len(observe_window)
+        if window_len==0:
+            window_mutex = [[] for _ in range(n_model)] + [True]
+        elif window_len==1:
+            window_mutex = [[""] for _ in range(n_model)] + [True]
+        elif window_len==2:
+            window_mutex = [["", time.time()] for _ in range(n_model)] + [True]
+
+        futures = []
+        for i in range(n_model):
+            model = models[i]
+            if model.startswith('gpt'):
+                method = methods['openai-no-ui']
+            elif model == 'chatglm':
+                method = methods['chatglm-no-ui']
+            elif model.startswith('tgui'):
+                method = methods['tgui-no-ui']
+            llm_kwargs_feedin = copy.deepcopy(llm_kwargs)
+            llm_kwargs_feedin['llm_model'] = model
+            future = executor.submit(method, inputs, llm_kwargs_feedin, history, sys_prompt, window_mutex[i], console_slience)
+            futures.append(future)
+
+        def mutex_manager(window_mutex, observe_window):
+            while True:
+                time.sleep(0.2)
+                if not window_mutex[-1]: break
+                # 看门狗(watchdog)
+                for i in range(n_model):
+                    window_mutex[i][1] = observe_window[1]
+                # 观察窗(window)
+                chat_string = []
+                for i in range(n_model):
+                    chat_string.append( f"[{str(models[i])} 说]: {window_mutex[i][0]}" )
+                res = '\n\n---\n\n'.join(chat_string)
+                # # # # # # # # # # #
+                observe_window[0] = res
+
+        t_model = threading.Thread(target=mutex_manager, args=(window_mutex, observe_window), daemon=True)
+        t_model.start()
+
+        return_string_collect = []
+        for i, future in enumerate(futures):  # wait and get
+            return_string_collect.append( f"[{str(models[i])} 说]: {future.result()}" )
+        window_mutex[-1] = False  # stop mutex thread
+        res = '\n\n---\n\n'.join(return_string_collect)
+        return res
+
+
+def predict(inputs, llm_kwargs, *args, **kwargs):
+    """
+    发送至LLM,流式获取输出。
+    用于基础的对话功能。
+    inputs 是本次问询的输入
+    top_p, temperature是LLM的内部调优参数
+    history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误)
+    chatbot 为WebUI中显示的对话列表,修改它,然后yield出去,可以直接修改对话界面内容
+    additional_fn代表点击的哪个按钮,按钮见functional.py
+    """
+    if llm_kwargs['llm_model'].startswith('gpt'):
+        method = methods['openai-ui']
+    elif llm_kwargs['llm_model'] == 'chatglm':
+        method = methods['chatglm-ui']
+    elif llm_kwargs['llm_model'].startswith('tgui'):
+        method = methods['tgui-ui']
+
+    yield from method(inputs, llm_kwargs, *args, **kwargs)
+
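Aside (not part of the patch): the `'&'`-joined branch of `bridge_all.predict_no_ui_long_connection` fans one prompt out to several backends and stitches the answers back together with a `---` separator. A minimal self-contained sketch of that fan-out and join shape, with dummy callables standing in for the real bridge functions:

```python
from concurrent.futures import ThreadPoolExecutor

def fan_out(prompt, backends):
    """backends: mapping of model name -> callable(prompt) -> str (dummies here)."""
    with ThreadPoolExecutor(max_workers=len(backends)) as pool:
        futures = {name: pool.submit(fn, prompt) for name, fn in backends.items()}
        parts = [f"[{name} 说]: {fut.result()}" for name, fut in futures.items()]
    return '\n\n---\n\n'.join(parts)

dummy_backends = {
    'gpt-3.5-turbo': lambda p: f"(gpt answer to: {p})",
    'chatglm':       lambda p: f"(chatglm answer to: {p})",
}
print(fan_out("你好", dummy_backends))
```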
diff --git a/request_llm/bridge_chatglm.py b/request_llm/bridge_chatglm.py
new file mode 100644
index 0000000..d6f5eec
--- /dev/null
+++ b/request_llm/bridge_chatglm.py
@@ -0,0 +1,83 @@
+
+from transformers import AutoModel, AutoTokenizer
+import time
+import importlib
+from toolbox import update_ui, get_conf
+
+
+global chatglm_model, chatglm_tokenizer
+
+chatglm_model = None
+chatglm_tokenizer = None
+
+def model_loader():
+    global chatglm_model, chatglm_tokenizer
+    if chatglm_tokenizer is None:
+        chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+    if chatglm_model is None: # 尚未加载
+        device, = get_conf('LOCAL_MODEL_DEVICE')
+        if device=='cpu':
+            chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
+        else:
+            chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+        chatglm_model = chatglm_model.eval()
+    chatglm_model = chatglm_model.eval()
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
+    """
+        函数的说明请见 request_llm/bridge_all.py
+    """
+    global chatglm_model, chatglm_tokenizer
+    if chatglm_model is None:
+        observe_window[0] = "ChatGLM尚未加载,加载需要一段时间 ……"
+
+    model_loader()
+    # chatglm 没有 sys_prompt 接口,因此把prompt加入 history
+    history_feedin = []
+    for i in range(len(history)//2):
+        history_feedin.append(["What can I do?", sys_prompt] )
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
+    response = ""
+    for response, history in chatglm_model.stream_chat(chatglm_tokenizer, inputs, history=history_feedin, max_length=llm_kwargs['max_length'],
+                                                       top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        # 观测窗,把已经获取的数据显示出去
+        observe_window[0] = response
+        # 看门狗 (watchdog),如果超过期限没有喂狗,则终止
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience:
+                raise RuntimeError("程序终止。")
+        # if not console_slience:
+        #     print(response)
+    return response
+
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+        函数的说明请见 request_llm/bridge_all.py
+    """
+    global chatglm_model, chatglm_tokenizer
+    chatbot.append((inputs, ""))
+    if chatglm_model is None:
+        chatbot[-1] = (inputs, "ChatGLM尚未加载,加载需要一段时间 ……")
+        yield from update_ui(chatbot=chatbot, history=[])
+    model_loader()
+
+    if additional_fn is not None:
+        import core_functional
+        importlib.reload(core_functional)    # 热更新prompt
+        core_functional = core_functional.get_core_functions()
+        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # 获取预处理函数(如果有的话)
+        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
+
+
+    history_feedin = []
+    for i in range(len(history)//2):
+        history_feedin.append(["What can I do?", system_prompt] )
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    for response, history in chatglm_model.stream_chat(chatglm_tokenizer, inputs, history=history_feedin, max_length=llm_kwargs['max_length'],
+                                                       top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)
\ No newline at end of file
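Aside (not part of the patch): the ChatGLM bridge streams partial replies into `observe_window[0]` and treats `observe_window[1]` as a watchdog timestamp that the caller keeps refreshing; if it goes stale for longer than `watch_dog_patience` seconds the stream is aborted. A minimal sketch of that pattern, with a fake token stream standing in for `stream_chat`:

```python
import time

def stream_with_watchdog(token_stream, observe_window, patience=5):
    """Copy partial output into observe_window[0]; abort if the caller stops feeding the dog."""
    response = ""
    for response in token_stream:
        observe_window[0] = response                      # expose the partial result
        if len(observe_window) >= 2:
            if (time.time() - observe_window[1]) > patience:
                raise RuntimeError("程序终止。")            # watchdog expired
    return response

window = ["", time.time()]
print(stream_with_watchdog(iter(["你", "你好", "你好!"]), window))
```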
diff --git a/request_llm/bridge_tgui.py b/request_llm/bridge_tgui.py
index 22a4075..fcf852f 100644
--- a/request_llm/bridge_tgui.py
+++ b/request_llm/bridge_tgui.py
@@ -13,23 +13,18 @@
 import time
 import threading
 import importlib
 from toolbox import get_conf, update_ui
-LLM_MODEL, = get_conf('LLM_MODEL')
-# "TGUI:galactica-1.3b@localhost:7860"
-model_name, addr_port = LLM_MODEL.split('@')
-assert ':' in addr_port, "LLM_MODEL 格式不正确!" + LLM_MODEL
-addr, port = addr_port.split(':')
 
 def random_hash():
     letters = string.ascii_lowercase + string.digits
     return ''.join(random.choice(letters) for i in range(9))
 
-async def run(context, max_token=512):
+async def run(context, max_token, temperature, top_p, addr, port):
     params = {
         'max_new_tokens': max_token,
         'do_sample': True,
-        'temperature': 0.5,
-        'top_p': 0.9,
+        'temperature': temperature,
+        'top_p': top_p,
         'typical_p': 1,
         'repetition_penalty': 1.05,
         'encoder_repetition_penalty': 1.0,
@@ -90,7 +85,7 @@ async def run(context, max_token=512):
 
 
 
-def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
     """
     发送至chatGPT,流式获取输出。
     用于基础的对话功能。
@@ -108,18 +103,26 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt=
         inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
 
     raw_input = "What I would like to say is the following: " + inputs
-    logging.info(f'[raw_input] {raw_input}')
     history.extend([inputs, ""])
     chatbot.append([inputs, ""])
     yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
 
-    prompt = inputs
+    prompt = raw_input
     tgui_say = ""
 
+    model_name, addr_port = llm_kwargs['llm_model'].split('@')
+    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
+    addr, port = addr_port.split(':')
+
+
     mutable = ["", time.time()]
     def run_coorotine(mutable):
         async def get_result(mutable):
-            async for response in run(prompt):
+            # "tgui:galactica-1.3b@localhost:7860"
+
+            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
+                                      temperature=llm_kwargs['temperature'],
+                                      top_p=llm_kwargs['top_p'], addr=addr, port=port):
                 print(response[len(mutable[0]):])
                 mutable[0] = response
                 if (time.time() - mutable[1]) > 3:
@@ -140,28 +143,29 @@ def predict_tgui(inputs, top_p, temperature, chatbot, history=[], system_prompt=
         chatbot[-1] = (history[-2], history[-1])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
-    logging.info(f'[response] {tgui_say}')
 
 
-def predict_tgui_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""):
+def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
     raw_input = "What I would like to say is the following: " + inputs
-    prompt = inputs
+    prompt = raw_input
     tgui_say = ""
-    mutable = ["", time.time()]
-    def run_coorotine(mutable):
-        async def get_result(mutable):
-            async for response in run(prompt, max_token=20):
-                print(response[len(mutable[0]):])
-                mutable[0] = response
-                if (time.time() - mutable[1]) > 3:
+    model_name, addr_port = llm_kwargs['llm_model'].split('@')
+    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
+    addr, port = addr_port.split(':')
+
+
+    def run_coorotine(observe_window):
+        async def get_result(observe_window):
+            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
+                                      temperature=llm_kwargs['temperature'],
+                                      top_p=llm_kwargs['top_p'], addr=addr, port=port):
+                print(response[len(observe_window[0]):])
+                observe_window[0] = response
+                if (time.time() - observe_window[1]) > 5:
                     print('exit when no listener')
                     break
-        asyncio.run(get_result(mutable))
-    thread_listen = threading.Thread(target=run_coorotine, args=(mutable,))
+        asyncio.run(get_result(observe_window))
+    thread_listen = threading.Thread(target=run_coorotine, args=(observe_window,))
     thread_listen.start()
-    while thread_listen.is_alive():
-        time.sleep(1)
-        mutable[1] = time.time()
-    tgui_say = mutable[0]
-    return tgui_say
+    return observe_window[0]
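Aside (not part of the patch): the TGUI bridge drives its websocket coroutine from a plain thread by calling `asyncio.run` inside that thread, and uses a shared mutable list as the channel back to the caller (partial text in slot 0, a listener heartbeat in slot 1). A minimal sketch of that thread-plus-`asyncio.run` pattern, with a stand-in coroutine instead of the real websocket generator:

```python
import asyncio
import threading
import time

async def fake_stream(prompt):
    # Stand-in for the websocket generator in run(); yields growing partial replies.
    text = ""
    for ch in f"reply to {prompt}":
        text += ch
        await asyncio.sleep(0.01)
        yield text

def run_in_thread(prompt):
    mutable = ["", time.time()]            # [partial text, last-listener heartbeat]
    def worker():
        async def consume():
            async for partial in fake_stream(prompt):
                mutable[0] = partial
                if (time.time() - mutable[1]) > 5:   # no listener, stop early
                    break
        asyncio.run(consume())
    t = threading.Thread(target=worker)
    t.start()
    while t.is_alive():
        mutable[1] = time.time()           # keep feeding the heartbeat
        time.sleep(0.1)
    return mutable[0]

print(run_in_thread("hello"))
```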
diff --git a/request_llm/requirements_chatglm.txt b/request_llm/requirements_chatglm.txt
new file mode 100644
index 0000000..fa049ca
--- /dev/null
+++ b/request_llm/requirements_chatglm.txt
@@ -0,0 +1,6 @@
+protobuf
+transformers==4.27.1
+cpm_kernels
+torch>=1.10
+mdtex2html
+sentencepiece
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 1cee117..910be08 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-gradio==3.25.0
+gradio>=3.25.0
 tiktoken>=0.3.3
 requests[socks]
 transformers
diff --git a/toolbox.py b/toolbox.py
index 3ced653..0dd035a 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -27,7 +27,7 @@ def ArgsGeneralWrapper(f):
     """
     装饰器函数,用于重组输入参数,改变输入参数的顺序与结构。
     """
-    def decorated(cookies, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
+    def decorated(cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
         txt_passon = txt
         if txt == "" and txt2 != "": txt_passon = txt2
         # 引入一个有cookie的chatbot
@@ -37,8 +37,9 @@ def ArgsGeneralWrapper(f):
         })
         llm_kwargs = {
             'api_key': cookies['api_key'],
-            'llm_model': cookies['llm_model'],
+            'llm_model': llm_model,
             'top_p':top_p,
+            'max_length': max_length,
             'temperature':temperature,
         }
         plugin_kwargs = {
@@ -75,66 +76,6 @@ def get_reduce_token_percent(text):
     except:
         return 0.5, '不详'
 
-def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, llm_kwargs, history=[], sys_prompt='', long_connection=True):
-    """
-        * 此函数未来将被弃用(替代函数 request_gpt_model_in_new_thread_with_ui_alive 文件 chatgpt_academic/crazy_functions/crazy_utils)
-
-        调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
-        i_say: 当前输入
-        i_say_show_user: 显示到对话界面上的当前输入,例如,输入整个文件时,你绝对不想把文件的内容都糊到对话界面上
-        chatbot: 对话界面句柄
-        top_p, temperature: gpt参数
-        history: gpt参数 对话历史
-        sys_prompt: gpt参数 sys_prompt
-        long_connection: 是否采用更稳定的连接方式(推荐)(已弃用)
-    """
-    import time
-    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
-    from toolbox import get_conf
-    TIMEOUT_SECONDS, MAX_RETRY = get_conf('TIMEOUT_SECONDS', 'MAX_RETRY')
-    # 多线程的时候,需要一个mutable结构在不同线程之间传递信息
-    # list就是最简单的mutable结构,我们第一个位置放gpt输出,第二个位置传递报错信息
-    mutable = [None, '']
-    # multi-threading worker
-
-    def mt(i_say, history):
-        while True:
-            try:
-                mutable[0] = predict_no_ui_long_connection(
-                    inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt)
-
-            except ConnectionAbortedError as token_exceeded_error:
-                # 尝试计算比例,尽可能多地保留文本
-                p_ratio, n_exceed = get_reduce_token_percent(
-                    str(token_exceeded_error))
-                if len(history) > 0:
-                    history = [his[int(len(his) * p_ratio):]
-                               for his in history if his is not None]
-                else:
-                    i_say = i_say[:int(len(i_say) * p_ratio)]
-                mutable[1] = f'警告,文本过长将进行截断,Token溢出数:{n_exceed},截断比例:{(1-p_ratio):.0%}。'
-            except TimeoutError as e:
-                mutable[0] = '[Local Message] 请求超时。'
-                raise TimeoutError
-            except Exception as e:
-                mutable[0] = f'[Local Message] 异常:{str(e)}.'
-                raise RuntimeError(f'[Local Message] 异常:{str(e)}.')
-    # 创建新线程发出http请求
-    thread_name = threading.Thread(target=mt, args=(i_say, history))
-    thread_name.start()
-    # 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
-    cnt = 0
-    while thread_name.is_alive():
-        cnt += 1
-        chatbot[-1] = (i_say_show_user,
-                       f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt % 4)))
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-        time.sleep(1)
-    # 把gpt的输出从mutable中取出来
-    gpt_say = mutable[0]
-    if gpt_say == '[Local Message] Failed with timeout.':
-        raise TimeoutError
-    return gpt_say
 
 
 def write_results_to_file(history, file_name=None):
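Aside (not part of the patch): `LOCAL_MODEL_DEVICE` has to match the host, since the ChatGLM bridge only calls `.half().cuda()` when the value is not "cpu". A small hedged helper for choosing the value; it assumes the `torch` dependency from `requirements_chatglm.txt` is installed, and the helper name is illustrative:

```python
def pick_local_model_device() -> str:
    """Return "cuda" when a CUDA-capable GPU is visible to torch, else "cpu"."""
    try:
        import torch
        return "cuda" if torch.cuda.is_available() else "cpu"
    except ImportError:
        return "cpu"

# Paste the printed value into config_private.py as LOCAL_MODEL_DEVICE.
print(pick_local_model_device())
```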
diff --git a/version b/version
index e1a3466..59e288a 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 2.68,
+  "version": 3.0,
   "show_feature": true,
-  "new_feature": "改善理解pdf(chatpdf)功能 <-> 修复读取罕见字符的BUG <-> 如果一键更新失败,可前往github手动更新"
+  "new_feature": "支持ChatGLM <-> 支持多LLM模型同时对话"
 }