Fix several concurrency issues with local models

binary-husky 2023-10-31 20:37:07 +08:00
parent 17cf47dcd6
commit 09857ea455
2 changed files with 105 additions and 28 deletions

View File

@@ -3,11 +3,32 @@ import threading
 from toolbox import update_ui
 from multiprocessing import Process, Pipe
 from contextlib import redirect_stdout
+from request_llms.queued_pipe import create_queue_pipe
+
+class DebugLock(object):
+    def __init__(self):
+        self._lock = threading.Lock()
+
+    def acquire(self):
+        print("acquiring", self)
+        #traceback.print_tb
+        self._lock.acquire()
+        print("acquired", self)
+
+    def release(self):
+        print("released", self)
+        #traceback.print_tb
+        self._lock.release()
+
+    def __enter__(self):
+        self.acquire()
+
+    def __exit__(self, type, value, traceback):
+        self.release()
+
 def SingletonLocalLLM(cls):
     """
-    一个单实例装饰器
+    Singleton Decroator for LocalLLMHandle
     """
     _instance = {}
@@ -46,24 +67,41 @@ def reset_tqdm_output():
 class LocalLLMHandle(Process):
     def __init__(self):
-        # ⭐主进程执行
+        # ⭐run in main process
         super().__init__(daemon=True)
+        self.is_main_process = True # init
         self.corrupted = False
         self.load_model_info()
-        self.parent, self.child = Pipe()
+        self.parent, self.child = create_queue_pipe()
+        self.parent_state, self.child_state = create_queue_pipe()
         # allow redirect_stdout
         self.std_tag = "[Subprocess Message] "
         self.child.write = lambda x: self.child.send(self.std_tag + x)
         self.running = True
         self._model = None
         self._tokenizer = None
-        self.info = ""
+        self.state = ""
         self.check_dependency()
+        self.is_main_process = False # state wrap for child process
         self.start()
-        self.threadLock = threading.Lock()
+        self.is_main_process = True # state wrap for child process
+        self.threadLock = DebugLock()
+
+    def get_state(self):
+        # ⭐run in main process
+        while self.parent_state.poll():
+            self.state = self.parent_state.recv()
+        return self.state
+
+    def set_state(self, new_state):
+        # ⭐run in main process or 🏃‍♂️🏃‍♂️🏃‍♂️ run in child process
+        if self.is_main_process:
+            self.state = new_state
+        else:
+            self.child_state.send(new_state)

     def load_model_info(self):
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ run in child process
         raise NotImplementedError("Method not implemented yet")
         self.model_name = ""
         self.cmd_to_install = ""
@@ -72,40 +110,40 @@ class LocalLLMHandle(Process):
         """
         This function should return the model and the tokenizer
         """
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ run in child process
         raise NotImplementedError("Method not implemented yet")

     def llm_stream_generator(self, **kwargs):
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ run in child process
         raise NotImplementedError("Method not implemented yet")

     def try_to_import_special_deps(self, **kwargs):
         """
         import something that will raise error if the user does not install requirement_*.txt
         """
-        # ⭐主进程执行
+        # ⭐run in main process
         raise NotImplementedError("Method not implemented yet")

     def check_dependency(self):
-        # ⭐主进程执行
+        # ⭐run in main process
         try:
             self.try_to_import_special_deps()
-            self.info = "`依赖检测通过`"
+            self.set_state("`依赖检测通过`")
             self.running = True
         except:
-            self.info = f"缺少{self.model_name}的依赖,如果要使用{self.model_name}除了基础的pip依赖以外您还需要运行{self.cmd_to_install}安装{self.model_name}的依赖。"
+            self.set_state(f"缺少{self.model_name}的依赖,如果要使用{self.model_name}除了基础的pip依赖以外您还需要运行{self.cmd_to_install}安装{self.model_name}的依赖。")
             self.running = False

     def run(self):
-        # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
+        # 🏃‍♂️🏃‍♂️🏃‍♂️ run in child process
         # 第一次运行,加载参数
         reset_tqdm_output()
-        self.info = "`尝试加载模型`"
+        self.set_state("`尝试加载模型`")
         try:
             with redirect_stdout(self.child):
                 self._model, self._tokenizer = self.load_model_and_tokenizer()
         except:
-            self.info = "`加载模型失败`"
+            self.set_state("`加载模型失败`")
             self.running = False
             from toolbox import trimmed_format_exc
             self.child.send(
@@ -113,7 +151,7 @@ class LocalLLMHandle(Process):
             self.child.send('[FinishBad]')
             raise RuntimeError(f"不能正常加载{self.model_name}的参数!")

-        self.info = "`准备就绪`"
+        self.set_state("`准备就绪`")
         while True:
             # 进入任务等待状态
             kwargs = self.child.recv()
@@ -121,6 +159,7 @@ class LocalLLMHandle(Process):
             try:
                 for response_full in self.llm_stream_generator(**kwargs):
                     self.child.send(response_full)
+                    print('debug' + response_full)
                 self.child.send('[Finish]')
                 # 请求处理结束,开始下一个循环
             except:
@@ -129,18 +168,35 @@ class LocalLLMHandle(Process):
                     f'[Local Message] 调用{self.model_name}失败.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
                 self.child.send('[Finish]')

+    def clear_pending_messages(self):
+        # ⭐run in main process
+        while True:
+            if self.parent.poll():
+                self.parent.recv()
+                continue
+            for _ in range(5):
+                time.sleep(0.5)
+                if self.parent.poll():
+                    r = self.parent.recv()
+                    continue
+            break
+        return
+
     def stream_chat(self, **kwargs):
-        # ⭐主进程执行
-        if self.info == "`准备就绪`":
+        # ⭐run in main process
+        if self.get_state() == "`准备就绪`":
             yield "`正在等待线程锁,排队中请稍后 ...`"
         with self.threadLock:
             if self.parent.poll():
-                while self.parent.poll(): self.parent.recv()
+                yield "`排队中请稍后 ...`"
+                self.clear_pending_messages()
             self.parent.send(kwargs)
             std_out = ""
             std_out_clip_len = 4096
             while True:
                 res = self.parent.recv()
+                # pipe_watch_dog.feed()
                 if res.startswith(self.std_tag):
                     new_output = res[len(self.std_tag):]
                     std_out = std_out[:std_out_clip_len]
@@ -157,20 +213,18 @@ class LocalLLMHandle(Process):
                     std_out = ""
                     yield res

 def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='classic'):
     load_message = f"{model_name}尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,{model_name}消耗大量的内存CPU或显存GPU也许会导致低配计算机卡死 ……"

     def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
         """
-        多线程方法
-        函数的说明请见 request_llms/bridge_all.py
+        refer to request_llms/bridge_all.py
         """
         _llm_handle = LLMSingletonClass()
         if len(observe_window) >= 1:
-            observe_window[0] = load_message + "\n\n" + _llm_handle.info
+            observe_window[0] = load_message + "\n\n" + _llm_handle.get_state()
         if not _llm_handle.running:
-            raise RuntimeError(_llm_handle.info)
+            raise RuntimeError(_llm_handle.get_state())

         if history_format == 'classic':
             # 没有 sys_prompt 接口因此把prompt加入 history
@@ -210,16 +264,15 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='classic'):
     def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream=True, additional_fn=None):
         """
-        单线程方法
-        函数的说明请见 request_llms/bridge_all.py
+        refer to request_llms/bridge_all.py
         """
         chatbot.append((inputs, ""))
         _llm_handle = LLMSingletonClass()
-        chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.info)
+        chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.get_state())
         yield from update_ui(chatbot=chatbot, history=[])
         if not _llm_handle.running:
-            raise RuntimeError(_llm_handle.info)
+            raise RuntimeError(_llm_handle.get_state())

         if additional_fn is not None:
             from core_functional import handle_core_functionality
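
The change above splits child-to-parent traffic into two channels: streamed results keep flowing over self.parent/self.child, while status strings travel over self.parent_state/self.child_state and are drained by get_state(), and a lock serializes concurrent stream_chat callers. A minimal, self-contained sketch of that pattern (illustrative only; WorkerSketch and its queue names are invented here, not taken from the repository):

# Hypothetical sketch, not repository code: one queue per direction for data,
# plus a dedicated child->parent queue for status strings, so status updates
# never interleave with streamed results.
import threading
from multiprocessing import Process, Queue

class WorkerSketch(Process):
    def __init__(self):
        super().__init__(daemon=True)
        self.task_q = Queue()    # parent -> child: requests
        self.data_q = Queue()    # child -> parent: streamed results
        self.state_q = Queue()   # child -> parent: status strings
        self.state = "loading"
        self.lock = threading.Lock()

    def get_state(self):
        # Main process: drain all pending status updates, keep only the newest.
        while not self.state_q.empty():
            self.state = self.state_q.get()
        return self.state

    def run(self):
        # Child process: announce readiness, then serve requests forever.
        self.state_q.put("ready")
        while True:
            task = self.task_q.get()
            for i in range(3):
                self.data_q.put(f"{task} - chunk {i}")
            self.data_q.put("[Finish]")

    def stream(self, task):
        # Main process: one caller at a time, mirroring stream_chat's threadLock.
        with self.lock:
            self.task_q.put(task)
            while True:
                res = self.data_q.get()
                if res == "[Finish]":
                    break
                yield res

if __name__ == "__main__":
    w = WorkerSketch()
    w.start()
    for chunk in w.stream("hello"):
        print(w.get_state(), chunk)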

View File

@@ -0,0 +1,24 @@
+from multiprocessing import Pipe, Queue
+import time
+import threading
+
+class PipeSide(object):
+    def __init__(self, q_2remote, q_2local) -> None:
+        self.q_2remote = q_2remote
+        self.q_2local = q_2local
+
+    def recv(self):
+        return self.q_2local.get()
+
+    def send(self, buf):
+        self.q_2remote.put(buf)
+
+    def poll(self):
+        return not self.q_2local.empty()
+
+def create_queue_pipe():
+    q_p2c = Queue()
+    q_c2p = Queue()
+    pipe_c = PipeSide(q_2local=q_p2c, q_2remote=q_c2p)
+    pipe_p = PipeSide(q_2local=q_c2p, q_2remote=q_p2c)
+    return pipe_c, pipe_p
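
A possible usage sketch for the new module (assuming it is importable as request_llms.queued_pipe, the path used by the new import in the first file). Each PipeSide wraps one Queue per direction, so send/recv/poll keep a multiprocessing.Pipe-style interface while relying on Queue's internal locking; the Python docs warn that data in a Pipe may be corrupted if two threads use the same end at once, which is presumably the kind of concurrency issue the commit title refers to.

# Usage sketch (not part of the commit). Run from the repository root so that
# request_llms.queued_pipe resolves; the child echoes one message and finishes.
from multiprocessing import Process
from request_llms.queued_pipe import create_queue_pipe

def child_main(pipe):
    task = pipe.recv()              # blocks on the underlying Queue.get()
    pipe.send(f"echo: {task}")
    pipe.send("[Finish]")

if __name__ == "__main__":
    parent_end, child_end = create_queue_pipe()
    worker = Process(target=child_main, args=(child_end,), daemon=True)
    worker.start()
    parent_end.send("hello")
    while True:
        msg = parent_end.recv()
        print(msg)
        if msg == "[Finish]":
            break
    worker.join()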