Lazy-load tiktoken

Your Name 2023-04-19 14:27:34 +08:00
parent 28aa6d1dc0
commit b0409b929b
12 changed files with 83 additions and 35 deletions


@@ -103,7 +103,7 @@ def auto_update():
import json
proxies, = get_conf('proxies')
response = requests.get(
"https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=1)
"https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5)
remote_json_data = json.loads(response.text)
remote_version = remote_json_data['version']
if remote_json_data["show_feature"]:
@@ -133,6 +133,13 @@ def auto_update():
except:
print('自动更新程序:已禁用')
def warm_up_modules():
print('正在执行一些模块的预热...')
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
enc.encode("模块预热", disallowed_special=())
enc = model_info["gpt-4"]['tokenizer']
enc.encode("模块预热", disallowed_special=())
if __name__ == '__main__':
import os

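The new warm_up_modules hook above exists because the tokenizers are now lazy: it touches each one once so the BPE files are downloaded at startup instead of on the first user request. A minimal sketch of the same idea, using only the model_info registry shown in this commit; the daemon-thread wiring mirrors the launcher change further down.

import threading

def warm_up_tokenizers(models=("gpt-3.5-turbo", "gpt-4")):
    # deferred import: keeps module import itself cheap
    from request_llm.bridge_all import model_info
    for name in models:
        enc = model_info[name]['tokenizer']
        # the first encode() call is what actually triggers the download
        enc.encode("warm up", disallowed_special=())

# fire-and-forget: a slow download never blocks the web UI
threading.Thread(target=warm_up_tokenizers, name="warm-up", daemon=True).start()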

@@ -11,9 +11,8 @@ class PaperFileGroup():
self.sp_file_tag = []
# count_token
import tiktoken
from toolbox import get_conf
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num

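This hunk, and several nearly identical ones below, all make the same substitution: instead of calling tiktoken.encoding_for_model at import time, the plugin fetches the shared lazy tokenizer from model_info and wraps it in a local token counter. A sketch of that shared pattern, using only names that appear in the diff:

from request_llm.bridge_all import model_info

enc = model_info["gpt-3.5-turbo"]['tokenizer']   # a LazyloadTiktoken; nothing is downloaded yet

def get_token_num(txt):
    # the one-off tokenizer download happens on the first call
    return len(enc.encode(txt, disallowed_special=()))

print(get_token_num("hello world"))              # prints the token count of the input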

@@ -11,9 +11,8 @@ class PaperFileGroup():
self.sp_file_tag = []
# count_token
import tiktoken
from toolbox import get_conf
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num


@@ -2,9 +2,9 @@ import traceback
from toolbox import update_ui, get_conf
def input_clipping(inputs, history, max_token_limit):
import tiktoken
import numpy as np
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
mode = 'input-and-history'


@@ -59,9 +59,8 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
# Step 5: truncation and handling under the token limit
MAX_TOKEN = 3000
import tiktoken
from toolbox import get_conf
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))


@@ -11,9 +11,8 @@ class PaperFileGroup():
self.sp_file_tag = []
# count_token
import tiktoken
from toolbox import get_conf
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num


@@ -68,8 +68,8 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
# Recursively split the PDF file
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from toolbox import get_conf
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)


@@ -17,8 +17,8 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
TOKEN_LIMIT_PER_FRAGMENT = 2500
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from toolbox import get_conf
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)

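Both PDF plugins now get their token counter from model_info as well and then hand it to breakdown_txt_to_satisfy_token_limit_for_pdf together with TOKEN_LIMIT_PER_FRAGMENT. The repo's splitter is not shown in this diff; purely as an illustration of what such a get_token_fn/limit pair is used for, a simple greedy splitter could look like the hypothetical helper below (not the project's implementation):

def breakdown_txt_greedy(txt, get_token_fn, limit):
    # Pack paragraphs into fragments whose token count stays under `limit`.
    fragments, current = [], ""
    for paragraph in txt.split("\n"):
        candidate = current + paragraph + "\n"
        if current and get_token_fn(candidate) > limit:
            fragments.append(current)
            current = paragraph + "\n"
        else:
            current = candidate
    if current:
        fragments.append(current)   # note: a single oversized paragraph is kept as-is
    return fragments

# usage with the counter from the diff:
# paper_fragments = breakdown_txt_greedy(file_content, get_token_num, TOKEN_LIMIT_PER_FRAGMENT)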

@@ -40,7 +40,7 @@ def main():
set_theme = adjust_theme()
# Proxy and auto-update
from check_proxy import check_proxy, auto_update
from check_proxy import check_proxy, auto_update, warm_up_modules
proxy_info = check_proxy(proxies)
gr_L1 = lambda: gr.Row().style()
@@ -180,6 +180,7 @@ def main():
webbrowser.open_new_tab(f"http://localhost:{PORT}/?__dark-theme=true")
threading.Thread(target=open, name="open-browser", daemon=True).start()
threading.Thread(target=auto_update, name="self-upgrade", daemon=True).start()
threading.Thread(target=warm_up_modules, name="warm-up", daemon=True).start()
auto_opentab_delay()
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=PORT, auth=AUTHENTICATION, favicon_path="docs/logo.png")


@@ -9,7 +9,7 @@
2. predict_no_ui_long_connection: during experiments we found that the connection to openai drops easily when predict_no_ui handles long documents; this function works around that by streaming, and it also supports multithreading
"""
import tiktoken
from functools import wraps, lru_cache
from concurrent.futures import ThreadPoolExecutor
from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
@@ -18,13 +18,31 @@ from .bridge_chatgpt import predict as chatgpt_ui
from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
from .bridge_chatglm import predict as chatglm_ui
from .bridge_tgui import predict_no_ui_long_connection as tgui_noui
from .bridge_tgui import predict as tgui_ui
# from .bridge_tgui import predict_no_ui_long_connection as tgui_noui
# from .bridge_tgui import predict as tgui_ui
colors = ['#FF00FF', '#00FFFF', '#FF0000', '#990099', '#009999', '#990044']
get_token_num_gpt35 = lambda txt: len(tiktoken.encoding_for_model("gpt-3.5-turbo").encode(txt, disallowed_special=()))
get_token_num_gpt4 = lambda txt: len(tiktoken.encoding_for_model("gpt-4").encode(txt, disallowed_special=()))
class LazyloadTiktoken(object):
def __init__(self, model):
self.model = model
@staticmethod
@lru_cache(maxsize=128)
def get_encoder(model):
print('正在加载tokenizer如果是第一次运行可能需要一点时间下载参数')
tmp = tiktoken.encoding_for_model(model)
print('加载tokenizer完毕')
return tmp
def encode(self, *args, **kwargs):
encoder = self.get_encoder(self.model)
return encoder.encode(*args, **kwargs)
tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
tokenizer_gpt4 = LazyloadTiktoken("gpt-4")
get_token_num_gpt35 = lambda txt: len(tokenizer_gpt35.encode(txt, disallowed_special=()))
get_token_num_gpt4 = lambda txt: len(tokenizer_gpt4.encode(txt, disallowed_special=()))
model_info = {
# openai
@@ -33,7 +51,7 @@ model_info = {
"fn_without_ui": chatgpt_noui,
"endpoint": "https://api.openai.com/v1/chat/completions",
"max_token": 4096,
"tokenizer": tiktoken.encoding_for_model("gpt-3.5-turbo"),
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
@@ -42,7 +60,7 @@ model_info = {
"fn_without_ui": chatgpt_noui,
"endpoint": "https://api.openai.com/v1/chat/completions",
"max_token": 8192,
"tokenizer": tiktoken.encoding_for_model("gpt-4"),
"tokenizer": tokenizer_gpt4,
"token_cnt": get_token_num_gpt4,
},
@@ -52,7 +70,7 @@ model_info = {
"fn_without_ui": chatgpt_noui,
"endpoint": "https://openai.api2d.net/v1/chat/completions",
"max_token": 4096,
"tokenizer": tiktoken.encoding_for_model("gpt-3.5-turbo"),
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
@@ -61,7 +79,7 @@ model_info = {
"fn_without_ui": chatgpt_noui,
"endpoint": "https://openai.api2d.net/v1/chat/completions",
"max_token": 8192,
"tokenizer": tiktoken.encoding_for_model("gpt-4"),
"tokenizer": tokenizer_gpt4,
"token_cnt": get_token_num_gpt4,
},
@@ -71,7 +89,7 @@ model_info = {
"fn_without_ui": chatglm_noui,
"endpoint": None,
"max_token": 1024,
"tokenizer": tiktoken.encoding_for_model("gpt-3.5-turbo"),
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},

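LazyloadTiktoken above is the core of this commit: constructing the wrapper costs nothing, and tiktoken.encoding_for_model only runs when encode is first called, with lru_cache guaranteeing the heavy encoder is built at most once per model. A standalone sketch of the same technique, renamed to avoid confusion with the class in the diff:

from functools import lru_cache
import tiktoken

class LazyEncoder(object):
    """Defers tiktoken's download/initialisation until the first encode call."""
    def __init__(self, model):
        self.model = model                      # nothing heavy happens here

    @staticmethod
    @lru_cache(maxsize=128)
    def get_encoder(model):
        # the first call may download the BPE files; later calls hit the cache
        return tiktoken.encoding_for_model(model)

    def encode(self, *args, **kwargs):
        return self.get_encoder(self.model).encode(*args, **kwargs)

tok = LazyEncoder("gpt-3.5-turbo")                       # instant, works even offline
n = len(tok.encode("hello", disallowed_special=()))      # the real work happens here, once

The only behavioural difference from eager construction is that an encoder failure (for example, no network access) now surfaces at the first encode call instead of at import time, which is why the commit also adds the warm_up_modules thread.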

@@ -5,6 +5,8 @@ import importlib
from toolbox import update_ui, get_conf
from multiprocessing import Process, Pipe
load_message = "ChatGLM尚未加载加载需要一段时间。注意取决于`config.py`的配置ChatGLM消耗大量的内存CPU或显存GPU也许会导致低配计算机卡死 ……"
#################################################################################
class GetGLMHandle(Process):
def __init__(self):
@@ -12,13 +14,26 @@ class GetGLMHandle(Process):
self.parent, self.child = Pipe()
self.chatglm_model = None
self.chatglm_tokenizer = None
self.info = ""
self.success = True
self.check_dependency()
self.start()
print('初始化')
def check_dependency(self):
try:
import sentencepiece
self.info = "依赖检测通过"
self.success = True
except:
self.info = "缺少ChatGLM的依赖如果要使用ChatGLM除了基础的pip依赖以外您还需要运行`pip install -r request_llm/requirements_chatglm.txt`安装ChatGLM的依赖。"
self.success = False
def ready(self):
return self.chatglm_model is not None
def run(self):
# First run: load the model parameters
retry = 0
while True:
try:
if self.chatglm_model is None:
@@ -33,7 +48,12 @@ class GetGLMHandle(Process):
else:
break
except:
pass
retry += 1
if retry > 3:
self.child.send('[Local Message] Call ChatGLM fail 不能正常加载ChatGLM的参数。')
raise RuntimeError("不能正常加载ChatGLM的参数")
# Enter the task-waiting loop
while True:
kwargs = self.child.recv()
try:
@@ -64,7 +84,11 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
global glm_handle
if glm_handle is None:
glm_handle = GetGLMHandle()
observe_window[0] = "ChatGLM尚未加载加载需要一段时间。注意取决于`config.py`的配置ChatGLM消耗大量的内存CPU或显存GPU也许会导致低配计算机卡死 ……"
observe_window[0] = load_message + "\n\n" + glm_handle.info
if not glm_handle.success:
error = glm_handle.info
glm_handle = None
raise RuntimeError(error)
# chatglm has no sys_prompt interface, so the prompt is added to history instead
history_feedin = []
@@ -93,8 +117,11 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
global glm_handle
if glm_handle is None:
glm_handle = GetGLMHandle()
chatbot[-1] = (inputs, "ChatGLM尚未加载加载需要一段时间。注意取决于`config.py`的配置ChatGLM消耗大量的内存CPU或显存GPU也许会导致低配计算机卡死 ……")
chatbot[-1] = (inputs, load_message + "\n\n" + glm_handle.info)
yield from update_ui(chatbot=chatbot, history=[])
if not glm_handle.success:
glm_handle = None
return
if additional_fn is not None:
import core_functional

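The ChatGLM bridge gets a fail-fast treatment in the same commit: check_dependency records whether sentencepiece is importable before the process starts, and run() now gives up after a bounded number of load attempts instead of retrying forever. A rough sketch of that guard pattern, reduced to plain functions for clarity:

def check_dependency():
    """Return (success, info) instead of letting a missing package fail later and silently."""
    try:
        import sentencepiece  # noqa: F401 -- the extra dependency ChatGLM needs
        return True, "dependency check passed"
    except ImportError:
        return False, ("ChatGLM dependencies are missing; besides the base requirements, "
                       "run `pip install -r request_llm/requirements_chatglm.txt` first.")

def load_with_retry(load_fn, max_retry=3):
    """Call load_fn until it succeeds, raising after max_retry failed re-attempts."""
    for attempt in range(max_retry + 1):
        try:
            return load_fn()
        except Exception:
            if attempt == max_retry:
                raise RuntimeError("could not load the ChatGLM parameters")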

@@ -25,7 +25,6 @@ def ArgsGeneralWrapper(f):
Decorator that reorganizes the input arguments, changing their order and structure
"""
def decorated(cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
from request_llm.bridge_all import model_info
txt_passon = txt
if txt == "" and txt2 != "": txt_passon = txt2
# 引入一个有cookie的chatbot