Unify the tiktoken encoding model: always use "gpt-3.5-turbo" instead of the configured LLM_MODEL

This commit is contained in:
Your Name 2023-04-17 19:41:50 +08:00
parent 40bc865d33
commit 2472185de9
8 changed files with 9 additions and 9 deletions

View File

@ -13,7 +13,7 @@ class PaperFileGroup():
# count_token
import tiktoken
from toolbox import get_conf
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
def get_token_num(txt):
    """Return how many tokens the tiktoken encoder `enc` produces for `txt`.

    `disallowed_special=()` switches off tiktoken's special-token check, so
    text that looks like a special token is encoded as ordinary text.
    """
    token_ids = enc.encode(txt, disallowed_special=())
    return len(token_ids)
self.get_token_num = get_token_num

View File

@ -13,7 +13,7 @@ class PaperFileGroup():
# count_token
import tiktoken
from toolbox import get_conf
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
def get_token_num(txt):
    """Count the tokens in `txt` as measured by the tiktoken encoder `enc`."""
    # An empty disallowed set means no special-token text is rejected.
    encoded = enc.encode(txt, disallowed_special=())
    return len(encoded)
self.get_token_num = get_token_num

View File

@ -4,7 +4,7 @@ from toolbox import update_ui, get_conf
def input_clipping(inputs, history, max_token_limit):
import tiktoken
import numpy as np
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
def get_token_num(txt):
    """Return the token length of `txt` under the tiktoken encoder `enc`."""
    # Encode with the special-token check disabled, then measure the result.
    ids = enc.encode(txt, disallowed_special=())
    return len(ids)
mode = 'input-and-history'

View File

@ -61,7 +61,7 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
MAX_TOKEN = 3000
import tiktoken
from toolbox import get_conf
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
def get_token_fn(txt):
    """Return the number of tokens `txt` encodes to with the encoder `enc`."""
    # disallowed_special=() lets special-token-like text through as plain text.
    tokens = enc.encode(txt, disallowed_special=())
    return len(tokens)

View File

@ -13,7 +13,7 @@ class PaperFileGroup():
# count_token
import tiktoken
from toolbox import get_conf
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
def get_token_num(txt):
    """Measure `txt` in tokens using the tiktoken encoder `enc`.

    The special-token check is disabled (`disallowed_special=()`), so
    arbitrary input text is counted without raising on special tokens.
    """
    token_list = enc.encode(txt, disallowed_special=())
    return len(token_list)
self.get_token_num = get_token_num

View File

@ -69,7 +69,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
# 递归地切割PDF文件
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from toolbox import get_conf
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
def get_token_num(txt):
    """Return the token count of `txt`; passed below as `get_token_fn`."""
    # Encode with no disallowed special tokens and count the resulting ids.
    encoded_ids = enc.encode(txt, disallowed_special=())
    return len(encoded_ids)
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)

View File

@ -18,7 +18,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from toolbox import get_conf
enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
def get_token_num(txt):
    """Count tokens in `txt` with the encoder `enc`; used as the `get_token_fn` callback below."""
    # Special-token checking is turned off so any text can be measured.
    pieces = enc.encode(txt, disallowed_special=())
    return len(pieces)
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)

View File

@ -22,8 +22,8 @@ import importlib
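# config_private.py holds your own secrets, such as the API key and the proxy URL.
# When loading, first check whether a private config_private file exists (it is not tracked by git); if it does, it overrides the original config file.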
from toolbox import get_conf, update_ui
proxies, API_URL, API_KEY, TIMEOUT_SECONDS, MAX_RETRY, LLM_MODEL = \
get_conf('proxies', 'API_URL', 'API_KEY', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'LLM_MODEL')
proxies, API_URL, API_KEY, TIMEOUT_SECONDS, MAX_RETRY = \
get_conf('proxies', 'API_URL', 'API_KEY', 'TIMEOUT_SECONDS', 'MAX_RETRY')
timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
'网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'