fix missing get_token_num method

This commit is contained in:
binary-husky 2024-02-12 15:58:20 +08:00
parent 8814026ec3
commit b9b1e12dc9

View File

@ -12,6 +12,12 @@ class PaperFileGroup():
self.sp_file_index = []
self.sp_file_tag = []
# count_token
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num
def run_file_split(self, max_token_limit=1900):
    """
    将长文本分离开来