lazy load: add decode method (+decode 方法)

This commit is contained in:
Your Name 2023-04-19 14:58:39 +08:00
parent a15489d6e6
commit c37c49dd51

View File

@ -39,6 +39,10 @@ class LazyloadTiktoken(object):
encoder = self.get_encoder(self.model) encoder = self.get_encoder(self.model)
return encoder.encode(*args, **kwargs) return encoder.encode(*args, **kwargs)
def decode(self, *args, **kwargs):
    """Decode using the encoder resolved for ``self.model``.

    The encoder is obtained through ``self.get_encoder`` at call time,
    and every positional and keyword argument is forwarded unchanged to
    that encoder's ``decode`` method.

    Args:
        *args: Positional arguments passed straight to the encoder's
            ``decode``.
        **kwargs: Keyword arguments passed straight to the encoder's
            ``decode``.

    Returns:
        Whatever the encoder's ``decode`` returns.
    """
    return self.get_encoder(self.model).decode(*args, **kwargs)
tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo") tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
tokenizer_gpt4 = LazyloadTiktoken("gpt-4") tokenizer_gpt4 = LazyloadTiktoken("gpt-4")
get_token_num_gpt35 = lambda txt: len(tokenizer_gpt35.encode(txt, disallowed_special=())) get_token_num_gpt35 = lambda txt: len(tokenizer_gpt35.encode(txt, disallowed_special=()))