Fix the problem caused by the maximum text length limit of 文心一言 (ERNIE Bot)
parent 67fff17917
commit 6cc7d4ed69
@@ -175,7 +175,7 @@ model_info = {
         "fn_with_ui": qianfan_ui,
         "fn_without_ui": qianfan_noui,
         "endpoint": None,
-        "max_token": 4096,
+        "max_token": 2000,
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
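The lowered "max_token" value is consumed by the clip_history calls in the hunks below, so it effectively caps how many tokens of prompt plus history get sent to 文心一言. A minimal sketch of how such a per-model budget could be read and checked; check_budget is a hypothetical helper, while model_info, 'max_token' and 'token_cnt' are the fields from the entry above:

# Sketch only: how a per-model token budget like "max_token" can be checked.
# check_budget is hypothetical; the dictionary fields mirror the model_info entry above.
def check_budget(inputs: str, history: list, llm_model: str, model_info: dict) -> bool:
    limit = model_info[llm_model]['max_token']    # 2000 for the qianfan entry after this change
    count = model_info[llm_model]['token_cnt']    # e.g. get_token_num_gpt35
    used = count(inputs) + sum(count(h) for h in history)
    return used <= limit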
@@ -216,7 +216,6 @@ def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
         history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                                max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
         chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
-        # history = [] # 清除历史
     elif "does not exist" in error_msg:
         chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
     elif "Incorrect API key" in error_msg:
@@ -1,9 +1,9 @@
 
 import time, requests, json
-from toolbox import update_ui, get_conf
 from multiprocessing import Process, Pipe
 from functools import wraps
 from datetime import datetime, timedelta
+from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, get_conf
 
 model_name = '千帆大模型平台'
 timeout_bot_msg = '[Local Message] Request timeout. Network error.'
@@ -101,6 +101,7 @@ def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
     response = requests.request("POST", url, headers=headers, data=payload, stream=True)
     buffer = ""
     for line in response.iter_lines():
+        if len(line) == 0: continue
         try:
             dec = line.decode().lstrip('data:')
             dec = json.loads(dec)
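The added "if len(line) == 0: continue" guard reflects how the streaming response is delivered: server-sent-event style output interleaves blank separator lines between "data:" payloads, and passing an empty line to json.loads would raise and fall through to the except branch in the next hunk. A small offline sketch of the same parsing loop; the canned byte lines and the "result" key are illustrative, not captured API output:

import json

# Simulated response.iter_lines() output: streamed events are separated by blank lines.
lines = [
    b'data: {"result": "Hello"}',
    b'',                          # blank separator: would break json.loads if not skipped
    b'data: {"result": ", world"}',
]

buffer = ""
for line in lines:
    if len(line) == 0: continue   # the guard this patch adds
    dec = line.decode().lstrip('data:')
    dec = json.loads(dec)
    buffer += dec.get("result", "")

print(buffer)                     # -> Hello, world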
@@ -108,7 +109,10 @@ def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
             buffer += incoming
             yield buffer
         except:
-            if 'error_code' in dec: raise RuntimeError(dec['error_msg'])
+            if ('error_code' in dec) and ("max length" in dec['error_msg']):
+                raise ConnectionAbortedError(dec['error_msg'])
+            elif ('error_code' in dec):
+                raise RuntimeError(dec['error_msg'])
 
 
 def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
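The point of the new branch is to give callers two distinct failure modes: an error payload whose message mentions "max length" becomes a ConnectionAbortedError, which the predict function in the next hunk treats as recoverable, while any other error_code still surfaces as a RuntimeError. A tiny sketch with a made-up payload showing which exception each branch produces:

# Made-up error payload, only to show which exception each branch raises.
def classify(dec: dict):
    if ('error_code' in dec) and ("max length" in dec['error_msg']):
        raise ConnectionAbortedError(dec['error_msg'])   # recoverable: caller clips history and retries
    elif ('error_code' in dec):
        raise RuntimeError(dec['error_msg'])             # anything else is a hard failure

try:
    classify({"error_code": 1, "error_msg": "prompt exceeds max length"})   # hypothetical payload
except ConnectionAbortedError as err:
    print("length overflow, history will be clipped:", err)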
@@ -137,11 +141,21 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         from core_functional import handle_core_functionality
         inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
 
-    # 开始接收回复
-    for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
-        chatbot[-1] = (inputs, response)
-        yield from update_ui(chatbot=chatbot, history=history)
+    yield from update_ui(chatbot=chatbot, history=history)
+    # 开始接收回复
+    try:
+        for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
+            chatbot[-1] = (inputs, response)
+            yield from update_ui(chatbot=chatbot, history=history)
+    except ConnectionAbortedError as e:
+        from .bridge_all import model_info
+        if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出
+        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
+                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
+        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
+        yield from update_ui(chatbot=chatbot, history=history, msg="异常") # 刷新界面
+        return
 
     # 总结输出
     response = f"[Local Message]: {model_name}响应异常 ..."
     if response == f"[Local Message]: 等待{model_name}响应中 ...":
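clip_history itself is not part of this diff; judging from its call sites here, it trims the oldest conversation turns until the prompt plus history fits under max_token_limit (the in-code comment notes that at least half of the history gets released). A rough, self-contained sketch of that idea, not the project's actual toolbox.clip_history implementation:

# Rough sketch of clip_history-style trimming; not the project's actual implementation.
def clip_history_sketch(inputs, history, get_token_num, max_token_limit):
    budget = max_token_limit - get_token_num(inputs)   # tokens left over for history
    clipped = list(history)
    while clipped and sum(get_token_num(h) for h in clipped) > budget:
        clipped.pop(0)                                  # drop the oldest turn first
    return clipped

# Usage with a trivial length-based counter standing in for the GPT-3.5 tokenizer:
print(clip_history_sketch("new question", ["old q", "old a", "recent q", "recent a"],
                          get_token_num=len, max_token_limit=30))   # -> ['recent q', 'recent a']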