From 6cc7d4ed69ff83d40b13702d959a56d35ca04470 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Fri, 25 Aug 2023 13:09:08 +0800
Subject: [PATCH] Fix problems caused by the maximum text length limit of
 Wenxin Yiyan (文心一言)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 request_llm/bridge_all.py     |  2 +-
 request_llm/bridge_chatgpt.py |  1 -
 request_llm/bridge_qianfan.py | 28 +++++++++++++++++++++-------
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index b645d01..e167825 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -175,7 +175,7 @@ model_info = {
         "fn_with_ui": qianfan_ui,
         "fn_without_ui": qianfan_noui,
         "endpoint": None,
-        "max_token": 4096,
+        "max_token": 2000,
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
diff --git a/request_llm/bridge_chatgpt.py b/request_llm/bridge_chatgpt.py
index fc09ed7..87cc664 100644
--- a/request_llm/bridge_chatgpt.py
+++ b/request_llm/bridge_chatgpt.py
@@ -216,7 +216,6 @@ def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
         history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                                max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
         chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
-        # history = []    # 清除历史
     elif "does not exist" in error_msg:
         chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
     elif "Incorrect API key" in error_msg:
diff --git a/request_llm/bridge_qianfan.py b/request_llm/bridge_qianfan.py
index 0d56e3a..7abce8a 100644
--- a/request_llm/bridge_qianfan.py
+++ b/request_llm/bridge_qianfan.py
@@ -1,9 +1,9 @@
 import time, requests, json
-from toolbox import update_ui, get_conf
 from multiprocessing import Process, Pipe
 from functools import wraps
 from datetime import datetime, timedelta
+from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, get_conf
 
 model_name = '千帆大模型平台'
 timeout_bot_msg = '[Local Message] Request timeout. Network error.'
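The hunks above lower the 千帆/文心一言 entry's "max_token" budget in bridge_all.py from 4096 to 2000, keep the clipped history in bridge_chatgpt.py instead of wiping it, and import clip_history into bridge_qianfan.py so the same recovery path can be used there. The following is only a rough, self-contained sketch of that budgeting idea, not the project's toolbox.clip_history: count_tokens, clip_history_sketch and the sample history are invented stand-ins, and the real helper uses the tokenizer registered in model_info rather than a character heuristic.

def count_tokens(text: str) -> int:
    # crude stand-in for the gpt-3.5 tokenizer referenced by model_info["tokenizer"]
    return max(1, len(text) // 2)

def clip_history_sketch(inputs: str, history: list, max_token_limit: int) -> list:
    """Drop the oldest turns until inputs + history fit within half of the model budget."""
    budget = max_token_limit // 2 - count_tokens(inputs)
    clipped = list(history)
    while clipped and sum(count_tokens(h) for h in clipped) > budget:
        clipped.pop(0)   # discard the oldest turn first
    return clipped

if __name__ == '__main__':
    max_token = 2000     # mirrors the new "max_token" value for the 千帆 entry above
    history = ["old question " * 200, "old answer " * 200, "recent question", "recent answer"]
    print(clip_history_sketch("new prompt", history, max_token))   # only the recent turns survive

With the smaller 2000-token budget, any clipping keyed to model_info["max_token"] becomes correspondingly more aggressive, which is the point of the change: requests stay under the backend's hard limit instead of erroring out.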
@@ -101,6 +101,7 @@ def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
     response = requests.request("POST", url, headers=headers, data=payload, stream=True)
     buffer = ""
     for line in response.iter_lines():
+        if len(line) == 0: continue
         try:
             dec = line.decode().lstrip('data:')
             dec = json.loads(dec)
@@ -108,7 +109,10 @@
             buffer += incoming
             yield buffer
         except:
-            if 'error_code' in dec: raise RuntimeError(dec['error_msg'])
+            if ('error_code' in dec) and ("max length" in dec['error_msg']):
+                raise ConnectionAbortedError(dec['error_msg'])
+            elif ('error_code' in dec):
+                raise RuntimeError(dec['error_msg'])
 
 
 def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
@@ -137,11 +141,21 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         from core_functional import handle_core_functionality
         inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
 
-    # 开始接收回复
-    for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
-        chatbot[-1] = (inputs, response)
-        yield from update_ui(chatbot=chatbot, history=history)
-
+    yield from update_ui(chatbot=chatbot, history=history)
+    # 开始接收回复
+    try:
+        for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
+            chatbot[-1] = (inputs, response)
+            yield from update_ui(chatbot=chatbot, history=history)
+    except ConnectionAbortedError as e:
+        from .bridge_all import model_info
+        if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出
+        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
+                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
+        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
+        yield from update_ui(chatbot=chatbot, history=history, msg="异常") # 刷新界面
+        return
+
     # 总结输出
     response = f"[Local Message]: {model_name}响应异常 ..."
     if response == f"[Local Message]: 等待{model_name}响应中 ...":
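The bridge_qianfan.py hunks above change the streaming reader to skip blank keep-alive lines and to surface Baidu's "max length" error as a ConnectionAbortedError, which predict() then catches so it can clip the history and ask the user to retry instead of failing outright. Below is a small, self-contained sketch of that parse-and-classify pattern; the parse_qianfan_stream name and the fake byte stream are invented for illustration, while the real code reads response.iter_lines() from the live API.

import json

def parse_qianfan_stream(lines):
    """Yield the accumulated reply from an SSE-style byte stream (illustrative only)."""
    buffer = ""
    for line in lines:
        if len(line) == 0:
            continue                       # skip keep-alive blank lines between events
        dec = line.decode()
        if dec.startswith('data:'):
            dec = dec[len('data:'):]       # strip the SSE "data:" prefix
        dec = json.loads(dec)
        if 'error_code' in dec:
            if "max length" in dec.get('error_msg', ''):
                # context overflow: let the caller clip history and retry
                raise ConnectionAbortedError(dec['error_msg'])
            raise RuntimeError(dec.get('error_msg', 'unknown error'))
        buffer += dec.get('result', '')
        yield buffer

if __name__ == '__main__':
    fake_stream = [b'data: {"result": "Hello"}', b'', b'data: {"result": ", world"}']
    for partial in parse_qianfan_stream(fake_stream):
        print(partial)     # prints "Hello", then "Hello, world"

Raising a distinct exception type for the overflow case is what lets the except ConnectionAbortedError branch in predict() recover (blank out the overflowing turn, run clip_history against the model's max_token budget, refresh the UI) while every other API error still propagates as a RuntimeError.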