Update Claude 3 API request and fix some bugs (#1641)

* Update version to 3.74

* Add support for Yi Model API (#1635)

* Update to support Yi (零一万物) models

* Remove newbing

* Update config

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>

* Update Claude request to HTTP type

* Update the endpoint

* Add support for other types of pictures

* Update pip packages

* Fix console_slience issue during error handling

* Revert version changes

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>
Menghuan1918 2024-03-20 17:22:23 +08:00 committed by GitHub
parent 84ccc9e64c
commit e42ede512b
GPG Key ID: B5690EEEBB952194
4 changed files with 474 additions and 98 deletions
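The heart of the change set: the Claude bridge stops using the anthropic SDK and issues a plain HTTP request to the Anthropic Messages API, so the repo's usual proxies/timeout/retry machinery applies. A minimal sketch of the new request shape, distilled from the diff below (key and prompt values are placeholders):

```python
import requests

endpoint = "https://api.anthropic.com/v1/messages"   # new claude_endpoint (see bridge_all.py below)
headers = {
    'x-api-key': "sk-ant-...",           # placeholder; the code reads ANTHROPIC_API_KEY from config
    'anthropic-version': '2023-06-01',
    'content-type': 'application/json',
}
payload = {
    'model': "claude-3-opus-20240229",
    'max_tokens': 4096,
    'messages': [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}],
    'temperature': 1.0,
    'stream': True,                      # the reply arrives as server-sent events
    'system': "You are a helpful assistant.",
}
response = requests.post(endpoint, headers=headers, json=payload, stream=True, timeout=30)
for line in response.iter_lines():
    print(line)  # b'event: ...' / b'data: {...}' lines, parsed by decode_chunk() in bridge_claude.py
```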

View File

@@ -47,6 +47,7 @@ AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-p
 # "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125"
 # "claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2",
 # "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama",
+# "yi-34b-chat-0205", "yi-34b-chat-200k"
 # ]
 # --- --- --- ---
 # In addition, for more flexible integration with the one-api multi-model management UI, you can also, when connecting one-api,
@@ -212,6 +213,10 @@ ANTHROPIC_API_KEY = ""
 MOONSHOT_API_KEY = ""
 
 
+# Yi Model (零一万物) API KEY
+YIMODEL_API_KEY = ""
+
+
 # Mathpix has OCR capability for PDFs, but requires a registered account
 MATHPIX_APPID = ""
 MATHPIX_APPKEY = ""
@@ -313,6 +318,9 @@ NUM_CUSTOM_BASIC_BTN = 4
 "glm-4", "glm-3-turbo", "zhipuai" (Zhipu AI large models)
 ZHIPUAI_API_KEY
 
+"yi-34b-chat-0205", "yi-34b-chat-200k" and other Yi Model (零一万物) large models
+YIMODEL_API_KEY
+
 "qwen-turbo" and other Tongyi Qianwen (通义千问) large models
 DASHSCOPE_API_KEY
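With the two config hunks above, enabling the new Yi models is purely a configuration change. A minimal sketch, assuming the standard config_private.py override mechanism this repo uses (the key value is a placeholder):

```python
# config_private.py -- overrides config.py and is not tracked by git
YIMODEL_API_KEY = "your-lingyiwanwu-api-key"   # placeholder
AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "yi-34b-chat-0205", "yi-34b-chat-200k"]
```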

View File

@@ -62,7 +62,8 @@ openai_endpoint = "https://api.openai.com/v1/chat/completions"
 api2d_endpoint = "https://openai.api2d.net/v1/chat/completions"
 newbing_endpoint = "wss://sydney.bing.com/sydney/ChatHub"
 gemini_endpoint = "https://generativelanguage.googleapis.com/v1beta/models"
-claude_endpoint = "https://api.anthropic.com"
+claude_endpoint = "https://api.anthropic.com/v1/messages"
+yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
 
 if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
 azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'

@@ -80,6 +81,7 @@ if api2d_endpoint in API_URL_REDIRECT: api2d_endpoint = API_URL_REDIRECT[api2d_e
 if newbing_endpoint in API_URL_REDIRECT: newbing_endpoint = API_URL_REDIRECT[newbing_endpoint]
 if gemini_endpoint in API_URL_REDIRECT: gemini_endpoint = API_URL_REDIRECT[gemini_endpoint]
 if claude_endpoint in API_URL_REDIRECT: claude_endpoint = API_URL_REDIRECT[claude_endpoint]
+if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
 
 # Get the tokenizer
 tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
@@ -316,6 +318,7 @@ model_info.update({
     "moonshot-v1-8k": {
         "fn_with_ui": moonshot_ui,
         "fn_without_ui": moonshot_no_ui,
+        "can_multi_thread": True,
         "endpoint": None,
         "max_token": 1024 * 8,
         "tokenizer": tokenizer_gpt35,

@@ -324,6 +327,7 @@ model_info.update({
     "moonshot-v1-32k": {
         "fn_with_ui": moonshot_ui,
         "fn_without_ui": moonshot_no_ui,
+        "can_multi_thread": True,
         "endpoint": None,
         "max_token": 1024 * 32,
         "tokenizer": tokenizer_gpt35,

@@ -332,6 +336,7 @@ model_info.update({
     "moonshot-v1-128k": {
         "fn_with_ui": moonshot_ui,
         "fn_without_ui": moonshot_no_ui,
+        "can_multi_thread": True,
         "endpoint": None,
         "max_token": 1024 * 128,
         "tokenizer": tokenizer_gpt35,
@ -473,22 +478,6 @@ if "stack-claude" in AVAIL_LLM_MODELS:
"token_cnt": get_token_num_gpt35, "token_cnt": get_token_num_gpt35,
} }
}) })
if "newbing-free" in AVAIL_LLM_MODELS:
try:
from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
from .bridge_newbingfree import predict as newbingfree_ui
model_info.update({
"newbing-free": {
"fn_with_ui": newbingfree_ui,
"fn_without_ui": newbingfree_noui,
"endpoint": newbing_endpoint,
"max_token": 4096,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
})
except:
print(trimmed_format_exc())
if "newbing" in AVAIL_LLM_MODELS: # same with newbing-free if "newbing" in AVAIL_LLM_MODELS: # same with newbing-free
try: try:
from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
@ -521,6 +510,7 @@ if "chatglmft" in AVAIL_LLM_MODELS: # same with newbing-free
}) })
except: except:
print(trimmed_format_exc()) print(trimmed_format_exc())
# -=-=-=-=-=-=- 上海AI-LAB书生大模型 -=-=-=-=-=-=-
if "internlm" in AVAIL_LLM_MODELS: if "internlm" in AVAIL_LLM_MODELS:
try: try:
from .bridge_internlm import predict_no_ui_long_connection as internlm_noui from .bridge_internlm import predict_no_ui_long_connection as internlm_noui
@ -553,6 +543,7 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS:
}) })
except: except:
print(trimmed_format_exc()) print(trimmed_format_exc())
# -=-=-=-=-=-=- 通义-本地模型 -=-=-=-=-=-=-
if "qwen-local" in AVAIL_LLM_MODELS: if "qwen-local" in AVAIL_LLM_MODELS:
try: try:
from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui
@ -570,6 +561,7 @@ if "qwen-local" in AVAIL_LLM_MODELS:
}) })
except: except:
print(trimmed_format_exc()) print(trimmed_format_exc())
# -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=-
if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai
try: try:
from .bridge_qwen import predict_no_ui_long_connection as qwen_noui from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
@ -605,7 +597,35 @@ if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-
}) })
except: except:
print(trimmed_format_exc()) print(trimmed_format_exc())
if "spark" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型 # -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=-
if "yi-34b-chat-0205" in AVAIL_LLM_MODELS or "yi-34b-chat-200k" in AVAIL_LLM_MODELS: # zhipuai
try:
from .bridge_yimodel import predict_no_ui_long_connection as yimodel_noui
from .bridge_yimodel import predict as yimodel_ui
model_info.update({
"yi-34b-chat-0205": {
"fn_with_ui": yimodel_ui,
"fn_without_ui": yimodel_noui,
"can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用
"endpoint": yimodel_endpoint,
"max_token": 4000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"yi-34b-chat-200k": {
"fn_with_ui": yimodel_ui,
"fn_without_ui": yimodel_noui,
"can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用
"endpoint": yimodel_endpoint,
"max_token": 200000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
})
except:
print(trimmed_format_exc())
# -=-=-=-=-=-=- 讯飞星火认知大模型 -=-=-=-=-=-=-
if "spark" in AVAIL_LLM_MODELS:
try: try:
from .bridge_spark import predict_no_ui_long_connection as spark_noui from .bridge_spark import predict_no_ui_long_connection as spark_noui
from .bridge_spark import predict as spark_ui from .bridge_spark import predict as spark_ui
@ -681,6 +701,7 @@ if "llama2" in AVAIL_LLM_MODELS: # llama2
}) })
except: except:
print(trimmed_format_exc()) print(trimmed_format_exc())
# -=-=-=-=-=-=- 智谱 -=-=-=-=-=-=-
if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai 是glm-4的别名向后兼容配置 if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai 是glm-4的别名向后兼容配置
try: try:
model_info.update({ model_info.update({
@ -695,6 +716,7 @@ if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai 是glm-4的别名向后兼容
}) })
except: except:
print(trimmed_format_exc()) print(trimmed_format_exc())
# -=-=-=-=-=-=- 幻方-深度求索大模型 -=-=-=-=-=-=-
if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder
try: try:
from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui
@ -711,6 +733,8 @@ if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder
}) })
except: except:
print(trimmed_format_exc()) print(trimmed_format_exc())
# -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=- # -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]: for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
# 为了更灵活地接入one-api多模型管理界面设计了此接口例子AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"] # 为了更灵活地接入one-api多模型管理界面设计了此接口例子AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
@ -735,8 +759,8 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
}) })
# <-- 用于定义和切换多个azure模型 --> # -=-=-=-=-=-=- azure模型对齐支持 -=-=-=-=-=-=-
AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY") AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY") # <-- 用于定义和切换多个azure模型 -->
if len(AZURE_CFG_ARRAY) > 0: if len(AZURE_CFG_ARRAY) > 0:
for azure_model_name, azure_cfg_dict in AZURE_CFG_ARRAY.items(): for azure_model_name, azure_cfg_dict in AZURE_CFG_ARRAY.items():
# 可能会覆盖之前的配置,但这是意料之中的 # 可能会覆盖之前的配置,但这是意料之中的
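For orientation: every entry registered above follows the same model_info schema, and callers dispatch through the registry rather than importing bridges directly. An illustrative (not verbatim) consumer, using only keys that appear in the entries above and assuming the repo's request_llms package layout:

```python
from request_llms.bridge_all import model_info

def call_model(llm_model, inputs, llm_kwargs, history, sys_prompt):
    entry = model_info[llm_model]              # e.g. "yi-34b-chat-200k"
    predict_no_ui = entry["fn_without_ui"]     # the thread-capable variant
    # "can_multi_thread" tells plugins whether they may fan out concurrent
    # requests; the Yi entries set it to False, the moonshot entries to True.
    if not entry.get("can_multi_thread", False):
        pass  # a caller would serialize its requests here
    return predict_no_ui(inputs, llm_kwargs, history=history, sys_prompt=sys_prompt)
```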

View File

@@ -9,12 +9,13 @@
     Functions with multi-threading capability:
     2. predict_no_ui_long_connection: supports multi-threading
 """
+import logging
 import os
 import time
 import traceback
 from toolbox import get_conf, update_ui, trimmed_format_exc, encode_image, every_image_file_in_path
+import json
+import requests
 
 picture_system_prompt = "\n当回复图像时,必须说明正在回复哪张图像。所有图像仅在最后一个问题中提供,即使它们在历史记录中被提及。请使用'这是第X张图像:'的格式来指明您正在描述的是哪张图像。"
 Claude_3_Models = ["claude-3-sonnet-20240229", "claude-3-opus-20240229"]
@@ -38,6 +39,34 @@ def get_full_error(chunk, stream_response):
             break
     return chunk
 
+def decode_chunk(chunk):
+    # Read some information in advance (used to detect anomalies)
+    chunk_decoded = chunk.decode()
+    chunkjson = None
+    is_last_chunk = False
+    need_to_pass = False
+    if chunk_decoded.startswith('data:'):
+        try:
+            chunkjson = json.loads(chunk_decoded[6:])
+        except:
+            need_to_pass = True
+            pass
+    elif chunk_decoded.startswith('event:'):
+        try:
+            event_type = chunk_decoded.split(':')[1].strip()
+            if event_type == 'content_block_stop' or event_type == 'message_stop':
+                is_last_chunk = True
+            elif event_type == 'content_block_start' or event_type == 'message_start':
+                need_to_pass = True
+                pass
+        except:
+            need_to_pass = True
+            pass
+    else:
+        need_to_pass = True
+        pass
+    return need_to_pass, chunkjson, is_last_chunk
+
 def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
     """

@@ -53,53 +82,60 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
     observe_window = None:
         Used to pass the already-generated output across threads; most of the time it is only for a fancy visual effect and can be left empty. observe_window[0]: observation window. observe_window[1]: watchdog
     """
-    from anthropic import Anthropic
     watch_dog_patience = 5 # watchdog patience; 5 seconds is enough
-    if inputs == "": inputs = "空空如也的输入栏"
-    message = generate_payload(inputs, llm_kwargs, history, stream=True, image_paths=None)
-    retry = 0
     if len(ANTHROPIC_API_KEY) == 0:
         raise RuntimeError("没有设置ANTHROPIC_API_KEY选项")
+    if inputs == "": inputs = "空空如也的输入栏"
+    headers, message = generate_payload(inputs, llm_kwargs, history, sys_prompt, image_paths=None)
+    retry = 0
 
     while True:
         try:
             # make a POST request to the API endpoint, stream=False
             from .bridge_all import model_info
-            anthropic = Anthropic(api_key=ANTHROPIC_API_KEY, base_url=model_info[llm_kwargs['llm_model']]['endpoint'])
-            # endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
-            # with ProxyNetworkActivate()
-            stream = anthropic.messages.create(
-                messages=message,
-                max_tokens=4096, # The maximum number of tokens to generate before stopping.
-                model=llm_kwargs['llm_model'],
-                stream=True,
-                temperature = llm_kwargs['temperature'],
-                system=sys_prompt
-            )
-            break
-        except Exception as e:
+            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
+            response = requests.post(endpoint, headers=headers, json=message,
+                                     proxies=proxies, stream=True, timeout=TIMEOUT_SECONDS);break
+        except requests.exceptions.ReadTimeout as e:
             retry += 1
             traceback.print_exc()
             if retry > MAX_RETRY: raise TimeoutError
             if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
 
+    stream_response = response.iter_lines()
     result = ''
-    try:
-        for completion in stream:
-            if completion.type == "message_start" or completion.type == "content_block_start":
-                continue
-            elif completion.type == "message_stop" or completion.type == "content_block_stop" or completion.type == "message_delta":
-                break
-            result += completion.delta.text
-            if not console_slience: print(completion.delta.text, end='')
-            if observe_window is not None:
-                # Observation window: show the data obtained so far
-                if len(observe_window) >= 1: observe_window[0] += completion.delta.text
-                # Watchdog: terminate if not fed within the deadline
-                if len(observe_window) >= 2:
-                    if (time.time()-observe_window[1]) > watch_dog_patience:
-                        raise RuntimeError("用户取消了程序。")
-    except Exception as e:
-        traceback.print_exc()
+    while True:
+        try: chunk = next(stream_response)
+        except StopIteration:
+            break
+        except requests.exceptions.ConnectionError:
+            chunk = next(stream_response) # Failed; retry once? If it fails again, there is nothing more we can do.
+        need_to_pass, chunkjson, is_last_chunk = decode_chunk(chunk)
+        if chunk:
+            try:
+                if need_to_pass:
+                    pass
+                elif is_last_chunk:
+                    logging.info(f'[response] {result}')
+                    break
+                else:
+                    if chunkjson and chunkjson['type'] == 'content_block_delta':
+                        result += chunkjson['delta']['text']
+                        print(chunkjson['delta']['text'], end='')
+                        if observe_window is not None:
+                            # Observation window: show the data obtained so far
+                            if len(observe_window) >= 1:
+                                observe_window[0] += chunkjson['delta']['text']
+                            # Watchdog: terminate if not fed within the deadline
+                            if len(observe_window) >= 2:
+                                if (time.time()-observe_window[1]) > watch_dog_patience:
+                                    raise RuntimeError("用户取消了程序。")
+            except Exception as e:
+                chunk = get_full_error(chunk, stream_response)
+                chunk_decoded = chunk.decode()
+                error_msg = chunk_decoded
+                print(error_msg)
+                raise RuntimeError("Json解析不合常规")
     return result
@@ -119,7 +155,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
     additional_fn represents which button was clicked (see functional.py for the buttons)
     """
     if inputs == "": inputs = "空空如也的输入栏"
-    from anthropic import Anthropic
     if len(ANTHROPIC_API_KEY) == 0:
         chatbot.append((inputs, "没有设置ANTHROPIC_API_KEY"))
         yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh the UI

@@ -145,7 +180,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
     yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh the UI
 
     try:
-        message = generate_payload(inputs, llm_kwargs, history, stream, image_paths)
+        headers, message = generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths)
     except RuntimeError as e:
         chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
         yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # refresh the UI
@@ -158,46 +193,61 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         try:
             # make a POST request to the API endpoint, stream=True
             from .bridge_all import model_info
-            anthropic = Anthropic(api_key=ANTHROPIC_API_KEY, base_url=model_info[llm_kwargs['llm_model']]['endpoint'])
-            # endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
-            # with ProxyNetworkActivate()
-            stream = anthropic.messages.create(
-                messages=message,
-                max_tokens=4096, # The maximum number of tokens to generate before stopping.
-                model=llm_kwargs['llm_model'],
-                stream=True,
-                temperature = llm_kwargs['temperature'],
-                system=system_prompt
-            )
-            break
-        except:
+            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
+            response = requests.post(endpoint, headers=headers, json=message,
+                                     proxies=proxies, stream=True, timeout=TIMEOUT_SECONDS);break
+        except requests.exceptions.ReadTimeout as e:
             retry += 1
-            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
-            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
-            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # refresh the UI
+            traceback.print_exc()
             if retry > MAX_RETRY: raise TimeoutError
+            if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
 
+    stream_response = response.iter_lines()
     gpt_replying_buffer = ""
-    for completion in stream:
-        if completion.type == "message_start" or completion.type == "content_block_start":
-            continue
-        elif completion.type == "message_stop" or completion.type == "content_block_stop" or completion.type == "message_delta":
-            break
-        try:
-            gpt_replying_buffer = gpt_replying_buffer + completion.delta.text
-            history[-1] = gpt_replying_buffer
-            chatbot[-1] = (history[-2], history[-1])
-            yield from update_ui(chatbot=chatbot, history=history, msg='正常') # refresh the UI
-        except Exception as e:
-            from toolbox import regular_txt_to_markdown
-            tb_str = '```\n' + trimmed_format_exc() + '```'
-            chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str}")
-            yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + tb_str) # refresh the UI
-            return
+    while True:
+        try: chunk = next(stream_response)
+        except StopIteration:
+            break
+        except requests.exceptions.ConnectionError:
+            chunk = next(stream_response) # Failed; retry once? If it fails again, there is nothing more we can do.
+        need_to_pass, chunkjson, is_last_chunk = decode_chunk(chunk)
+        if chunk:
+            try:
+                if need_to_pass:
+                    pass
+                elif is_last_chunk:
+                    logging.info(f'[response] {gpt_replying_buffer}')
+                    break
+                else:
+                    if chunkjson and chunkjson['type'] == 'content_block_delta':
+                        gpt_replying_buffer += chunkjson['delta']['text']
+                        history[-1] = gpt_replying_buffer
+                        chatbot[-1] = (history[-2], history[-1])
+                        yield from update_ui(chatbot=chatbot, history=history, msg='正常') # refresh the UI
+            except Exception as e:
+                chunk = get_full_error(chunk, stream_response)
+                chunk_decoded = chunk.decode()
+                error_msg = chunk_decoded
+                print(error_msg)
+                raise RuntimeError("Json解析不合常规")
-def generate_payload(inputs, llm_kwargs, history, stream, image_paths):
+def multiple_picture_types(image_paths):
+    """
+    Return image/jpeg, image/png, image/gif, or image/webp according to the picture type; fall back to image/jpeg when the type cannot be determined
+    """
+    for image_path in image_paths:
+        if image_path.endswith('.jpeg') or image_path.endswith('.jpg'):
+            return 'image/jpeg'
+        elif image_path.endswith('.png'):
+            return 'image/png'
+        elif image_path.endswith('.gif'):
+            return 'image/gif'
+        elif image_path.endswith('.webp'):
+            return 'image/webp'
+    return 'image/jpeg'
+
+def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths):
     """
     Integrate all information, select the LLM model, and generate the HTTP request, in preparation for sending it
     """
@@ -223,19 +273,16 @@ def generate_payload(inputs, llm_kwargs, history, stream, image_paths):
             messages[-1]['content'][0]['text'] = what_gpt_answer['content'][0]['text']
 
     if any([llm_kwargs['llm_model'] == model for model in Claude_3_Models]) and image_paths:
-        base64_images = []
-        for image_path in image_paths:
-            base64_images.append(encode_image(image_path))
         what_i_ask_now = {}
         what_i_ask_now["role"] = "user"
         what_i_ask_now["content"] = []
-        for base64_image in base64_images:
+        for image_path in image_paths:
             what_i_ask_now["content"].append({
                 "type": "image",
                 "source": {
                     "type": "base64",
-                    "media_type": "image/jpeg",
-                    "data": base64_image,
+                    "media_type": multiple_picture_types(image_paths),
+                    "data": encode_image(image_path),
                 }
             })
         what_i_ask_now["content"].append({"type": "text", "text": inputs})
@@ -244,4 +291,18 @@ def generate_payload(inputs, llm_kwargs, history, stream, image_paths):
         what_i_ask_now["role"] = "user"
         what_i_ask_now["content"] = [{"type": "text", "text": inputs}]
     messages.append(what_i_ask_now)
-    return messages
+    # Assemble the headers and message
+    headers = {
+        'x-api-key': ANTHROPIC_API_KEY,
+        'anthropic-version': '2023-06-01',
+        'content-type': 'application/json'
+    }
+    payload = {
+        'model': llm_kwargs['llm_model'],
+        'max_tokens': 4096,
+        'messages': messages,
+        'temperature': llm_kwargs['temperature'],
+        'stream': True,
+        'system': system_prompt
+    }
+    return headers, payload
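For reference, the Anthropic Messages API streams server-sent events, which is exactly what the new decode_chunk above classifies. A sketch of the wire format and the resulting flags (event payloads abbreviated; each line is a bytes object as yielded by response.iter_lines()):

```python
# Typical SSE lines from /v1/messages with "stream": true (abbreviated):
#   b'event: message_start'
#   b'data: {"type": "message_start", ...}'
#   b'event: content_block_delta'
#   b'data: {"type": "content_block_delta", "delta": {"type": "text_delta", "text": "Hel"}}'
#   b'event: message_stop'
need_to_pass, chunkjson, is_last_chunk = decode_chunk(b'event: message_stop')
# -> need_to_pass=False, chunkjson=None, is_last_chunk=True: the reader breaks out of its loop

need_to_pass, chunkjson, is_last_chunk = decode_chunk(
    b'data: {"type": "content_block_delta", "delta": {"type": "text_delta", "text": "Hel"}}')
# -> chunkjson['delta']['text'] == "Hel" is appended to the reply buffer
```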

View File

@@ -0,0 +1,283 @@
# Adapted from bridge_chatgpt.py in the same directory
"""
    This file mainly contains two functions:

    Function without multi-threading capability:
    1. predict: used in normal conversation; fully interactive; cannot be multi-threaded

    Function with multi-threading capability:
    2. predict_no_ui_long_connection: supports multi-threading
"""
import json
import time
import gradio as gr
import logging
import traceback
import requests
import importlib
import random

# config_private.py holds your secrets, such as API keys and proxy URLs
# When reading the config, we first check for a private config_private file (not tracked by git); if present, it overrides the original config file
from toolbox import get_conf, update_ui, trimmed_format_exc, is_the_upload_folder, read_one_api_model_name
proxies, TIMEOUT_SECONDS, MAX_RETRY, YIMODEL_API_KEY = \
    get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'YIMODEL_API_KEY')

timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
                  '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
def get_full_error(chunk, stream_response):
    """
    Get the complete error message returned by the OpenAI-style API
    """
    while True:
        try:
            chunk += next(stream_response)
        except:
            break
    return chunk

def decode_chunk(chunk):
    # Read some information in advance (used to detect anomalies)
    chunk_decoded = chunk.decode()
    chunkjson = None
    is_last_chunk = False
    try:
        chunkjson = json.loads(chunk_decoded[6:])
        is_last_chunk = chunkjson.get("lastOne", False)
    except:
        pass
    return chunk_decoded, chunkjson, is_last_chunk
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    Send to the LLM and wait for the reply, completed in one go without showing intermediate progress; internally uses the stream method to avoid the connection being cut off midway.
    inputs:
        The input for this query
    sys_prompt:
        The silent system prompt
    llm_kwargs:
        Internal tuning parameters of the model
    history:
        The list of previous conversation turns
    observe_window = None:
        Used to pass the already-generated output across threads; most of the time it is only for a fancy visual effect and can be left empty. observe_window[0]: observation window. observe_window[1]: watchdog
    """
    watch_dog_patience = 5 # watchdog patience; 5 seconds is enough
    if inputs == "": inputs = "空空如也的输入栏"
    headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=False
            from .bridge_all import model_info
            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
        except requests.exceptions.ReadTimeout as e:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
            if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')

    stream_response = response.iter_lines()
    result = ''
    is_head_of_the_stream = True
    while True:
        try: chunk = next(stream_response)
        except StopIteration:
            break
        except requests.exceptions.ConnectionError:
            chunk = next(stream_response) # Failed; retry once? If it fails again, there is nothing more we can do.
        chunk_decoded, chunkjson, is_last_chunk = decode_chunk(chunk)
        if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r'"role":"assistant"' in chunk_decoded):
            # The first frame of the data stream does not carry content
            is_head_of_the_stream = False; continue
        if chunk:
            try:
                if is_last_chunk:
                    # Judged to be the end of the data stream; the reply buffer is complete
                    logging.info(f'[response] {result}')
                    break
                result += chunkjson['choices'][0]["delta"]["content"]
                if not console_slience: print(chunkjson['choices'][0]["delta"]["content"], end='')
                if observe_window is not None:
                    # Observation window: show the data obtained so far
                    if len(observe_window) >= 1:
                        observe_window[0] += chunkjson['choices'][0]["delta"]["content"]
                    # Watchdog: terminate if not fed within the deadline
                    if len(observe_window) >= 2:
                        if (time.time()-observe_window[1]) > watch_dog_patience:
                            raise RuntimeError("用户取消了程序。")
            except Exception as e:
                chunk = get_full_error(chunk, stream_response)
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
                print(error_msg)
                raise RuntimeError("Json解析不合常规")
    return result
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Send to the LLM, fetching the output in a streaming fashion.
    Used for the basic conversation feature.
    inputs: the input for this query
    top_p, temperature: internal tuning parameters of the model
    history: the list of previous conversation turns (note that if either inputs or history is too long, it will trigger a token-overflow error)
    chatbot: the conversation list shown in the WebUI; modify it and then yield it out to update the conversation UI directly
    additional_fn: which button was clicked; see functional.py for the buttons
    """
    if len(YIMODEL_API_KEY) == 0:
        raise RuntimeError("没有设置YIMODEL_API_KEY选项")
    if inputs == "": inputs = "空空如也的输入栏"
    user_input = inputs
    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = inputs
    logging.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh the UI

    # check mis-behavior
    if is_the_upload_folder(user_input):
        chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。")
        yield from update_ui(chatbot=chatbot, history=history, msg="正常") # refresh the UI
        time.sleep(2)

    headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)

    from .bridge_all import model_info
    endpoint = model_info[llm_kwargs['llm_model']]['endpoint']

    history.append(inputs); history.append("")

    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
        except:
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # refresh the UI
            if retry > MAX_RETRY: raise TimeoutError

    gpt_replying_buffer = ""
    is_head_of_the_stream = True
    if stream:
        stream_response = response.iter_lines()
        while True:
            try:
                chunk = next(stream_response)
            except StopIteration:
                break
            except requests.exceptions.ConnectionError:
                chunk = next(stream_response) # Failed; retry once? If it fails again, there is nothing more we can do.
            # Read some information in advance (used to detect anomalies)
            chunk_decoded, chunkjson, is_last_chunk = decode_chunk(chunk)
            if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r'"role":"assistant"' in chunk_decoded):
                # The first frame of the data stream does not carry content
                is_head_of_the_stream = False; continue
            if chunk:
                try:
                    if is_last_chunk:
                        # Judged to be the end of the data stream; gpt_replying_buffer is complete
                        logging.info(f'[response] {gpt_replying_buffer}')
                        break
                    # Process the main body of the data stream
                    status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
                    gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
                    # If an exception is raised here, it is usually because the text is too long; see the output of get_full_error for details
                    history[-1] = gpt_replying_buffer
                    chatbot[-1] = (history[-2], history[-1])
                    yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # refresh the UI
                except Exception as e:
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # refresh the UI
                    chunk = get_full_error(chunk, stream_response)
                    chunk_decoded = chunk.decode()
                    error_msg = chunk_decoded
                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # refresh the UI
                    print(error_msg)
                    return
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
    from .bridge_all import model_info
    if "bad_request" in error_msg:
        chatbot[-1] = (chatbot[-1][0], "[Local Message] 已经超过了模型的最大上下文或是模型格式错误,请尝试削减单次输入的文本量。")
    elif "authentication_error" in error_msg:
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. 请确保API key有效。")
    elif "not_found" in error_msg:
        chatbot[-1] = (chatbot[-1][0], f"[Local Message] {llm_kwargs['llm_model']} 无效,请确保使用小写的模型名称。")
    elif "rate_limit" in error_msg:
        chatbot[-1] = (chatbot[-1][0], "[Local Message] 遇到了控制请求速率限制,请一分钟后重试。")
    elif "system_busy" in error_msg:
        chatbot[-1] = (chatbot[-1][0], "[Local Message] 系统繁忙,请一分钟后重试。")
    else:
        from toolbox import regular_txt_to_markdown
        tb_str = '```\n' + trimmed_format_exc() + '```'
        chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
    """
    Integrate all information, select the LLM model, and generate the HTTP request, in preparation for sending it
    """
    api_key = f"Bearer {YIMODEL_API_KEY}"

    headers = {
        "Content-Type": "application/json",
        "Authorization": api_key
    }

    conversation_cnt = len(history) // 2

    messages = [{"role": "system", "content": system_prompt}]
    if conversation_cnt:
        for index in range(0, 2*conversation_cnt, 2):
            what_i_have_asked = {}
            what_i_have_asked["role"] = "user"
            what_i_have_asked["content"] = history[index]
            what_gpt_answer = {}
            what_gpt_answer["role"] = "assistant"
            what_gpt_answer["content"] = history[index+1]
            if what_i_have_asked["content"] != "":
                if what_gpt_answer["content"] == "": continue
                if what_gpt_answer["content"] == timeout_bot_msg: continue
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
            else:
                messages[-1]['content'] = what_gpt_answer['content']

    what_i_ask_now = {}
    what_i_ask_now["role"] = "user"
    what_i_ask_now["content"] = inputs
    messages.append(what_i_ask_now)
    model = llm_kwargs['llm_model']
    if llm_kwargs['llm_model'].startswith('one-api-'):
        model = llm_kwargs['llm_model'][len('one-api-'):]
        model, _ = read_one_api_model_name(model)

    tokens = 600 if llm_kwargs['llm_model'] == 'yi-34b-chat-0205' else 4096 # yi-34b-chat-0205 only has a 4k context...
    payload = {
        "model": model,
        "messages": messages,
        "temperature": llm_kwargs['temperature'],  # 1.0,
        "stream": stream,
        "max_tokens": tokens
    }
    try:
        print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
    except:
        print('输入中可能存在乱码。')
    return headers, payload
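The stream this bridge consumes is OpenAI-compatible SSE, except that the Yi endpoint marks the final frame with a non-standard lastOne field, which the decode_chunk above keys on. A sketch of the assumed wire format (payloads abbreviated):

```python
# Assumed frames from https://api.lingyiwanwu.com/v1/chat/completions with stream=True:
#   b'data: {"choices": [{"delta": {"role": "assistant"}}]}'         # head frame, no content
#   b'data: {"choices": [{"delta": {"content": "Hello"}}]}'
#   b'data: {"choices": [{"delta": {"content": "!"}}], "lastOne": true}'
chunk_decoded, chunkjson, is_last_chunk = decode_chunk(
    b'data: {"choices": [{"delta": {"content": "!"}}], "lastOne": true}')
# -> is_last_chunk is True, so the reader logs the buffer and breaks out of its loop
```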