From 1fe66f029188debf4fb977e41d4d6ff831ad4ca3 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sun, 9 Jul 2023 00:20:58 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96azure=E7=9A=84=E4=BD=93?= =?UTF-8?q?=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 7 +- main.py | 8 +- request_llm/bridge_all.py | 12 +- request_llm/bridge_azure_test.py | 237 ------------------------------- request_llm/bridge_chatgpt.py | 7 +- toolbox.py | 34 +++-- 6 files changed, 42 insertions(+), 263 deletions(-) delete mode 100644 request_llm/bridge_azure_test.py diff --git a/config.py b/config.py index e975271..621e575 100644 --- a/config.py +++ b/config.py @@ -8,7 +8,7 @@ """ # [step 1]>> API_KEY = "sk-123456789xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx123456789"。极少数情况下,还需要填写组织(格式如org-123456789abcdefghijklmno的),请向下翻,找 API_ORG 设置项 -API_KEY = "sk-此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey1,fkxxxx-api2dkey2" +API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4" # [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改 @@ -110,9 +110,8 @@ SLACK_CLAUDE_USER_TOKEN = '' # 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md AZURE_ENDPOINT = "https://你亲手写的api名称.openai.azure.com/" -AZURE_API_KEY = "填入azure openai api的密钥" -AZURE_API_VERSION = "2023-05-15" # 一般不修改 -AZURE_ENGINE = "填入你亲手写的部署名" # 读 docs\use_azure.md +AZURE_API_KEY = "填入azure openai api的密钥" # 建议直接在API_KEY处填写,该选项即将被弃用 +AZURE_ENGINE = "填入你亲手写的部署名" # 读 docs\use_azure.md # 使用Newbing diff --git a/main.py b/main.py index 2cbb27f..e599152 100644 --- a/main.py +++ b/main.py @@ -4,10 +4,10 @@ def main(): import gradio as gr if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖,详情信息见requirements.txt" from request_llm.bridge_all import predict - from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith + from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到 - proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \ - get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT') + proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \ + get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT') # 如果WEB_PORT是-1, 则随机选取WEB端口 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT @@ -54,7 +54,7 @@ def main(): cancel_handles = [] with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo: gr.HTML(title_html) - cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL}) + cookies = gr.State(load_chat_cookies()) with gr_L1(): with gr_L2(scale=2): chatbot = gr.Chatbot(label=f"当前模型:{LLM_MODEL}") diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py index 5f6947e..13f49bd 100644 --- a/request_llm/bridge_all.py +++ b/request_llm/bridge_all.py @@ -16,9 +16,6 @@ from toolbox import get_conf, trimmed_format_exc from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui from .bridge_chatgpt import predict as chatgpt_ui -from .bridge_azure_test import predict_no_ui_long_connection as azure_noui -from .bridge_azure_test import predict as azure_ui - from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui from .bridge_chatglm import predict as chatglm_ui @@ -48,10 +45,11 @@ class LazyloadTiktoken(object): return encoder.decode(*args, **kwargs) # Endpoint 重定向 -API_URL_REDIRECT, = get_conf("API_URL_REDIRECT") +API_URL_REDIRECT, AZURE_ENDPOINT, AZURE_ENGINE = get_conf("API_URL_REDIRECT", "AZURE_ENDPOINT", "AZURE_ENGINE") openai_endpoint = "https://api.openai.com/v1/chat/completions" api2d_endpoint = "https://openai.api2d.net/v1/chat/completions" newbing_endpoint = "wss://sydney.bing.com/sydney/ChatHub" +azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15' # 兼容旧版的配置 try: API_URL, = get_conf("API_URL") @@ -122,9 +120,9 @@ model_info = { # azure openai "azure-gpt-3.5":{ - "fn_with_ui": azure_ui, - "fn_without_ui": azure_noui, - "endpoint": get_conf("AZURE_ENDPOINT"), + "fn_with_ui": chatgpt_ui, + "fn_without_ui": chatgpt_noui, + "endpoint": azure_endpoint, "max_token": 4096, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, diff --git a/request_llm/bridge_azure_test.py b/request_llm/bridge_azure_test.py deleted file mode 100644 index e9c7cb3..0000000 --- a/request_llm/bridge_azure_test.py +++ /dev/null @@ -1,237 +0,0 @@ -""" - 该文件中主要包含三个函数 - - 不具备多线程能力的函数: - 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程 - - 具备多线程调用能力的函数 - 2. predict_no_ui:高级实验性功能模块调用,不会实时显示在界面上,参数简单,可以多线程并行,方便实现复杂的功能逻辑 - 3. predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程 -""" - -import logging -import traceback -import importlib -import openai -import time -import requests -import json - -# 读取config.py文件中关于AZURE OPENAI API的信息 -from toolbox import get_conf, update_ui, clip_history, trimmed_format_exc -TIMEOUT_SECONDS, MAX_RETRY, AZURE_ENGINE, AZURE_ENDPOINT, AZURE_API_VERSION, AZURE_API_KEY = \ - get_conf('TIMEOUT_SECONDS', 'MAX_RETRY',"AZURE_ENGINE","AZURE_ENDPOINT", "AZURE_API_VERSION", "AZURE_API_KEY") - - -def get_full_error(chunk, stream_response): - """ - 获取完整的从Openai返回的报错 - """ - while True: - try: - chunk += next(stream_response) - except: - break - return chunk - -def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): - """ - 发送至azure openai api,流式获取输出。 - 用于基础的对话功能。 - inputs 是本次问询的输入 - top_p, temperature是chatGPT的内部调优参数 - history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误) - chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容 - additional_fn代表点击的哪个按钮,按钮见functional.py - """ - - if additional_fn is not None: - import core_functional - importlib.reload(core_functional) # 热更新prompt - core_functional = core_functional.get_core_functions() - if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话) - inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"] - - raw_input = inputs - logging.info(f'[raw_input] {raw_input}') - chatbot.append((inputs, "")) - yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 - - payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream) - - history.append(inputs); history.append("") - - retry = 0 - while True: - try: - openai.api_type = "azure" - openai.api_version = AZURE_API_VERSION - openai.api_base = AZURE_ENDPOINT - openai.api_key = AZURE_API_KEY - response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break - except openai.error.AuthenticationError: - tb_str = '```\n' + trimmed_format_exc() + '```' - chatbot[-1] = [chatbot[-1][0], tb_str] - yield from update_ui(chatbot=chatbot, history=history, msg="openai返回错误") # 刷新界面 - return - except: - retry += 1 - traceback.print_exc() - if retry > MAX_RETRY: raise TimeoutError - if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') - - gpt_replying_buffer = "" - is_head_of_the_stream = True - if stream: - - stream_response = response - - while True: - try: - chunk = next(stream_response) - - except StopIteration: - from toolbox import regular_txt_to_markdown; tb_str = '```\n' + trimmed_format_exc() + '```' - chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk)}") - yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误:" + chunk) # 刷新界面 - return - - if is_head_of_the_stream and (r'"object":"error"' not in chunk): - # 数据流的第一帧不携带content - is_head_of_the_stream = False; continue - - if chunk: - #print(chunk) - try: - if "delta" in chunk["choices"][0]: - if chunk["choices"][0]["finish_reason"] == "stop": - logging.info(f'[response] {gpt_replying_buffer}') - break - status_text = f"finish_reason: {chunk['choices'][0]['finish_reason']}" - gpt_replying_buffer = gpt_replying_buffer + chunk["choices"][0]["delta"]["content"] - - history[-1] = gpt_replying_buffer - chatbot[-1] = (history[-2], history[-1]) - yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面 - - except Exception as e: - traceback.print_exc() - yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面 - chunk = get_full_error(chunk, stream_response) - - error_msg = chunk - yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 - return - - -def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): - """ - 发送至AZURE OPENAI API,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。 - inputs: - 是本次问询的输入 - sys_prompt: - 系统静默prompt - llm_kwargs: - chatGPT的内部调优参数 - history: - 是之前的对话列表 - observe_window = None: - 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 - """ - watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 - payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True) - retry = 0 - while True: - try: - openai.api_type = "azure" - openai.api_version = AZURE_API_VERSION - openai.api_base = AZURE_ENDPOINT - openai.api_key = AZURE_API_KEY - response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break - except: - retry += 1 - traceback.print_exc() - if retry > MAX_RETRY: raise TimeoutError - if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') - - stream_response = response - result = '' - while True: - try: chunk = next(stream_response) - except StopIteration: - break - except: - chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 - if len(chunk)==0: continue - - json_data = json.loads(str(chunk))['choices'][0] - delta = json_data["delta"] - if len(delta) == 0: - break - if "role" in delta: - continue - if "content" in delta: - result += delta["content"] - if not console_slience: print(delta["content"], end='') - if observe_window is not None: - # 观测窗,把已经获取的数据显示出去 - if len(observe_window) >= 1: observe_window[0] += delta["content"] - # 看门狗,如果超过期限没有喂狗,则终止 - if len(observe_window) >= 2000: - if (time.time()-observe_window[1]) > watch_dog_patience: - raise RuntimeError("用户取消了程序。") - else: - raise RuntimeError("意外Json结构:"+delta) - if json_data['finish_reason'] == 'content_filter': - raise RuntimeError("由于提问含不合规内容被Azure过滤。") - if json_data['finish_reason'] == 'length': - raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。") - return result - - -def generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream): - """ - 整合所有信息,选择LLM模型,生成 azure openai api请求,为发送请求做准备 - """ - - conversation_cnt = len(history) // 2 - - messages = [{"role": "system", "content": system_prompt}] - if conversation_cnt: - for index in range(0, 2*conversation_cnt, 2): - what_i_have_asked = {} - what_i_have_asked["role"] = "user" - what_i_have_asked["content"] = history[index] - what_gpt_answer = {} - what_gpt_answer["role"] = "assistant" - what_gpt_answer["content"] = history[index+1] - if what_i_have_asked["content"] != "": - if what_gpt_answer["content"] == "": continue - messages.append(what_i_have_asked) - messages.append(what_gpt_answer) - else: - messages[-1]['content'] = what_gpt_answer['content'] - - what_i_ask_now = {} - what_i_ask_now["role"] = "user" - what_i_ask_now["content"] = inputs - messages.append(what_i_ask_now) - - payload = { - "model": llm_kwargs['llm_model'], - "messages": messages, - "temperature": llm_kwargs['temperature'], # 1.0, - "top_p": llm_kwargs['top_p'], # 1.0, - "n": 1, - "stream": stream, - "presence_penalty": 0, - "frequency_penalty": 0, - "engine": AZURE_ENGINE - } - try: - print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........") - except: - print('输入中可能存在乱码。') - return payload - - diff --git a/request_llm/bridge_chatgpt.py b/request_llm/bridge_chatgpt.py index 858c395..ca47bd2 100644 --- a/request_llm/bridge_chatgpt.py +++ b/request_llm/bridge_chatgpt.py @@ -22,8 +22,8 @@ import importlib # config_private.py放自己的秘密如API和代理网址 # 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件 from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc -proxies, API_KEY, TIMEOUT_SECONDS, MAX_RETRY, API_ORG = \ - get_conf('proxies', 'API_KEY', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG') +proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG = \ + get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG') timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。' @@ -101,6 +101,8 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("用户取消了程序。") else: raise RuntimeError("意外Json结构:"+delta) + if json_data['finish_reason'] == 'content_filter': + raise RuntimeError("由于提问含不合规内容被Azure过滤。") if json_data['finish_reason'] == 'length': raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。") return result @@ -247,6 +249,7 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream): "Authorization": f"Bearer {api_key}" } if API_ORG.startswith('org-'): headers.update({"OpenAI-Organization": API_ORG}) + if llm_kwargs['llm_model'].startswith('azure-'): headers.update({"api-key": api_key}) conversation_cnt = len(history) // 2 diff --git a/toolbox.py b/toolbox.py index 256d99c..8a48581 100644 --- a/toolbox.py +++ b/toolbox.py @@ -505,16 +505,24 @@ def on_report_generated(cookies, files, chatbot): chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}']) return cookies, report_files, chatbot +def load_chat_cookies(): + API_KEY, LLM_MODEL, AZURE_API_KEY = get_conf('API_KEY', 'LLM_MODEL', 'AZURE_API_KEY') + if is_any_api_key(AZURE_API_KEY): + if is_any_api_key(API_KEY): API_KEY = API_KEY + ',' + AZURE_API_KEY + else: API_KEY = AZURE_API_KEY + return {'api_key': API_KEY, 'llm_model': LLM_MODEL} + def is_openai_api_key(key): API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key) + return bool(API_MATCH_ORIGINAL) + +def is_azure_api_key(key): API_MATCH_AZURE = re.match(r"[a-zA-Z0-9]{32}$", key) - return bool(API_MATCH_ORIGINAL) or bool(API_MATCH_AZURE) + return bool(API_MATCH_AZURE) def is_api2d_key(key): - if key.startswith('fk') and len(key) == 41: - return True - else: - return False + API_MATCH_API2D = re.match(r"fk[a-zA-Z0-9]{6}-[a-zA-Z0-9]{32}$", key) + return bool(API_MATCH_API2D) def is_any_api_key(key): if ',' in key: @@ -523,10 +531,10 @@ def is_any_api_key(key): if is_any_api_key(k): return True return False else: - return is_openai_api_key(key) or is_api2d_key(key) + return is_openai_api_key(key) or is_api2d_key(key) or is_azure_api_key(key) def what_keys(keys): - avail_key_list = {'OpenAI Key':0, "API2D Key":0} + avail_key_list = {'OpenAI Key':0, "Azure Key":0, "API2D Key":0} key_list = keys.split(',') for k in key_list: @@ -537,7 +545,11 @@ def what_keys(keys): if is_api2d_key(k): avail_key_list['API2D Key'] += 1 - return f"检测到: OpenAI Key {avail_key_list['OpenAI Key']} 个,API2D Key {avail_key_list['API2D Key']} 个" + for k in key_list: + if is_azure_api_key(k): + avail_key_list['Azure Key'] += 1 + + return f"检测到: OpenAI Key {avail_key_list['OpenAI Key']} 个, Azure Key {avail_key_list['Azure Key']} 个, API2D Key {avail_key_list['API2D Key']} 个" def select_api_key(keys, llm_model): import random @@ -552,8 +564,12 @@ def select_api_key(keys, llm_model): for k in key_list: if is_api2d_key(k): avail_key_list.append(k) + if llm_model.startswith('azure-'): + for k in key_list: + if is_azure_api_key(k): avail_key_list.append(k) + if len(avail_key_list) == 0: - raise RuntimeError(f"您提供的api-key不满足要求,不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源。") + raise RuntimeError(f"您提供的api-key不满足要求,不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源(右下角更换模型菜单中可切换openai,azure和api2d请求源)") api_key = random.choice(avail_key_list) # 随机负载均衡 return api_key