diff --git a/crazy_functions/CodeInterpreter.py b/crazy_functions/CodeInterpreter.py
deleted file mode 100644
index f806f5a..0000000
--- a/crazy_functions/CodeInterpreter.py
+++ /dev/null
@@ -1,232 +0,0 @@
-from collections.abc import Callable, Iterable, Mapping
-from typing import Any
-from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc
-from toolbox import promote_file_to_downloadzone, get_log_folder
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-from .crazy_utils import input_clipping, try_install_deps
-from multiprocessing import Process, Pipe
-import os
-import time
-
-templete = """
-```python
-import ...  # Put dependencies here, e.g. import numpy as np
-
-class TerminalFunction(object):  # Do not change the name of the class, the name of the class must be `TerminalFunction`
-
-    def run(self, path):  # The name of the function must be `run`, it takes only a positional argument.
-        # rewrite the function you have just written here
-        ...
-        return generated_file_path
-```
-"""
-
-def inspect_dependency(chatbot, history):
-    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-    return True
-
-def get_code_block(reply):
-    import re
-    pattern = r"```([\s\S]*?)```"  # regex pattern to match code blocks
-    matches = re.findall(pattern, reply)  # find all code blocks in text
-    if len(matches) == 1:
-        return matches[0].strip('python')  # code block
-    for match in matches:
-        if 'class TerminalFunction' in match:
-            return match.strip('python')  # code block
-    raise RuntimeError("GPT is not generating proper code.")
-
-def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
-    # input
-    prompt_compose = [
-        f'Your job:\n'
-        f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
-        f"2. You should write this function to perform following task: " + txt + "\n",
-        f"3. Wrap the output python function with markdown codeblock."
-    ]
-    i_say = "".join(prompt_compose)
-    demo = []
-
-    # step 1
-    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-        inputs=i_say, inputs_show_user=i_say,
-        llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
-        sys_prompt= r"You are a programmer."
-    )
-    history.extend([i_say, gpt_say])
-    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-
-    # step 2
-    prompt_compose = [
-        "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
-        templete
-    ]
-    i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
-    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-        inputs=i_say, inputs_show_user=inputs_show_user,
-        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
-        sys_prompt= r"You are a programmer."
-    )
-    code_to_return = gpt_say
-    history.extend([i_say, gpt_say])
-    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-
-    # # step 3
-    # i_say = "Please list to packages to install to run the code above. Then show me how to use `try_install_deps` function to install them."
-    # i_say += 'For instance. `try_install_deps(["opencv-python", "scipy", "numpy"])`'
-    # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
-    #     inputs=i_say, inputs_show_user=inputs_show_user,
-    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
-    #     sys_prompt= r"You are a programmer."
-    # )
-    # # # step 3
-    # i_say = "Show me how to use `pip` to install packages to run the code above. "
-    # i_say += 'For instance. `pip install -r opencv-python scipy numpy`'
-    # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
-    #     inputs=i_say, inputs_show_user=i_say,
-    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
-    #     sys_prompt= r"You are a programmer."
-    # )
-    installation_advance = ""
-
-    return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
-
-def make_module(code):
-    module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
-    with open(f'{get_log_folder()}/{module_file}.py', 'w', encoding='utf8') as f:
-        f.write(code)
-
-    def get_class_name(class_string):
-        import re
-        # Use regex to extract the class name
-        class_name = re.search(r'class (\w+)\(', class_string).group(1)
-        return class_name
-
-    class_name = get_class_name(code)
-    return f"{get_log_folder().replace('/', '.')}.{module_file}->{class_name}"
-
-def init_module_instance(module):
-    import importlib
-    module_, class_ = module.split('->')
-    init_f = getattr(importlib.import_module(module_), class_)
-    return init_f()
-
-def for_immediate_show_off_when_possible(file_type, fp, chatbot):
-    if file_type in ['png', 'jpg']:
-        image_path = os.path.abspath(fp)
-        chatbot.append(['这是一张图片, 展示如下:',
-            f'本地文件地址: <br/>`{image_path}`<br/>'+
-            f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
-        ])
-    return chatbot
-
-def subprocess_worker(instance, file_path, return_dict):
-    return_dict['result'] = instance.run(file_path)
-
-def have_any_recent_upload_files(chatbot):
-    _5min = 5 * 60
-    if not chatbot: return False  # chatbot is None
-    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
-    if not most_recent_uploaded: return False  # most_recent_uploaded is None
-    if time.time() - most_recent_uploaded["time"] < _5min: return True  # most_recent_uploaded is new
-    else: return False  # most_recent_uploaded is too old
-
-def get_recent_file_prompt_support(chatbot):
-    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
-    path = most_recent_uploaded['path']
-    return path
-
-@CatchException
-def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
-    """
-    txt             text entered by the user in the input field, e.g. a paragraph to translate, or a path to files awaiting processing
-    llm_kwargs      GPT model parameters such as temperature and top_p, usually passed through unchanged
-    plugin_kwargs   plugin parameters, currently unused
-    chatbot         handle of the chat display box, used to show output to the user
-    history         chat history, i.e. the preceding context
-    system_prompt   silent system prompt passed to GPT
-    user_request    information about the current user request (IP address, etc.)
-    """
-    raise NotImplementedError
-
-    # clear history to avoid input overflow
-    history = []; clear_file_downloadzone(chatbot)
-
-    # basic info: features and contributors
-    chatbot.append([
-        "函数插件功能?",
-        "CodeInterpreter开源版, 此插件处于开发阶段, 建议暂时不要使用, 插件初始化中 ..."
-    ])
-    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-
-    if have_any_recent_upload_files(chatbot):
-        file_path = get_recent_file_prompt_support(chatbot)
-    else:
-        chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
-        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-
-    # read the file
-    if ("recently_uploaded_files" in plugin_kwargs) and (plugin_kwargs["recently_uploaded_files"] == ""): plugin_kwargs.pop("recently_uploaded_files")
-    recently_uploaded_files = plugin_kwargs.get("recently_uploaded_files", None)
-    file_path = recently_uploaded_files[-1]
-    file_type = file_path.split('.')[-1]
-
-    # guard against careless use
-    if is_the_upload_folder(txt):
-        chatbot.append([
-            "...",
-            f"请在输入框内填写需求,然后再次点击该插件(文件路径 {file_path} 已经被记忆)"
-        ])
-        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-        return
-
-    # get down to business
-    for j in range(5):  # retry up to 5 times
-        try:
-            code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
-                yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
-            code = get_code_block(code)
-            res = make_module(code)
-            instance = init_module_instance(res)
-            break
-        except Exception as e:
-            chatbot.append([f"第{j}次代码生成尝试,失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
-            yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-
-    # code generation done, start executing
-    try:
-        import multiprocessing
-        manager = multiprocessing.Manager()
-        return_dict = manager.dict()
-
-        p = multiprocessing.Process(target=subprocess_worker, args=(instance, file_path, return_dict))
-        # only has 10 seconds to run
-        p.start(); p.join(timeout=10)
-        if p.is_alive(): p.terminate(); p.join()
-        p.close()
-        res = return_dict['result']
-        # res = instance.run(file_path)
-    except Exception as e:
-        chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
-        # chatbot.append(["如果是缺乏依赖,请参考以下建议", installation_advance])
-        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-        return
-
-    # finished successfully, wrap up
-    res = str(res)
-    if os.path.exists(res):
-        chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
-        new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
-        chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
-        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-    else:
-        chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
-        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
-
-"""
-Tests:
-    crop an image, keeping the lower half
-    swap the blue and red channels of an image
-    convert an image to grayscale
-    convert a csv file to an excel spreadsheet
-"""
\ No newline at end of file
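The plugin deleted above compiled GPT-generated code into a module and then executed it in a child process with a hard 10-second deadline, so that runaway generated code could not hang the UI. Below is a minimal, self-contained sketch of that timeout pattern; `AnalysisTask` is a hypothetical stand-in for the GPT-generated `TerminalFunction` class.

```python
import multiprocessing

class AnalysisTask:
    """Hypothetical stand-in for the GPT-generated `TerminalFunction`."""
    def run(self, path):
        return f"processed {path}"

def worker(instance, file_path, return_dict):
    # child process: run the generated code and hand the result back
    return_dict['result'] = instance.run(file_path)

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    return_dict = manager.dict()  # shared dict, visible across the process boundary
    p = multiprocessing.Process(target=worker, args=(AnalysisTask(), 'demo.csv', return_dict))
    p.start()
    p.join(timeout=10)            # the generated code gets at most 10 seconds
    if p.is_alive():
        p.terminate(); p.join()   # kill it if it overran the budget
    print(return_dict.get('result', '<timed out>'))
```

A `Manager().dict()` is used rather than a plain dict because an ordinary dict mutated in the child would not be seen by the parent.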
diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py
index 495cbf8..9c8aecc 100644
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -12,7 +12,7 @@ def input_clipping(inputs, history, max_token_limit):
     mode = 'input-and-history'
     # when the input's share of tokens is under half of the total, clip only the history
    input_token_num = get_token_num(inputs)
-    if input_token_num < max_token_limit//2: 
+    if input_token_num < max_token_limit//2:
         mode = 'only-history'
         max_token_limit = max_token_limit - input_token_num
 
@@ -21,7 +21,7 @@ def input_clipping(inputs, history, max_token_limit):
     n_token = get_token_num('\n'.join(everything))
     everything_token = [get_token_num(e) for e in everything]
     delta = max(everything_token) // 16  # granularity of truncation
-    
+
     while n_token > max_token_limit:
         where = np.argmax(everything_token)
         encoded = enc.encode(everything[where], disallowed_special=())
@@ -38,9 +38,9 @@ def input_clipping(inputs, history, max_token_limit):
     return inputs, history
 
 def request_gpt_model_in_new_thread_with_ui_alive(
-        inputs, inputs_show_user, llm_kwargs, 
+        inputs, inputs_show_user, llm_kwargs,
         chatbot, history, sys_prompt, refresh_interval=0.2,
-        handle_token_exceed=True, 
+        handle_token_exceed=True,
         retry_times_at_unknown_error=2,
         ):
     """
@@ -77,7 +77,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
     exceeded_cnt = 0
     while True:
         # watchdog error
-        if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience: 
+        if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience:
             raise RuntimeError("检测到程序终止。")
         try:
             # [case 1]: completed smoothly
@@ -140,12 +140,12 @@ def can_multi_process(llm):
     if llm.startswith('api2d-'): return True
     if llm.startswith('azure-'): return True
     if llm.startswith('spark'): return True
-    if llm.startswith('zhipuai'): return True
+    if llm.startswith('zhipuai') or llm.startswith('glm-'): return True
     return False
 
 def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
-        inputs_array, inputs_show_user_array, llm_kwargs, 
-        chatbot, history_array, sys_prompt_array, 
+        inputs_array, inputs_show_user_array, llm_kwargs,
+        chatbot, history_array, sys_prompt_array,
         refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
         handle_token_exceed=True, show_user_at_complete=False,
         retry_times_at_unknown_error=2,
@@ -189,7 +189,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
     # disable multi-threading for chatglm, it may cause severe stalls
     if not can_multi_process(llm_kwargs['llm_model']):
         max_workers = 1
-        
+
     executor = ThreadPoolExecutor(max_workers=max_workers)
     n_frag = len(inputs_array)
     # user feedback
@@ -214,7 +214,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
         try:
             # [case 1]: completed smoothly
             gpt_say = predict_no_ui_long_connection(
-                inputs=inputs, llm_kwargs=llm_kwargs, history=history, 
+                inputs=inputs, llm_kwargs=llm_kwargs, history=history,
                 sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
             )
             mutable[index][2] = "已成功"
@@ -246,7 +246,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
             print(tb_str)
             gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
             if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
-            if retry_op > 0: 
+            if retry_op > 0:
                 retry_op -= 1
                 wait = random.randint(5, 20)
                 if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
@@ -287,8 +287,8 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                     replace('\n', '').replace('`', '.').replace(' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
                 observe_win.append(print_something_really_funny)
             # print something fun on the frontend
-            stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n' 
-                                if not done else f'`{mutable[thread_index][2]}`\n\n' 
+            stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
+                                if not done else f'`{mutable[thread_index][2]}`\n\n'
                                 for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
             # print something fun on the frontend
             chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
@@ -302,7 +302,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
     for inputs_show_user, f in zip(inputs_show_user_array, futures):
         gpt_res = f.result()
         gpt_response_collection.extend([inputs_show_user, gpt_res])
-    
+
     # whether to show the results in the UI at completion
     if show_user_at_complete:
         for inputs_show_user, f in zip(inputs_show_user_array, futures):
@@ -352,7 +352,7 @@ def read_and_clean_pdf_text(fp):
             if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
             fsize_statiscs[wtf['size']] += len(wtf['text'])
         return max(fsize_statiscs, key=fsize_statiscs.get)
-        
+
     def ffsize_same(a,b):
         """
        whether the extracted font sizes are approximately equal
@@ -388,7 +388,7 @@ def read_and_clean_pdf_text(fp):
             if index == 0:
                 page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                     '- ', '') for t in text_areas['blocks'] if 'lines' in t]
-                
+
             ############################## <Step 2: get the main font of the body text> ##################################
             try:
                 fsize_statiscs = {}
@@ -404,7 +404,7 @@ def read_and_clean_pdf_text(fp):
     mega_sec = []
     sec = []
     for index, line in enumerate(meta_line):
-        if index == 0: 
+        if index == 0:
             sec.append(line[fc])
             continue
         if REMOVE_FOOT_NOTE:
@@ -501,12 +501,12 @@ def get_files_from_everything(txt, type): # type='.md'
     """
     This function collects every file of the given type (e.g. .md) under a directory; it can also fetch a file from the web.
     Explanation of each parameter and return value:
-    Parameters 
-    - txt: a path or URL, i.e. the file or folder to search, or a file on the web. 
+    Parameters
+    - txt: a path or URL, i.e. the file or folder to search, or a file on the web.
     - type: a string giving the file type to search for. Defaults to .md.
-    Return values 
-    - success: a boolean indicating whether the function executed successfully. 
-    - file_manifest: a list of the absolute paths of all files whose suffix matches the given type. 
+    Return values
+    - success: a boolean indicating whether the function executed successfully.
+    - file_manifest: a list of the absolute paths of all files whose suffix matches the given type.
     - project_folder: the folder containing the files; for a web file, the path of a temporary folder.
     Detailed comments have been added to this function; please confirm that it meets your needs.
     """
@@ -570,7 +570,7 @@ class nougat_interface():
 
     def NOUGAT_parse_pdf(self, fp, chatbot, history):
         from toolbox import update_ui_lastest_msg
-        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...", 
+        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
                                          chatbot=chatbot, history=history, delay=0)
         self.threadLock.acquire()
         import glob, threading, os
@@ -578,7 +578,7 @@ class nougat_interface():
         dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
         os.makedirs(dst)
 
-        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)", 
+        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
                                          chatbot=chatbot, history=history, delay=0)
         self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
         res = glob.glob(os.path.join(dst,'*.mmd'))
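Beyond the trailing-whitespace cleanup, the one functional change in `crazy_functions/crazy_utils.py` is in `can_multi_process`: model names beginning with `glm-` now join the whitelist, so those models are no longer forced down to a single worker thread. A minimal sketch of the gating logic follows; the prefix tuple only mirrors the prefixes visible in this hunk, and the real function may check others.

```python
# Prefix whitelist after this change; str.startswith accepts a tuple of prefixes.
MULTI_THREAD_PREFIXES = ('api2d-', 'azure-', 'spark', 'zhipuai', 'glm-')

def can_multi_process(llm: str) -> bool:
    return llm.startswith(MULTI_THREAD_PREFIXES)

def pick_max_workers(llm_model: str, requested: int) -> int:
    # mirrors the caller: anything not whitelisted is serialized to one worker
    return requested if can_multi_process(llm_model) else 1

assert pick_max_workers('glm-4', 8) == 8    # newly allowed by this diff
assert pick_max_workers('chatglm', 8) == 1  # still limited to a single thread
```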
diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index 9b6c491..e20570f 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -560,7 +560,7 @@ if "sparkv2" in AVAIL_LLM_MODELS:   # iFLYTEK Spark LLM
         })
     except:
         print(trimmed_format_exc())
-if "sparkv3" in AVAIL_LLM_MODELS:   # iFLYTEK Spark LLM
+if "sparkv3" in AVAIL_LLM_MODELS or "sparkv3.5" in AVAIL_LLM_MODELS:   # iFLYTEK Spark LLM
     try:
         from .bridge_spark import predict_no_ui_long_connection as spark_noui
         from .bridge_spark import predict as spark_ui
@@ -572,6 +572,14 @@ if "sparkv3" in AVAIL_LLM_MODELS:   # iFLYTEK Spark LLM
             "max_token": 4096,
             "tokenizer": tokenizer_gpt35,
             "token_cnt": get_token_num_gpt35,
+        },
+        "sparkv3.5": {
+            "fn_with_ui": spark_ui,
+            "fn_without_ui": spark_noui,
+            "endpoint": None,
+            "max_token": 4096,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
             }
         })
     except:
diff --git a/request_llms/com_sparkapi.py b/request_llms/com_sparkapi.py
index 3f667c1..359e407 100644
--- a/request_llms/com_sparkapi.py
+++ b/request_llms/com_sparkapi.py
@@ -65,6 +65,7 @@ class SparkRequestInstance():
         self.gpt_url = "ws://spark-api.xf-yun.com/v1.1/chat"
         self.gpt_url_v2 = "ws://spark-api.xf-yun.com/v2.1/chat"
         self.gpt_url_v3 = "ws://spark-api.xf-yun.com/v3.1/chat"
+        self.gpt_url_v35 = "wss://spark-api.xf-yun.com/v3.5/chat"
         self.gpt_url_img = "wss://spark-api.cn-huabei-1.xf-yun.com/v2.1/image"
 
         self.time_to_yield_event = threading.Event()
@@ -91,6 +92,8 @@ class SparkRequestInstance():
             gpt_url = self.gpt_url_v2
         elif llm_kwargs['llm_model'] == 'sparkv3':
             gpt_url = self.gpt_url_v3
+        elif llm_kwargs['llm_model'] == 'sparkv3.5':
+            gpt_url = self.gpt_url_v35
         else:
             gpt_url = self.gpt_url
         file_manifest = []
@@ -190,6 +193,7 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt, file_manifest):
         "spark": "general",
         "sparkv2": "generalv2",
         "sparkv3": "generalv3",
+        "sparkv3.5": "generalv3.5",
     }
     domains_select = domains[llm_kwargs['llm_model']]
     if file_manifest: domains_select = 'image'
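Taken together, the last two files wire up the new `sparkv3.5` model name end to end: `bridge_all.py` registers it in `model_info` (reusing the GPT-3.5 tokenizer for token counting), and `com_sparkapi.py` maps it to the v3.5 WebSocket endpoint and the `generalv3.5` API domain. A condensed sketch of the resulting name-to-endpoint dispatch follows; the two lookup tables are copied from the diff, while `resolve_spark_target` is an illustrative helper, not part of the codebase.

```python
# The two tables the diff extends: model name -> websocket URL, model name -> API domain.
SPARK_URLS = {
    "spark":     "ws://spark-api.xf-yun.com/v1.1/chat",
    "sparkv2":   "ws://spark-api.xf-yun.com/v2.1/chat",
    "sparkv3":   "ws://spark-api.xf-yun.com/v3.1/chat",
    "sparkv3.5": "wss://spark-api.xf-yun.com/v3.5/chat",  # new in this diff
}
SPARK_DOMAINS = {
    "spark": "general",
    "sparkv2": "generalv2",
    "sparkv3": "generalv3",
    "sparkv3.5": "generalv3.5",  # new in this diff
}

def resolve_spark_target(llm_model: str):
    # like the elif chain in run(): unknown names fall back to the v1.1 URL
    url = SPARK_URLS.get(llm_model, SPARK_URLS["spark"])
    domain = SPARK_DOMAINS[llm_model]  # gen_params raises KeyError for unknown names, as here
    return url, domain

print(resolve_spark_target("sparkv3.5"))
# ('wss://spark-api.xf-yun.com/v3.5/chat', 'generalv3.5')
```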