diff --git a/crazy_functions/CodeInterpreter.py b/crazy_functions/CodeInterpreter.py
deleted file mode 100644
index f806f5a..0000000
--- a/crazy_functions/CodeInterpreter.py
+++ /dev/null
@@ -1,232 +0,0 @@
-from collections.abc import Callable, Iterable, Mapping
-from typing import Any
-from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc
-from toolbox import promote_file_to_downloadzone, get_log_folder
-from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-from .crazy_utils import input_clipping, try_install_deps
-from multiprocessing import Process, Pipe
-import os
-import time
-
-templete = """
-```python
-import ... # Put dependencies here, e.g. import numpy as np
-
-class TerminalFunction(object): # Do not change the name of the class, The name of the class must be `TerminalFunction`
-
- def run(self, path): # The name of the function must be `run`, it takes only a positional argument.
- # rewrite the function you have just written here
- ...
- return generated_file_path
-```
-"""
-
-def inspect_dependency(chatbot, history):
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
- return True
-
-def get_code_block(reply):
- import re
- pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
- matches = re.findall(pattern, reply) # find all code blocks in text
- if len(matches) == 1:
- return matches[0].strip('python') # code block
- for match in matches:
- if 'class TerminalFunction' in match:
- return match.strip('python') # code block
- raise RuntimeError("GPT is not generating proper code.")
-
-def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
- # 输入
- prompt_compose = [
- f'Your job:\n'
- f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
- f"2. You should write this function to perform following task: " + txt + "\n",
- f"3. Wrap the output python function with markdown codeblock."
- ]
- i_say = "".join(prompt_compose)
- demo = []
-
- # 第一步
- gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
- inputs=i_say, inputs_show_user=i_say,
- llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
- sys_prompt= r"You are a programmer."
- )
- history.extend([i_say, gpt_say])
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-
- # 第二步
- prompt_compose = [
- "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
- templete
- ]
- i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
- gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
- inputs=i_say, inputs_show_user=inputs_show_user,
- llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
- sys_prompt= r"You are a programmer."
- )
- code_to_return = gpt_say
- history.extend([i_say, gpt_say])
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-
- # # 第三步
- # i_say = "Please list to packages to install to run the code above. Then show me how to use `try_install_deps` function to install them."
- # i_say += 'For instance. `try_install_deps(["opencv-python", "scipy", "numpy"])`'
- # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
- # inputs=i_say, inputs_show_user=inputs_show_user,
- # llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
- # sys_prompt= r"You are a programmer."
- # )
- # # # 第三步
- # i_say = "Show me how to use `pip` to install packages to run the code above. "
- # i_say += 'For instance. `pip install -r opencv-python scipy numpy`'
- # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
- # inputs=i_say, inputs_show_user=i_say,
- # llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
- # sys_prompt= r"You are a programmer."
- # )
- installation_advance = ""
-
- return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
-
-def make_module(code):
- module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
- with open(f'{get_log_folder()}/{module_file}.py', 'w', encoding='utf8') as f:
- f.write(code)
-
- def get_class_name(class_string):
- import re
- # Use regex to extract the class name
- class_name = re.search(r'class (\w+)\(', class_string).group(1)
- return class_name
-
- class_name = get_class_name(code)
- return f"{get_log_folder().replace('/', '.')}.{module_file}->{class_name}"
-
-def init_module_instance(module):
- import importlib
- module_, class_ = module.split('->')
- init_f = getattr(importlib.import_module(module_), class_)
- return init_f()
-
-def for_immediate_show_off_when_possible(file_type, fp, chatbot):
- if file_type in ['png', 'jpg']:
- image_path = os.path.abspath(fp)
- chatbot.append(['这是一张图片, 展示如下:',
-            f'本地文件地址: <br/>`{image_path}`<br/>'+
-            f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
- ])
- return chatbot
-
-def subprocess_worker(instance, file_path, return_dict):
- return_dict['result'] = instance.run(file_path)
-
-def have_any_recent_upload_files(chatbot):
- _5min = 5 * 60
- if not chatbot: return False # chatbot is None
- most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
- if not most_recent_uploaded: return False # most_recent_uploaded is None
- if time.time() - most_recent_uploaded["time"] < _5min: return True # most_recent_uploaded is new
- else: return False # most_recent_uploaded is too old
-
-def get_recent_file_prompt_support(chatbot):
- most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
- path = most_recent_uploaded['path']
- return path
-
-@CatchException
-def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
- """
- txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
- llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
- plugin_kwargs 插件模型的参数,暂时没有用武之地
- chatbot 聊天显示框的句柄,用于显示给用户
- history 聊天历史,前情提要
- system_prompt 给gpt的静默提醒
- user_request 当前用户的请求信息(IP地址等)
- """
- raise NotImplementedError
-
- # 清空历史,以免输入溢出
- history = []; clear_file_downloadzone(chatbot)
-
- # 基本信息:功能、贡献者
- chatbot.append([
- "函数插件功能?",
- "CodeInterpreter开源版, 此插件处于开发阶段, 建议暂时不要使用, 插件初始化中 ..."
- ])
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
- if have_any_recent_upload_files(chatbot):
- file_path = get_recent_file_prompt_support(chatbot)
- else:
- chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
- # 读取文件
- if ("recently_uploaded_files" in plugin_kwargs) and (plugin_kwargs["recently_uploaded_files"] == ""): plugin_kwargs.pop("recently_uploaded_files")
- recently_uploaded_files = plugin_kwargs.get("recently_uploaded_files", None)
- file_path = recently_uploaded_files[-1]
- file_type = file_path.split('.')[-1]
-
- # 粗心检查
- if is_the_upload_folder(txt):
- chatbot.append([
- "...",
- f"请在输入框内填写需求,然后再次点击该插件(文件路径 {file_path} 已经被记忆)"
- ])
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
- return
-
- # 开始干正事
- for j in range(5): # 最多重试5次
- try:
- code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
- yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
- code = get_code_block(code)
- res = make_module(code)
- instance = init_module_instance(res)
- break
- except Exception as e:
- chatbot.append([f"第{j}次代码生成尝试,失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
- # 代码生成结束, 开始执行
- try:
- import multiprocessing
- manager = multiprocessing.Manager()
- return_dict = manager.dict()
-
- p = multiprocessing.Process(target=subprocess_worker, args=(instance, file_path, return_dict))
- # only has 10 seconds to run
- p.start(); p.join(timeout=10)
- if p.is_alive(): p.terminate(); p.join()
- p.close()
- res = return_dict['result']
- # res = instance.run(file_path)
- except Exception as e:
- chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
- # chatbot.append(["如果是缺乏依赖,请参考以下建议", installation_advance])
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
- return
-
- # 顺利完成,收尾
- res = str(res)
- if os.path.exists(res):
- chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
- new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
- chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
- else:
- chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
-
-"""
-测试:
- 裁剪图像,保留下半部分
- 交换图像的蓝色通道和红色通道
- 将图像转为灰度图像
- 将csv文件转excel表格
-"""
\ No newline at end of file
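
For context on what is being removed: the plugin's core loop wrote LLM-generated code to a module on disk, imported it, and executed `TerminalFunction.run(path)` in a subprocess with a hard 10-second timeout. Below is a minimal, self-contained sketch of that pattern; it is an illustration, not the deleted code itself, and the helper names (`load_generated_class`, `run_with_timeout`) are hypothetical.

```python
# Sketch of the deleted plugin's execution pattern: persist generated
# code as a module, import it, and run it in a separate process with a
# hard timeout. Helper names are illustrative. Note: handing a
# dynamically imported class to a child process requires a fork start
# method; the original plugin shared this constraint.
import importlib.util
import multiprocessing
import os
import tempfile

def load_generated_class(code, class_name="TerminalFunction"):
    """Write `code` to a temporary module and return the named class."""
    fd, path = tempfile.mkstemp(suffix=".py")
    with os.fdopen(fd, "w", encoding="utf8") as f:
        f.write(code)
    spec = importlib.util.spec_from_file_location("gpt_generated_fn", path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return getattr(module, class_name)

def _worker(cls, file_path, return_dict):
    return_dict["result"] = cls().run(file_path)

def run_with_timeout(cls, file_path, timeout=10):
    """Run cls().run(file_path) in a subprocess, killing it after `timeout` seconds."""
    manager = multiprocessing.Manager()
    return_dict = manager.dict()
    p = multiprocessing.Process(target=_worker, args=(cls, file_path, return_dict))
    p.start()
    p.join(timeout=timeout)
    if p.is_alive():
        p.terminate()
        p.join()
    return return_dict.get("result")
```

Running the generated code in its own process is what let the plugin enforce the time budget and survive crashes in code it did not write.
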
diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py
index 495cbf8..9c8aecc 100644
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -12,7 +12,7 @@ def input_clipping(inputs, history, max_token_limit):
mode = 'input-and-history'
# 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
input_token_num = get_token_num(inputs)
- if input_token_num < max_token_limit//2:
+ if input_token_num < max_token_limit//2:
mode = 'only-history'
max_token_limit = max_token_limit - input_token_num
@@ -21,7 +21,7 @@ def input_clipping(inputs, history, max_token_limit):
n_token = get_token_num('\n'.join(everything))
everything_token = [get_token_num(e) for e in everything]
delta = max(everything_token) // 16 # 截断时的颗粒度
-
+
while n_token > max_token_limit:
where = np.argmax(everything_token)
encoded = enc.encode(everything[where], disallowed_special=())
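
The whitespace-only hunks above sit inside `input_clipping`'s truncation loop, which greedily trims the longest segment by a fixed granularity (`delta`) until the total fits the token budget. A minimal sketch of that loop, assuming a tiktoken-style encoder; `clip_longest_first` is a hypothetical name:

```python
# Hedged sketch of input_clipping's greedy truncation: shave `delta`
# tokens off the currently-longest segment until everything fits.
import numpy as np
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")

def clip_longest_first(segments, max_token_limit):
    counts = [len(enc.encode(s, disallowed_special=())) for s in segments]
    delta = max(max(counts) // 16, 1)  # truncation granularity
    while sum(counts) > max_token_limit:
        where = int(np.argmax(counts))  # always cut the longest segment
        encoded = enc.encode(segments[where], disallowed_special=())
        segments[where] = enc.decode(encoded[: max(len(encoded) - delta, 0)])
        counts[where] = len(enc.encode(segments[where], disallowed_special=()))
    return segments
```
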
@@ -38,9 +38,9 @@ def input_clipping(inputs, history, max_token_limit):
return inputs, history
def request_gpt_model_in_new_thread_with_ui_alive(
- inputs, inputs_show_user, llm_kwargs,
+ inputs, inputs_show_user, llm_kwargs,
chatbot, history, sys_prompt, refresh_interval=0.2,
- handle_token_exceed=True,
+ handle_token_exceed=True,
retry_times_at_unknown_error=2,
):
"""
@@ -77,7 +77,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
exceeded_cnt = 0
while True:
# watchdog error
- if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience:
+ if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience:
raise RuntimeError("检测到程序终止。")
try:
# 【第一种情况】:顺利完成
@@ -140,12 +140,12 @@ def can_multi_process(llm):
if llm.startswith('api2d-'): return True
if llm.startswith('azure-'): return True
if llm.startswith('spark'): return True
- if llm.startswith('zhipuai'): return True
+ if llm.startswith('zhipuai') or llm.startswith('glm-'): return True
return False
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
- inputs_array, inputs_show_user_array, llm_kwargs,
- chatbot, history_array, sys_prompt_array,
+ inputs_array, inputs_show_user_array, llm_kwargs,
+ chatbot, history_array, sys_prompt_array,
refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
handle_token_exceed=True, show_user_at_complete=False,
retry_times_at_unknown_error=2,
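
The `can_multi_process` change in the hunk above extends a plain prefix whitelist. Since `str.startswith` accepts a tuple, the same gate can be written as one expression (only prefixes visible in the hunk are listed; entries outside the hunk are omitted):

```python
# Equivalent compact form of the prefix whitelist in can_multi_process;
# str.startswith accepts a tuple, so adding glm- is one more entry.
def can_multi_process(llm: str) -> bool:
    return llm.startswith(('api2d-', 'azure-', 'spark', 'zhipuai', 'glm-'))
```
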
@@ -189,7 +189,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
# 屏蔽掉 chatglm的多线程,可能会导致严重卡顿
if not can_multi_process(llm_kwargs['llm_model']):
max_workers = 1
-
+
executor = ThreadPoolExecutor(max_workers=max_workers)
n_frag = len(inputs_array)
# 用户反馈
@@ -214,7 +214,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
try:
# 【第一种情况】:顺利完成
gpt_say = predict_no_ui_long_connection(
- inputs=inputs, llm_kwargs=llm_kwargs, history=history,
+ inputs=inputs, llm_kwargs=llm_kwargs, history=history,
sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
)
mutable[index][2] = "已成功"
@@ -246,7 +246,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
print(tb_str)
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
- if retry_op > 0:
+ if retry_op > 0:
retry_op -= 1
wait = random.randint(5, 20)
if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
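
The retry branch above implements randomized backoff: each failure spends one retry, waits a random 5-20 seconds, and waits longer when the traceback looks like rate limiting. A generic sketch of that policy (the multiplier is illustrative; the real factor lies outside this hunk):

```python
# Hedged sketch of the retry-with-jitter policy shown above: random
# base wait, stretched when the error message indicates rate limiting.
import random
import time

def call_with_retries(fn, max_retries=2):
    for attempt in range(max_retries + 1):
        try:
            return fn()
        except Exception as exc:
            if attempt == max_retries:
                raise
            wait = random.randint(5, 20)
            if "Rate limit reached" in str(exc) or "Too Many Requests" in str(exc):
                wait *= 3  # illustrative factor; back off harder on rate limits
            time.sleep(wait)
```
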
@@ -287,8 +287,8 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                 replace('\n', '').replace('`', '.').replace(' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
observe_win.append(print_something_really_funny)
# 在前端打印些好玩的东西
- stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
- if not done else f'`{mutable[thread_index][2]}`\n\n'
+ stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
+ if not done else f'`{mutable[thread_index][2]}`\n\n'
for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
# 在前端打印些好玩的东西
chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
@@ -302,7 +302,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
for inputs_show_user, f in zip(inputs_show_user_array, futures):
gpt_res = f.result()
gpt_response_collection.extend([inputs_show_user, gpt_res])
-
+
# 是否在结束时,在界面上显示结果
if show_user_at_complete:
for inputs_show_user, f in zip(inputs_show_user_array, futures):
@@ -352,7 +352,7 @@ def read_and_clean_pdf_text(fp):
if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
fsize_statiscs[wtf['size']] += len(wtf['text'])
return max(fsize_statiscs, key=fsize_statiscs.get)
-
+
def ffsize_same(a,b):
"""
提取字体大小是否近似相等
@@ -388,7 +388,7 @@ def read_and_clean_pdf_text(fp):
if index == 0:
page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
'- ', '') for t in text_areas['blocks'] if 'lines' in t]
-
+
############################## <第 2 步,获取正文主字体> ##################################
try:
fsize_statiscs = {}
@@ -404,7 +404,7 @@ def read_and_clean_pdf_text(fp):
mega_sec = []
sec = []
for index, line in enumerate(meta_line):
- if index == 0:
+ if index == 0:
sec.append(line[fc])
continue
if REMOVE_FOOT_NOTE:
@@ -501,12 +501,12 @@ def get_files_from_everything(txt, type): # type='.md'
"""
这个函数是用来获取指定目录下所有指定类型(如.md)的文件,并且对于网络上的文件,也可以获取它。
下面是对每个参数和返回值的说明:
- 参数
- - txt: 路径或网址,表示要搜索的文件或者文件夹路径或网络上的文件。
+ 参数
+ - txt: 路径或网址,表示要搜索的文件或者文件夹路径或网络上的文件。
- type: 字符串,表示要搜索的文件类型。默认是.md。
- 返回值
- - success: 布尔值,表示函数是否成功执行。
- - file_manifest: 文件路径列表,里面包含以指定类型为后缀名的所有文件的绝对路径。
+ 返回值
+ - success: 布尔值,表示函数是否成功执行。
+ - file_manifest: 文件路径列表,里面包含以指定类型为后缀名的所有文件的绝对路径。
- project_folder: 字符串,表示文件所在的文件夹路径。如果是网络上的文件,就是临时文件夹的路径。
该函数详细注释已添加,请确认是否满足您的需要。
"""
@@ -570,7 +570,7 @@ class nougat_interface():
def NOUGAT_parse_pdf(self, fp, chatbot, history):
from toolbox import update_ui_lastest_msg
- yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
+ yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
chatbot=chatbot, history=history, delay=0)
self.threadLock.acquire()
import glob, threading, os
@@ -578,7 +578,7 @@ class nougat_interface():
dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
os.makedirs(dst)
- yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
+ yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
chatbot=chatbot, history=history, delay=0)
self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
res = glob.glob(os.path.join(dst,'*.mmd'))
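
`nougat_with_timeout` above shells out to the nougat CLI under a one-hour cap. A minimal sketch of such a wrapper using `subprocess.run` (an assumption; the project's actual implementation may differ):

```python
# Hedged sketch of a CLI invocation with a hard timeout, in the spirit
# of nougat_with_timeout. subprocess.run kills the child and raises
# TimeoutExpired once the deadline passes.
import subprocess

def run_cli_with_timeout(command, cwd, timeout=3600):
    try:
        return subprocess.run(command, shell=True, cwd=cwd, timeout=timeout).returncode
    except subprocess.TimeoutExpired:
        print(f"command timed out after {timeout}s: {command}")
        return -1
```

Usage would mirror the call above, e.g. `run_cli_with_timeout(f'nougat --out "{dst}" "{fp}"', os.getcwd())`.
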
diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index 9b6c491..e20570f 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -560,7 +560,7 @@ if "sparkv2" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型
})
except:
print(trimmed_format_exc())
-if "sparkv3" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型
+if "sparkv3" in AVAIL_LLM_MODELS or "sparkv3.5" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型
try:
from .bridge_spark import predict_no_ui_long_connection as spark_noui
from .bridge_spark import predict as spark_ui
@@ -572,6 +572,14 @@ if "sparkv3" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型
"max_token": 4096,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
+ },
+ "sparkv3.5": {
+ "fn_with_ui": spark_ui,
+ "fn_without_ui": spark_noui,
+ "endpoint": None,
+ "max_token": 4096,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
}
})
except:
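
For readers unfamiliar with `bridge_all.py`: models are registered as entries in a capability table, and the new `sparkv3.5` block is simply one more entry. A self-contained sketch of that registry pattern follows (the handlers are stubs standing in for the real Spark bridge functions, and the `dispatch` helper is hypothetical):

```python
# Sketch of the model-registry pattern used by bridge_all.py. Field
# names follow the diff; the handlers below are stubs for illustration.
def spark_ui(inputs, **kwargs):      # stands in for bridge_spark.predict
    return f"[ui] {inputs}"

def spark_noui(inputs, **kwargs):    # stands in for predict_no_ui_long_connection
    return f"[noui] {inputs}"

model_info = {
    "sparkv3.5": {
        "fn_with_ui": spark_ui,
        "fn_without_ui": spark_noui,
        "endpoint": None,            # Spark talks websockets, not a REST endpoint
        "max_token": 4096,
        "tokenizer": None,           # the real entry reuses GPT-3.5's tokenizer
        "token_cnt": len,            # stub; the real entry counts GPT-3.5 tokens
    },
}

def dispatch(llm_model, inputs, ui=True, **kwargs):
    """Hypothetical dispatcher: look up the model entry and call a handler."""
    entry = model_info[llm_model]
    fn = entry["fn_with_ui"] if ui else entry["fn_without_ui"]
    return fn(inputs, **kwargs)
```
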
diff --git a/request_llms/com_sparkapi.py b/request_llms/com_sparkapi.py
index 3f667c1..359e407 100644
--- a/request_llms/com_sparkapi.py
+++ b/request_llms/com_sparkapi.py
@@ -65,6 +65,7 @@ class SparkRequestInstance():
self.gpt_url = "ws://spark-api.xf-yun.com/v1.1/chat"
self.gpt_url_v2 = "ws://spark-api.xf-yun.com/v2.1/chat"
self.gpt_url_v3 = "ws://spark-api.xf-yun.com/v3.1/chat"
+ self.gpt_url_v35 = "wss://spark-api.xf-yun.com/v3.5/chat"
self.gpt_url_img = "wss://spark-api.cn-huabei-1.xf-yun.com/v2.1/image"
self.time_to_yield_event = threading.Event()
@@ -91,6 +92,8 @@ class SparkRequestInstance():
gpt_url = self.gpt_url_v2
elif llm_kwargs['llm_model'] == 'sparkv3':
gpt_url = self.gpt_url_v3
+ elif llm_kwargs['llm_model'] == 'sparkv3.5':
+ gpt_url = self.gpt_url_v35
else:
gpt_url = self.gpt_url
file_manifest = []
@@ -190,6 +193,7 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt, file_manifest)
"spark": "general",
"sparkv2": "generalv2",
"sparkv3": "generalv3",
+ "sparkv3.5": "generalv3.5",
}
domains_select = domains[llm_kwargs['llm_model']]
if file_manifest: domains_select = 'image'
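
Taken together, the com_sparkapi.py changes keep two parallel per-model mappings in sync: the websocket URL selected in the request path and the `domain` field built in `gen_params`. A hedged sketch consolidating both into one table (a refactor for illustration only; URLs and domains are copied from the diff):

```python
# Illustrative consolidation of the two per-model mappings this diff
# touches: websocket endpoint and request "domain", keyed by model name.
SPARK_TARGETS = {
    "spark":     ("ws://spark-api.xf-yun.com/v1.1/chat", "general"),
    "sparkv2":   ("ws://spark-api.xf-yun.com/v2.1/chat", "generalv2"),
    "sparkv3":   ("ws://spark-api.xf-yun.com/v3.1/chat", "generalv3"),
    "sparkv3.5": ("wss://spark-api.xf-yun.com/v3.5/chat", "generalv3.5"),
}

def resolve_spark_target(llm_model):
    """Return (gpt_url, domain) for a Spark model, defaulting to v1.1."""
    return SPARK_TARGETS.get(llm_model, SPARK_TARGETS["spark"])
```

A single table would make future additions like v3.5 one-line changes and avoid the URL/domain drift that two separate branches invite.
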