From dcaa7a1808bcc6eeb39c6fbec11ce50844ddb8cd Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 6 Apr 2023 02:02:04 +0800 Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E5=91=BD=E5=90=8D=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- functional.py => core_functional.py | 2 +- functional_crazy.py => crazy_functional.py | 10 +- crazy_functions/crazy_utils.py | 58 +++++ crazy_functions/代码重写为全英文_多线程.py | 28 +-- crazy_functions/批量翻译PDF文档_多线程.py | 255 +++++++++++++++++++++ crazy_functions/高级功能函数模板.py | 13 +- main.py | 8 +- request_llm/bridge_chatgpt.py | 40 +++- request_llm/bridge_tgui.py | 10 +- requirements.txt | 1 + 10 files changed, 369 insertions(+), 56 deletions(-) rename functional.py => core_functional.py (99%) rename functional_crazy.py => crazy_functional.py (93%) create mode 100644 crazy_functions/crazy_utils.py create mode 100644 crazy_functions/批量翻译PDF文档_多线程.py diff --git a/functional.py b/core_functional.py similarity index 99% rename from functional.py rename to core_functional.py index eccc0ac..22d2c2b 100644 --- a/functional.py +++ b/core_functional.py @@ -4,7 +4,7 @@ # 默认按钮颜色是 secondary from toolbox import clear_line_break -def get_functionals(): +def get_core_functions(): return { "英语学术润色": { # 前言 diff --git a/functional_crazy.py b/crazy_functional.py similarity index 93% rename from functional_crazy.py rename to crazy_functional.py index 3f02400..2876cb8 100644 --- a/functional_crazy.py +++ b/crazy_functional.py @@ -1,6 +1,6 @@ from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效 -def get_crazy_functionals(): +def get_crazy_functions(): ###################### 第一组插件 ########################### # [第一组插件]: 最早期编写的项目插件和一些demo from crazy_functions.读文章写摘要 import 读文章写摘要 @@ -97,6 +97,14 @@ def get_crazy_functionals(): "Function": HotReload(下载arxiv论文并翻译摘要) } }) + from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档 + function_plugins.update({ + "批量翻译PDF文档(多线程)": { + "Color": "stop", + "AsButton": False, # 加入下拉菜单中 + "Function": HotReload(批量翻译PDF文档) + } + }) except Exception as err: print(f'[下载arxiv论文并翻译摘要] 插件导入失败 {str(err)}') diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py new file mode 100644 index 0000000..f1b5e82 --- /dev/null +++ b/crazy_functions/crazy_utils.py @@ -0,0 +1,58 @@ + + + +def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit): + def cut(txt_tocut, must_break_at_empty_line): # 递归 + if get_token_fn(txt_tocut) <= limit: + return [txt_tocut] + else: + lines = txt_tocut.split('\n') + estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines) + estimated_line_cut = int(estimated_line_cut) + for cnt in reversed(range(estimated_line_cut)): + if must_break_at_empty_line: + if lines[cnt] != "": continue + print(cnt) + prev = "\n".join(lines[:cnt]) + post = "\n".join(lines[cnt:]) + if get_token_fn(prev) < limit: break + if cnt == 0: + print('what the fuck ?') + raise RuntimeError("存在一行极长的文本!") + # print(len(post)) + # 列表递归接龙 + result = [prev] + result.extend(cut(post, must_break_at_empty_line)) + return result + try: + return cut(txt, must_break_at_empty_line=True) + except RuntimeError: + return cut(txt, must_break_at_empty_line=False) + +def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit): + def cut(txt_tocut, must_break_at_empty_line): # 递归 + if get_token_fn(txt_tocut) <= limit: + return [txt_tocut] + else: + lines = txt_tocut.split('\n') + estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines) + estimated_line_cut = int(estimated_line_cut) + for cnt in reversed(range(estimated_line_cut)): + if must_break_at_empty_line: + if lines[cnt] != "": continue + print(cnt) + prev = "\n".join(lines[:cnt]) + post = "\n".join(lines[cnt:]) + if get_token_fn(prev) < limit: break + if cnt == 0: + print('what the fuck ?') + raise RuntimeError("存在一行极长的文本!") + # print(len(post)) + # 列表递归接龙 + result = [prev] + result.extend(cut(post, must_break_at_empty_line)) + return result + try: + return cut(txt, must_break_at_empty_line=True) + except RuntimeError: + return cut(txt, must_break_at_empty_line=False) diff --git a/crazy_functions/代码重写为全英文_多线程.py b/crazy_functions/代码重写为全英文_多线程.py index 206a3d8..ad23fea 100644 --- a/crazy_functions/代码重写为全英文_多线程.py +++ b/crazy_functions/代码重写为全英文_多线程.py @@ -1,6 +1,7 @@ import threading from request_llm.bridge_chatgpt import predict_no_ui_long_connection from toolbox import CatchException, write_results_to_file, report_execption +from .crazy_utils import breakdown_txt_to_satisfy_token_limit def extract_code_block_carefully(txt): splitted = txt.split('```') @@ -10,33 +11,6 @@ def extract_code_block_carefully(txt): txt_out = '```'.join(splitted[1:-1]) return txt_out -def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit, must_break_at_empty_line=True): - def cut(txt_tocut, must_break_at_empty_line): # 递归 - if get_token_fn(txt_tocut) <= limit: - return [txt_tocut] - else: - lines = txt_tocut.split('\n') - estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines) - estimated_line_cut = int(estimated_line_cut) - for cnt in reversed(range(estimated_line_cut)): - if must_break_at_empty_line: - if lines[cnt] != "": continue - print(cnt) - prev = "\n".join(lines[:cnt]) - post = "\n".join(lines[cnt:]) - if get_token_fn(prev) < limit: break - if cnt == 0: - print('what the f?') - raise RuntimeError("存在一行极长的文本!") - print(len(post)) - # 列表递归接龙 - result = [prev] - result.extend(cut(post, must_break_at_empty_line)) - return result - try: - return cut(txt, must_break_at_empty_line=True) - except RuntimeError: - return cut(txt, must_break_at_empty_line=False) def break_txt_into_half_at_some_linebreak(txt): diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py new file mode 100644 index 0000000..91ad003 --- /dev/null +++ b/crazy_functions/批量翻译PDF文档_多线程.py @@ -0,0 +1,255 @@ +from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down +import re +import unicodedata +fast_debug = False + + +def is_paragraph_break(match): + """ + 根据给定的匹配结果来判断换行符是否表示段落分隔。 + 如果换行符前为句子结束标志(句号,感叹号,问号),且下一个字符为大写字母,则换行符更有可能表示段落分隔。 + 也可以根据之前的内容长度来判断段落是否已经足够长。 + """ + prev_char, next_char = match.groups() + + # 句子结束标志 + sentence_endings = ".!?" + + # 设定一个最小段落长度阈值 + min_paragraph_length = 140 + + if prev_char in sentence_endings and next_char.isupper() and len(match.string[:match.start(1)]) > min_paragraph_length: + return "\n\n" + else: + return " " + + +def normalize_text(text): + """ + 通过把连字(ligatures)等文本特殊符号转换为其基本形式来对文本进行归一化处理。 + 例如,将连字 "fi" 转换为 "f" 和 "i"。 + """ + # 对文本进行归一化处理,分解连字 + normalized_text = unicodedata.normalize("NFKD", text) + + # 替换其他特殊字符 + cleaned_text = re.sub(r'[^\x00-\x7F]+', '', normalized_text) + + return cleaned_text + + +def clean_text(raw_text): + """ + 对从 PDF 提取出的原始文本进行清洗和格式化处理。 + 1. 对原始文本进行归一化处理。 + 2. 替换跨行的连词,例如 “Espe-\ncially” 转换为 “Especially”。 + 3. 根据 heuristic 规则判断换行符是否是段落分隔,并相应地进行替换。 + """ + # 对文本进行归一化处理 + normalized_text = normalize_text(raw_text) + + # 替换跨行的连词 + text = re.sub(r'(\w+-\n\w+)', + lambda m: m.group(1).replace('-\n', ''), normalized_text) + + # 根据前后相邻字符的特点,找到原文本中的换行符 + newlines = re.compile(r'(\S)\n(\S)') + + # 根据 heuristic 规则,用空格或段落分隔符替换原换行符 + final_text = re.sub(newlines, lambda m: m.group( + 1) + is_paragraph_break(m) + m.group(2), text) + + return final_text.strip() + + +def read_and_clean_pdf_text(fp): + import fitz, re + import numpy as np + # file_content = "" + with fitz.open(fp) as doc: + meta_txt = [] + meta_font = [] + for page in doc: + # file_content += page.get_text() + text_areas = page.get_text("dict") # 获取页面上的文本信息 + + + # # 行元提取 for each word segment with in line for each line for each block + # meta_txt.extend( [ ["".join( [wtf['text'] for wtf in l['spans'] ]) for l in t['lines'] ] for t in text_areas['blocks'] if 'lines' in t]) + # meta_font.extend([ [ np.mean([wtf['size'] for wtf in l['spans'] ]) for l in t['lines'] ] for t in text_areas['blocks'] if 'lines' in t]) + + # 块元提取 for each word segment with in line for each line for each block + meta_txt.extend( [ " ".join(["".join( [wtf['text'] for wtf in l['spans'] ]) for l in t['lines'] ]) for t in text_areas['blocks'] if 'lines' in t]) + meta_font.extend([ np.mean( [ np.mean([wtf['size'] for wtf in l['spans'] ]) for l in t['lines'] ]) for t in text_areas['blocks'] if 'lines' in t]) + + def 把字符太少的块清除为回车(meta_txt): + for index, block_txt in enumerate(meta_txt): + if len(block_txt) < 100: + meta_txt[index] = '\n' + return meta_txt + meta_txt = 把字符太少的块清除为回车(meta_txt) + + def 清理多余的空行(meta_txt): + for index in reversed(range(1, len(meta_txt))): + if meta_txt[index] == '\n' and meta_txt[index-1] == '\n': + meta_txt.pop(index) + return meta_txt + meta_txt = 清理多余的空行(meta_txt) + + def 合并小写开头的段落块(meta_txt): + def starts_with_lowercase_word(s): + pattern = r"^[a-z]+" + match = re.match(pattern, s) + if match: + return True + else: + return False + for _ in range(100): + for index, block_txt in enumerate(meta_txt): + if starts_with_lowercase_word(block_txt): + if meta_txt[index-1]!='\n': meta_txt[index-1] += ' ' + else: meta_txt[index-1] = '' + meta_txt[index-1] += meta_txt[index] + meta_txt[index] = '\n' + return meta_txt + meta_txt = 合并小写开头的段落块(meta_txt) + meta_txt = 清理多余的空行(meta_txt) + + meta_txt = '\n'.join(meta_txt) + # 清除重复的换行 + for _ in range(5): + meta_txt = meta_txt.replace('\n\n','\n') + + # 换行 -> 双换行 + meta_txt = meta_txt.replace('\n', '\n\n') + + # print(meta_txt) + + return meta_txt + +@CatchException +def 批量翻译PDF文档(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT): + import glob + import os + + # 基本信息:功能、贡献者 + chatbot.append([ + "函数插件功能?", + "批量总结PDF文档。函数插件贡献者: Binary-Husky, ValeriaWong, Eralien"]) + yield chatbot, history, '正常' + + # 尝试导入依赖,如果缺少依赖,则给出安装建议 + try: + import fitz, tiktoken + except: + report_execption(chatbot, history, + a=f"解析项目: {txt}", + b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。") + yield chatbot, history, '正常' + return + + # 清空历史,以免输入溢出 + history = [] + + # 检测输入参数,如没有给定输入参数,直接退出 + if os.path.exists(txt): + project_folder = txt + else: + if txt == "": + txt = '空空如也的输入栏' + report_execption(chatbot, history, + a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}") + yield chatbot, history, '正常' + return + + # 搜索需要处理的文件清单 + file_manifest = [f for f in glob.glob( + f'{project_folder}/**/*.pdf', recursive=True)] + + # 如果没找到任何文件 + if len(file_manifest) == 0: + report_execption(chatbot, history, + a=f"解析项目: {txt}", b=f"找不到任何.tex或.pdf文件: {txt}") + yield chatbot, history, '正常' + return + + # 开始正式执行任务 + yield from 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt) + + +def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt): + import time + import glob + import os + import fitz + import tiktoken + from concurrent.futures import ThreadPoolExecutor + print('begin analysis on:', file_manifest) + for index, fp in enumerate(file_manifest): + ### 1. 读取PDF文件 + file_content = read_and_clean_pdf_text(fp) + ### 2. 递归地切割PDF文件 + from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf + enc = tiktoken.get_encoding("gpt2") + TOKEN_LIMIT_PER_FRAGMENT = 2048 + get_token_num = lambda txt: len(enc.encode(txt)) + # 分解 + paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( + txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT) + print([get_token_num(frag) for frag in paper_fragments]) + ### 3. 逐个段落翻译 + ## 3.1. 多线程开始 + from request_llm.bridge_chatgpt import predict_no_ui_long_connection + n_frag = len(paper_fragments) + # 异步原子 + mutable = [["", time.time()] for _ in range(n_frag)] + # 翻译函数 + def translate_(index, fragment, mutable): + i_say = f"以下是你需要翻译的文章段落:{fragment}" + # 请求gpt,需要一段时间 + gpt_say = predict_no_ui_long_connection( + inputs=i_say, top_p=top_p, temperature=temperature, history=[], # ["请翻译:" if len(previous_result)!=0 else "", previous_result], + sys_prompt="请你作为一个学术翻译,负责将给定的文章段落翻译成中文,要求语言简洁、精准、凝练。你只需要给出翻译后的文本,不能重复原文。", + observe_window=mutable[index]) + return gpt_say + ### 4. 异步任务开始 + executor = ThreadPoolExecutor(max_workers=16) + # Submit tasks to the pool + futures = [executor.submit(translate_, index, frag, mutable) for index, frag in enumerate(paper_fragments)] + + ### 5. UI主线程,在任务期间提供实时的前端显示 + cnt = 0 + while True: + cnt += 1 + time.sleep(1) + worker_done = [h.done() for h in futures] + if all(worker_done): + executor.shutdown(); break + # 更好的UI视觉效果 + observe_win = [] + # 每个线程都要喂狗(看门狗) + for thread_index, _ in enumerate(worker_done): + mutable[thread_index][1] = time.time() + # 在前端打印些好玩的东西 + for thread_index, _ in enumerate(worker_done): + print_something_really_funny = "[ ...`"+mutable[thread_index][0][-30:].replace('\n','').replace('```','...').replace(' ','.').replace('
','.....').replace('$','.')+"`... ]" + observe_win.append(print_something_really_funny) + stat_str = ''.join([f'执行中: {obs}\n\n' if not done else '已完成\n\n' for done, obs in zip(worker_done, observe_win)]) + chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt%10+1))]; msg = "正常" + yield chatbot, history, msg + + # Wait for tasks to complete + results = [future.result() for future in futures] + + print(results) + # full_result += gpt_say + + # history.extend([fp, full_result]) + + res = write_results_to_file(history) + chatbot.append(("完成了吗?", res)); msg = "完成" + yield chatbot, history, msg + + +# if __name__ == '__main__': +# pro() diff --git a/crazy_functions/高级功能函数模板.py b/crazy_functions/高级功能函数模板.py index d3fdde0..69fd379 100644 --- a/crazy_functions/高级功能函数模板.py +++ b/crazy_functions/高级功能函数模板.py @@ -14,12 +14,13 @@ def 高阶功能模板函数(txt, top_p, temperature, chatbot, history, systemPr i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。' chatbot.append((i_say, "[Local Message] waiting gpt response.")) yield chatbot, history, '正常' # 由于请求gpt需要一段时间,我们先及时地做一次状态显示 - - # history = [] 每次询问不携带之前的询问历史 - gpt_say = predict_no_ui_long_connection( - inputs=i_say, top_p=top_p, temperature=temperature, history=[], - sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。") # 请求gpt,需要一段时间 - + try: + # history = [] 每次询问不携带之前的询问历史 + gpt_say = predict_no_ui_long_connection( + inputs=i_say, top_p=top_p, temperature=temperature, history=[], + sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。") # 请求gpt,需要一段时间 + except: + print("") chatbot[-1] = (i_say, gpt_say) history.append(i_say);history.append(gpt_say) yield chatbot, history, '正常' # 显示 \ No newline at end of file diff --git a/main.py b/main.py index ae80050..67ee00a 100644 --- a/main.py +++ b/main.py @@ -23,12 +23,12 @@ except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.IN print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!") # 一些普通功能模块 -from functional import get_functionals -functional = get_functionals() +from core_functional import get_core_functions +functional = get_core_functions() # 高级函数插件 -from functional_crazy import get_crazy_functionals -crazy_fns = get_crazy_functionals() +from crazy_functional import get_crazy_functions +crazy_fns = get_crazy_functions() # 处理markdown文本格式的转变 gr.Chatbot.postprocess = format_io diff --git a/request_llm/bridge_chatgpt.py b/request_llm/bridge_chatgpt.py index 2a1ef4d..237db04 100644 --- a/request_llm/bridge_chatgpt.py +++ b/request_llm/bridge_chatgpt.py @@ -12,6 +12,7 @@ """ import json +import time import gradio as gr import logging import traceback @@ -73,11 +74,20 @@ def predict_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""): def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_prompt="", observe_window=None): """ - 发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免有人中途掐网线。 - observe_window:用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可 + 发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。 + inputs: + 是本次问询的输入 + sys_prompt: + 系统静默prompt + top_p, temperature: + chatGPT的内部调优参数 + history: + 是之前的对话列表 + observe_window = None: + 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 """ + watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 headers, payload = generate_payload(inputs, top_p, temperature, history, system_prompt=sys_prompt, stream=True) - retry = 0 while True: try: @@ -109,10 +119,16 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr if "content" in delta: result += delta["content"] print(delta["content"], end='') - if observe_window is not None: observe_window[0] += delta["content"] + if observe_window is not None: + # 观测窗,把已经获取的数据显示出去 + if len(observe_window) >= 1: observe_window[0] += delta["content"] + # 看门狗,如果超过期限没有喂狗,则终止 + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: + raise RuntimeError("程序终止。") else: raise RuntimeError("意外Json结构:"+delta) if json_data['finish_reason'] == 'length': - raise ConnectionAbortedError("正常结束,但显示Token不足。") + raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。") return result @@ -128,11 +144,11 @@ def predict(inputs, top_p, temperature, chatbot=[], history=[], system_prompt='' additional_fn代表点击的哪个按钮,按钮见functional.py """ if additional_fn is not None: - import functional - importlib.reload(functional) # 热更新prompt - functional = functional.get_functionals() - if "PreProcess" in functional[additional_fn]: inputs = functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话) - inputs = functional[additional_fn]["Prefix"] + inputs + functional[additional_fn]["Suffix"] + import core_functional + importlib.reload(core_functional) # 热更新prompt + core_functional = core_functional.get_functions() + if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话) + inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"] if stream: raw_input = inputs @@ -189,10 +205,10 @@ def predict(inputs, top_p, temperature, chatbot=[], history=[], system_prompt='' chunk = get_full_error(chunk, stream_response) error_msg = chunk.decode() if "reduce the length" in error_msg: - chatbot[-1] = (chatbot[-1][0], "[Local Message] Input (or history) is too long, please reduce input or clear history by refreshing this page.") + chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长,或历史数据过长. 历史缓存数据现已释放,您可以请再次尝试.") history = [] # 清除历史 elif "Incorrect API key" in error_msg: - chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key provided.") + chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由,拒绝服务.") elif "exceeded your current quota" in error_msg: chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由,拒绝服务.") else: diff --git a/request_llm/bridge_tgui.py b/request_llm/bridge_tgui.py index d7cbe10..15f60b7 100644 --- a/request_llm/bridge_tgui.py +++ b/request_llm/bridge_tgui.py @@ -101,11 +101,11 @@ def predict_tgui(inputs, top_p, temperature, chatbot=[], history=[], system_prom additional_fn代表点击的哪个按钮,按钮见functional.py """ if additional_fn is not None: - import functional - importlib.reload(functional) # 热更新prompt - functional = functional.get_functionals() - if "PreProcess" in functional[additional_fn]: inputs = functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话) - inputs = functional[additional_fn]["Prefix"] + inputs + functional[additional_fn]["Suffix"] + import core_functional + importlib.reload(core_functional) # 热更新prompt + core_functional = core_functional.get_functions() + if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话) + inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"] raw_input = "What I would like to say is the following: " + inputs logging.info(f'[raw_input] {raw_input}') diff --git a/requirements.txt b/requirements.txt index bdafbe3..8034392 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ Markdown latex2mathml openai transformers +numpy \ No newline at end of file