auto prompt lang

This commit is contained in:
binary-husky 2024-01-21 13:47:11 +08:00
parent 06b0e800a2
commit b55d573819
3 changed files with 94 additions and 19 deletions

View File

@ -4,18 +4,27 @@
import importlib import importlib
from toolbox import clear_line_break from toolbox import clear_line_break
from toolbox import build_gpt_academic_masked_string from toolbox import build_gpt_academic_masked_string
from toolbox import apply_gpt_academic_string_mask_langbased
from toolbox import build_gpt_academic_masked_string_langbased
from textwrap import dedent from textwrap import dedent
def get_core_functions(): def get_core_functions():
return { return {
"英语学术润色": { "学术语料润色": {
# [1*] 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等 # [1*] 前缀字符串,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等。
"Prefix": r"Below is a paragraph from an academic paper. Polish the writing to meet the academic style, " # 这里填一个提示词字符串就行了,这里为了区分中英文情景搞复杂了一点
"Prefix": build_gpt_academic_masked_string_langbased(
text_show_english=
r"Below is a paragraph from an academic paper. Polish the writing to meet the academic style, "
r"improve the spelling, grammar, clarity, concision and overall readability. When necessary, rewrite the whole sentence. " r"improve the spelling, grammar, clarity, concision and overall readability. When necessary, rewrite the whole sentence. "
r"Firstly, you should provide the polished paragraph. " r"Firstly, you should provide the polished paragraph. "
r"Secondly, you should list all your modification and explain the reasons to do so in markdown table." + "\n\n", r"Secondly, you should list all your modification and explain the reasons to do so in markdown table.",
# [2*] 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来 text_show_chinese=
r"作为一名中文学术论文写作改进助理,你的任务是改进所提供文本的拼写、语法、清晰、简洁和整体可读性,"
r"同时分解长句减少重复并提供改进建议。请先提供文本的更正版本然后在markdown表格中列出修改的内容并给出修改的理由:"
) + "\n\n",
# [2*] 后缀字符串,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来
"Suffix": r"", "Suffix": r"",
# [3] 按钮颜色 (可选参数,默认 secondary) # [3] 按钮颜色 (可选参数,默认 secondary)
"Color": r"secondary", "Color": r"secondary",
@ -33,6 +42,7 @@ def get_core_functions():
"Prefix": r"", "Prefix": r"",
# 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来 # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来
"Suffix": "Suffix":
# dedent() 函数用于去除多行字符串的缩进
dedent("\n"+f''' dedent("\n"+f'''
============================== ==============================
@ -85,14 +95,22 @@ def get_core_functions():
"学术英中互译": { "学术英中互译": {
"Prefix": r"I want you to act as a scientific English-Chinese translator, " + "Prefix": build_gpt_academic_masked_string_langbased(
r"I will provide you with some paragraphs in one language " + text_show_chinese=
r"and your task is to accurately and academically translate the paragraphs only into the other language. " + r"I want you to act as a scientific English-Chinese translator, "
r"Do not repeat the original provided paragraphs after translation. " + r"I will provide you with some paragraphs in one language "
r"You should use artificial intelligence tools, " + r"and your task is to accurately and academically translate the paragraphs only into the other language. "
r"such as natural language processing, and rhetorical knowledge " + r"Do not repeat the original provided paragraphs after translation. "
r"and experience about effective writing techniques to reply. " + r"You should use artificial intelligence tools, "
r"I'll give you my paragraphs as follows, tell me what language it is written in, and then translate:" + "\n\n", r"such as natural language processing, and rhetorical knowledge "
r"and experience about effective writing techniques to reply. "
r"I'll give you my paragraphs as follows, tell me what language it is written in, and then translate:",
text_show_english=
r"你是经验丰富的翻译,请把以下学术文章段落翻译成中文,"
r"并同时充分考虑中文的语法、清晰、简洁和整体可读性,"
r"必要时,你可以修改整个句子的顺序以确保翻译后的段落符合中文的语言习惯。"
r"你需要翻译的文本如下:"
) + "\n\n",
"Suffix": r"", "Suffix": r"",
}, },
@ -142,7 +160,11 @@ def handle_core_functionality(additional_fn, inputs, history, chatbot):
if "PreProcess" in core_functional[additional_fn]: if "PreProcess" in core_functional[additional_fn]:
if core_functional[additional_fn]["PreProcess"] is not None: if core_functional[additional_fn]["PreProcess"] is not None:
inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话) inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"] # 为字符串加上上面定义的前缀和后缀。
inputs = apply_gpt_academic_string_mask_langbased(
string = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"],
lang_reference = inputs,
)
if core_functional[additional_fn].get("AutoClearHistory", False): if core_functional[additional_fn].get("AutoClearHistory", False):
history = [] history = []
return inputs, history return inputs, history

View File

@ -10,18 +10,25 @@ from functools import lru_cache
# - () 括号在正则表达式中表示捕获组。 # - () 括号在正则表达式中表示捕获组。
# - 在这个例子中,(.*?)表示捕获任意长度的文本,直到遇到括号外部最近的限定符,即</show_llm>和</show_render>。 # - 在这个例子中,(.*?)表示捕获任意长度的文本,直到遇到括号外部最近的限定符,即</show_llm>和</show_render>。
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=/1=-=-=-=-=-=-=-=-=-=-=-=-=-=/2-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=/1=-=-=-=-=-=-=-=-=-=-=-=-=-=/2-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
const_extract_re = re.compile( const_extract_re = re.compile(
r"<gpt_academic_string_mask><show_llm>(.*?)</show_llm><show_render>(.*?)</show_render></gpt_academic_string_mask>" r"<gpt_academic_string_mask><show_llm>(.*?)</show_llm><show_render>(.*?)</show_render></gpt_academic_string_mask>"
) )
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=/1=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-/2-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Matches one complete language-based mask and captures its two payloads:
# group 1 is the text inside <lang_english>...</lang_english>,
# group 2 is the text inside <lang_chinese>...</lang_chinese>.
# Both captures are non-greedy, and re.DOTALL lets `.` match newlines so a payload may span several lines.
const_extract_langbased_re = re.compile(
r"<gpt_academic_string_mask><lang_english>(.*?)</lang_english><lang_chinese>(.*?)</lang_chinese></gpt_academic_string_mask>",
flags=re.DOTALL,
)
@lru_cache(maxsize=128) @lru_cache(maxsize=128)
def apply_gpt_academic_string_mask(string, mode="show_all"): def apply_gpt_academic_string_mask(string, mode="show_all"):
""" """
根据字符串要给谁看大模型还是web渲染对字符串进行处理返回处理后的字符串 当字符串中有掩码tag时<gpt_academic_string_mask><show_...>根据字符串要给谁看大模型还是web渲染对字符串进行处理返回处理后的字符串
示意图https://mermaid.live/edit#pako:eNqlkUtLw0AUhf9KuOta0iaTplkIPlpduFJwoZEwJGNbzItpita2O6tF8QGKogXFtwu7cSHiq3-mk_oznFR8IYLgrGbuOd9hDrcCpmcR0GDW9ubNPKaBMDauuwI_A9M6YN-3y0bODwxsYos4BdMoBrTg5gwHF-d0mBH6-vqFQe58ed5m9XPW2uteX3Tubrj0ljLYcwxxR3h1zB43WeMs3G19yEM9uapDMe_NG9i2dagKw1Fee4c1D9nGEbtc-5n6HbNtJ8IyHOs8tbs7V2HrlDX2w2Y7XD_5haHEtQiNsOwfMVa_7TzsvrWIuJGo02qTrdwLk9gukQylHv3Afv1ML270s-HZUndrmW1tdA-WfvbM_jMFYuAQ6uCCxVdciTJ1CPLEITpo_GphypeouzXuw6XAmyi7JmgBLZEYlHwLB2S4gHMUO-9DH7tTnvf1CVoFFkBLSOk4QmlRTqpIlaWUHINyNFXjaQWpCYRURUKiWovBYo8X4ymEJFlECQUpqaQkJmuvWygPpg 示意图https://mermaid.live/edit#pako:eNqlkUtLw0AUhf9KuOta0iaTplkIPlpduFJwoZEwJGNbzItpita2O6tF8QGKogXFtwu7cSHiq3-mk_oznFR8IYLgrGbuOd9hDrcCpmcR0GDW9ubNPKaBMDauuwI_A9M6YN-3y0bODwxsYos4BdMoBrTg5gwHF-d0mBH6-vqFQe58ed5m9XPW2uteX3Tubrj0ljLYcwxxR3h1zB43WeMs3G19yEM9uapDMe_NG9i2dagKw1Fee4c1D9nGEbtc-5n6HbNtJ8IyHOs8tbs7V2HrlDX2w2Y7XD_5haHEtQiNsOwfMVa_7TzsvrWIuJGo02qTrdwLk9gukQylHv3Afv1ML270s-HZUndrmW1tdA-WfvbM_jMFYuAQ6uCCxVdciTJ1CPLEITpo_GphypeouzXuw6XAmyi7JmgBLZEYlHwLB2S4gHMUO-9DH7tTnvf1CVoFFkBLSOk4QmlRTqpIlaWUHINyNFXjaQWpCYRURUKiWovBYo8X4ymEJFlECQUpqaQkJmuvWygPpg
""" """
if "<gpt_academic_string_mask>" not in string: # No need to process
return string
if mode == "show_all": if mode == "show_all":
return string return string
if mode == "show_llm": if mode == "show_llm":
@ -41,6 +48,50 @@ def build_gpt_academic_masked_string(text_show_llm="", text_show_render=""):
return f"<gpt_academic_string_mask><show_llm>{text_show_llm}</show_llm><show_render>{text_show_render}</show_render></gpt_academic_string_mask>" return f"<gpt_academic_string_mask><show_llm>{text_show_llm}</show_llm><show_render>{text_show_render}</show_render></gpt_academic_string_mask>"
@lru_cache(maxsize=128)
def apply_gpt_academic_string_mask_langbased(string, lang_reference):
    """
    Resolve language-based mask tags in ``string`` by keeping only one language variant.

    Every ``<gpt_academic_string_mask><lang_english>...</lang_english><lang_chinese>...</lang_chinese></gpt_academic_string_mask>``
    span is replaced by its English payload when ``lang_reference`` contains no Chinese
    character, and by its Chinese payload otherwise. Text outside the mask is left untouched.

    Args:
        string: Text that may contain language-based mask tags.
        lang_reference: Text whose language decides which variant is kept. It counts as
            Chinese as soon as it contains one character in the range U+4E00..U+9FFF;
            anything else (including an empty string) selects the English variant.

    Returns:
        ``string`` with each language-based mask replaced by the selected variant, or
        ``string`` unchanged when it contains no ``<gpt_academic_string_mask>`` tag.

    Example 1:
        string = "prompt: <gpt_academic_string_mask><lang_english>EN</lang_english><lang_chinese>ZH</lang_chinese></gpt_academic_string_mask>"
        lang_reference = "hello world"
        result -> "prompt: EN"

    Example 2 (no mask tag, so the input is returned as-is):
        string = "prompt: plain text"
        lang_reference = "hello world"
        result -> "prompt: plain text"
    """
    if "<gpt_academic_string_mask>" not in string:  # No need to process
        return string

    # A single character in U+4E00..U+9FFF is enough to treat the reference as Chinese.
    reference_is_chinese = re.search("[\u4e00-\u9fff]", lang_reference) is not None

    # Group 1 of the pattern holds the English payload, group 2 the Chinese payload.
    # Only these two outcomes exist, so no "invalid mode" fallback is needed.
    selected_group = r"\2" if reference_is_chinese else r"\1"
    return const_extract_langbased_re.sub(selected_group, string)
@lru_cache(maxsize=128)
def build_gpt_academic_masked_string_langbased(text_show_english="", text_show_chinese=""):
    """
    Build a language-based masked string holding an English and a Chinese variant.

    Args:
        text_show_english: Text placed inside the ``<lang_english>`` tag.
        text_show_chinese: Text placed inside the ``<lang_chinese>`` tag.

    Returns:
        Both variants wrapped in their language tags and enclosed in a single
        ``<gpt_academic_string_mask>`` element, English variant first.
    """
    english_section = f"<lang_english>{text_show_english}</lang_english>"
    chinese_section = f"<lang_chinese>{text_show_chinese}</lang_chinese>"
    pieces = (
        "<gpt_academic_string_mask>",
        english_section,
        chinese_section,
        "</gpt_academic_string_mask>",
    )
    return "".join(pieces)
if __name__ == "__main__": if __name__ == "__main__":
# Test # Test
input_string = ( input_string = (

View File

@ -22,6 +22,8 @@ from shared_utils.connect_void_terminal import get_plugin_default_kwargs
from shared_utils.connect_void_terminal import get_chat_default_kwargs from shared_utils.connect_void_terminal import get_chat_default_kwargs
from shared_utils.text_mask import apply_gpt_academic_string_mask from shared_utils.text_mask import apply_gpt_academic_string_mask
from shared_utils.text_mask import build_gpt_academic_masked_string from shared_utils.text_mask import build_gpt_academic_masked_string
from shared_utils.text_mask import apply_gpt_academic_string_mask_langbased
from shared_utils.text_mask import build_gpt_academic_masked_string_langbased
pj = os.path.join pj = os.path.join
default_user_name = "default_user" default_user_name = "default_user"