diff --git a/core_functional.py b/core_functional.py index 0b283a8..5e67eee 100644 --- a/core_functional.py +++ b/core_functional.py @@ -3,6 +3,7 @@ # 'stop' 颜色对应 theme.py 中的 color_er import importlib from toolbox import clear_line_break +from toolbox import build_gpt_academic_masked_string from textwrap import dedent def get_core_functions(): @@ -32,12 +33,12 @@ def get_core_functions(): "Prefix": r"", # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来 "Suffix": - dedent("\n"+r''' + dedent("\n"+f''' ============================== 使用mermaid flowchart对以上文本进行总结,概括上述段落的内容以及内在逻辑关系,例如: 以下是对以上文本的总结,以mermaid flowchart的形式展示: - ```mermaid + ```{build_gpt_academic_masked_string(text_show_llm="mermaid", text_show_render="")} flowchart LR A["节点名1"] --> B("节点名2") B --> C{"节点名3"} diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index c19691e..1435247 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -11,7 +11,7 @@ import tiktoken, copy from functools import lru_cache from concurrent.futures import ThreadPoolExecutor -from toolbox import get_conf, trimmed_format_exc +from toolbox import get_conf, trimmed_format_exc, apply_gpt_academic_string_mask from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui from .bridge_chatgpt import predict as chatgpt_ui @@ -668,6 +668,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, obser """ import threading, time, copy + inputs = apply_gpt_academic_string_mask(inputs, mode="show_llm") model = llm_kwargs['llm_model'] n_model = 1 if '&' not in model: @@ -741,6 +742,7 @@ def predict(inputs, llm_kwargs, *args, **kwargs): additional_fn代表点击的哪个按钮,按钮见functional.py """ + inputs = apply_gpt_academic_string_mask(inputs, mode="show_llm") method = model_info[llm_kwargs['llm_model']]["fn_with_ui"] # 如果这里报错,检查config中的AVAIL_LLM_MODELS选项 yield from method(inputs, llm_kwargs, *args, **kwargs) diff --git a/shared_utils/advanced_markdown_format.py b/shared_utils/advanced_markdown_format.py index 9eed4f0..ece374b 100644 --- a/shared_utils/advanced_markdown_format.py +++ b/shared_utils/advanced_markdown_format.py @@ -4,52 +4,47 @@ import os import math from textwrap import dedent from functools import lru_cache -from pymdownx.superfences import fence_div_format, fence_code_format +from pymdownx.superfences import fence_code_format from latex2mathml.converter import convert as tex2mathml from shared_utils.config_loader import get_conf as get_conf - -pj = os.path.join -default_user_name = 'default_user' +from shared_utils.text_mask import apply_gpt_academic_string_mask markdown_extension_configs = { - 'mdx_math': { - 'enable_dollar_delimiter': True, - 'use_gitlab_delimiters': False, + "mdx_math": { + "enable_dollar_delimiter": True, + "use_gitlab_delimiters": False, }, } code_highlight_configs = { "pymdownx.superfences": { - 'css_class': 'codehilite', + "css_class": "codehilite", "custom_fences": [ - { - 'name': 'mermaid', - 'class': 'mermaid', - 'format': fence_code_format - } - ] + {"name": "mermaid", "class": "mermaid", "format": fence_code_format} + ], }, "pymdownx.highlight": { - 'css_class': 'codehilite', - 'guess_lang': True, + "css_class": "codehilite", + "guess_lang": True, # 'auto_title': True, # 'linenums': True - } + }, } + def text_divide_paragraph(text): """ 将文本按照段落分隔符分割开,生成带有段落标签的HTML代码。 """ pre = '
' - suf = '
' + suf = "" if text.startswith(pre) and text.endswith(suf): return text - if '```' in text: + if "```" in text: # careful input return text - elif '' in text: + elif "" in text: # careful input return text else: @@ -71,20 +66,20 @@ def tex2mathml_catch_exception(content, *args, **kwargs): def replace_math_no_render(match): content = match.group(1) - if 'mode=display' in match.group(0): - content = content.replace('\n', '
') - return f"$${content}$$" + if "mode=display" in match.group(0): + content = content.replace("\n", "
") + return f'$${content}$$' else: - return f"${content}$" + return f'${content}$' def replace_math_render(match): content = match.group(1) - if 'mode=display' in match.group(0): - if '\\begin{aligned}' in content: - content = content.replace('\\begin{aligned}', '\\begin{array}') - content = content.replace('\\end{aligned}', '\\end{array}') - content = content.replace('&', ' ') + if "mode=display" in match.group(0): + if "\\begin{aligned}" in content: + content = content.replace("\\begin{aligned}", "\\begin{array}") + content = content.replace("\\end{aligned}", "\\end{array}") + content = content.replace("&", " ") content = tex2mathml_catch_exception(content, display="block") return content else: @@ -95,9 +90,11 @@ def markdown_bug_hunt(content): """ 解决一个mdx_math的bug(单$包裹begin命令时多余\n', '') + content = content.replace( + '\n", "") return content @@ -105,25 +102,29 @@ def is_equation(txt): """ 判定是否为公式 | 测试1 写出洛伦兹定律,使用tex格式公式 测试2 给出柯西不等式,使用latex格式 测试3 写出麦克斯韦方程组 """ - if '```' in txt and '```reference' not in txt: return False - if '$' not in txt and '\\[' not in txt: return False + if "```" in txt and "```reference" not in txt: + return False + if "$" not in txt and "\\[" not in txt: + return False mathpatterns = { - r'(?^[ \t]*(?:~{3,}|`{3,}))[ ]* # opening fence ((\{(?P[^\}\n]*)\})| # (optional {attrs} or (\.?(?P[\w#.+-]*)[ ]*)? # optional (.)lang @@ -162,16 +164,17 @@ FENCED_BLOCK_RE = re.compile( \n # newline (end of opening fence) (?P.*?)(?<=\n) # the code block (?P=fence)[ ]*$ # closing fence - '''), - re.MULTILINE | re.DOTALL | re.VERBOSE + """ + ), + re.MULTILINE | re.DOTALL | re.VERBOSE, ) def get_line_range(re_match_obj, txt): start_pos, end_pos = re_match_obj.regs[0] - num_newlines_before = txt[:start_pos+1].count('\n') + num_newlines_before = txt[: start_pos + 1].count("\n") line_start = num_newlines_before - line_end = num_newlines_before + txt[start_pos:end_pos].count('\n')+1 + line_end = num_newlines_before + txt[start_pos:end_pos].count("\n") + 1 return line_start, line_end @@ -181,14 +184,16 @@ def fix_code_segment_indent(txt): txt_tmp = txt while True: re_match_obj = FENCED_BLOCK_RE.search(txt_tmp) - if not re_match_obj: break - if len(lines) == 0: lines = txt.split("\n") - + if not re_match_obj: + break + if len(lines) == 0: + lines = txt.split("\n") + # 清空 txt_tmp 对应的位置方便下次搜索 start_pos, end_pos = re_match_obj.regs[0] - txt_tmp = txt_tmp[:start_pos] + ' '*(end_pos-start_pos) + txt_tmp[end_pos:] + txt_tmp = txt_tmp[:start_pos] + " " * (end_pos - start_pos) + txt_tmp[end_pos:] line_start, line_end = get_line_range(re_match_obj, txt) - + # 获取公共缩进 shared_indent_cnt = 1e5 for i in range(line_start, line_end): @@ -202,26 +207,26 @@ def fix_code_segment_indent(txt): num_spaces_should_be = math.ceil(shared_indent_cnt / 4) * 4 for i in range(line_start, line_end): add_n = num_spaces_should_be - shared_indent_cnt - lines[i] = ' ' * add_n + lines[i] - if not change_any: # 遇到第一个 + lines[i] = " " * add_n + lines[i] + if not change_any: # 遇到第一个 change_any = True if change_any: - return '\n'.join(lines) + return "\n".join(lines) else: return txt - - -@lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度 + + +@lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度 def markdown_convertion(txt): """ 将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。 """ pre = '
' - suf = '
' + suf = "" if txt.startswith(pre) and txt.endswith(suf): # print('警告,输入了已经经过转化的字符串,二次转化可能出问题') - return txt # 已经被转化过,不需要再次转化 + return txt # 已经被转化过,不需要再次转化 find_equation_pattern = r'