From f654c1af317ab6fccb40b0097800690a786d8d5d Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Tue, 27 Jun 2023 18:59:56 +0800 Subject: [PATCH] merge regex expressions --- crazy_functions/crazy_functions_test.py | 6 +- crazy_functions/latex_utils.py | 74 ++++++++++++------------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index 3ef555d..f2d3969 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -190,9 +190,11 @@ def test_Latex(): # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE # txt = r"https://arxiv.org/abs/2002.09253" # txt = r"https://arxiv.org/abs/2306.07831" - # txt = r"https://arxiv.org/abs/2212.10156" + txt = r"https://arxiv.org/abs/2212.10156" # txt = r"https://arxiv.org/abs/2211.11559" - txt = r"https://arxiv.org/abs/2303.08774" + # txt = r"https://arxiv.org/abs/2303.08774" + # txt = r"https://arxiv.org/abs/2303.12712" + # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder" for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index a7eb9f2..83c4401 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -21,6 +21,7 @@ def set_forbidden_text(text, mask, pattern, flags=0): you can mask out (mask = PRESERVE so that text become untouchable for GPT) everything between "\begin{equation}" and "\end{equation}" """ + if isinstance(pattern, list): pattern = '|'.join(pattern) pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): mask[res.span()[0]:res.span()[1]] = PRESERVE @@ -46,7 +47,7 @@ def set_forbidden_text_careful_brace(text, mask, pattern, flags=0): mask[begin:end] = PRESERVE return text, mask -def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0): +def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True): """ Move area out of preserve area (make text editable for GPT) count the number of the braces so as to catch compelete text area. @@ -64,6 +65,9 @@ def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0): p += 1 end = p mask[begin:end] = TRANSFORM + if forbid_wrapper: + mask[res.regs[0][0]:begin] = PRESERVE + mask[end:res.regs[0][1]] = PRESERVE return text, mask def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42): @@ -163,6 +167,7 @@ def rm_comments(main_file): else: new_file_remove_comment_lines.append(l) main_file = '\n'.join(new_file_remove_comment_lines) + # main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令 main_file = re.sub(r'(?