From 7296d054a28c32412efd45e3db4535fcc3025592 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Sun, 4 Jun 2023 13:56:15 +0800 Subject: [PATCH] patch latex segmentation --- crazy_functions/crazy_functions_test.py | 2 ++ crazy_functions/latex_utils.py | 29 +++++++------------------ 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index d9c6d85..4852948 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -189,6 +189,8 @@ def test_Latex(): txt = r"https://arxiv.org/abs/1902.03185" txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40" txt = r"https://arxiv.org/abs/2305.18290" + txt = r"https://arxiv.org/abs/2305.17608" + for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): cli_printer.print(cb) # print(cb) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index d759377..fd56d88 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -160,17 +160,8 @@ class LatexPaperSplit(): if not res: break before = res.string[:res.span()[0]] this = res.group(0) - # core = res.group(1) after = res.string[res.span()[1]:] # ====== - if before.endswith('\n'): - this = '\n' + this - before = before[:-1] - if after.startswith('\n'): - # move \n - this = this + '\n' - after = after[1:] - # ====== lt.string = before tmp = lt.next # ====== @@ -210,17 +201,13 @@ class LatexPaperSplit(): return False # ====== # search for first encounter of \begin \end pair with less than 25 lines in the middle - this = search_with_line_limit(target_string) - if not this: break - before, after = target_string.split(this) - # ====== - if before.endswith('\n'): - this = '\n' + this - before = before[:-1] - if after.startswith('\n'): - # move \n - this = this + '\n' - after = after[1:] + ps = search_with_line_limit(target_string) + if not ps: break + res = re.search(re.escape(ps), target_string, flags) + if not res: assert False + before = res.string[:res.span()[0]] + this = res.group(0) + after = res.string[res.span()[1]:] # ====== lt.string = before tmp = lt.next @@ -356,7 +343,7 @@ class LatexPaperSplit(): show_html = node.string.replace('\n','
') if not node.preserve: res_to_t.append(node.string) - f.write(f'

{show_html}

') + f.write(f'

#{show_html}#

') else: f.write(f'

{show_html}

') node = node.next