From 7296d054a28c32412efd45e3db4535fcc3025592 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Sun, 4 Jun 2023 13:56:15 +0800
Subject: [PATCH] patch latex segmentation
---
crazy_functions/crazy_functions_test.py | 2 ++
crazy_functions/latex_utils.py | 29 +++++++------------------
2 files changed, 10 insertions(+), 21 deletions(-)
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index d9c6d85..4852948 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -189,6 +189,8 @@ def test_Latex():
txt = r"https://arxiv.org/abs/1902.03185"
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40"
txt = r"https://arxiv.org/abs/2305.18290"
+ txt = r"https://arxiv.org/abs/2305.17608"
+
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index d759377..fd56d88 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -160,17 +160,8 @@ class LatexPaperSplit():
if not res: break
before = res.string[:res.span()[0]]
this = res.group(0)
- # core = res.group(1)
after = res.string[res.span()[1]:]
# ======
- if before.endswith('\n'):
- this = '\n' + this
- before = before[:-1]
- if after.startswith('\n'):
- # move \n
- this = this + '\n'
- after = after[1:]
- # ======
lt.string = before
tmp = lt.next
# ======
@@ -210,17 +201,13 @@ class LatexPaperSplit():
return False
# ======
# search for first encounter of \begin \end pair with less than 25 lines in the middle
- this = search_with_line_limit(target_string)
- if not this: break
- before, after = target_string.split(this)
- # ======
- if before.endswith('\n'):
- this = '\n' + this
- before = before[:-1]
- if after.startswith('\n'):
- # move \n
- this = this + '\n'
- after = after[1:]
+ ps = search_with_line_limit(target_string)
+ if not ps: break
+ res = re.search(re.escape(ps), target_string, flags)
+ if not res: assert False
+ before = res.string[:res.span()[0]]
+ this = res.group(0)
+ after = res.string[res.span()[1]:]
# ======
lt.string = before
tmp = lt.next
@@ -356,7 +343,7 @@ class LatexPaperSplit():
show_html = node.string.replace('\n','<br/>')
if not node.preserve:
res_to_t.append(node.string)
- f.write(f'<p style="color:black;">{show_html}</p>')
+ f.write(f'<p style="color:black;">#{show_html}#</p>')
else:
f.write(f'<p style="color:red;">{show_html}</p>')
node = node.next