patch latex segmentation

This commit is contained in:
qingxu fu 2023-06-04 13:56:15 +08:00
parent d57c7d352d
commit 7296d054a2
2 changed files with 10 additions and 21 deletions

View File

@@ -189,6 +189,8 @@ def test_Latex():
txt = r"https://arxiv.org/abs/1902.03185" txt = r"https://arxiv.org/abs/1902.03185"
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40" txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40"
txt = r"https://arxiv.org/abs/2305.18290" txt = r"https://arxiv.org/abs/2305.18290"
txt = r"https://arxiv.org/abs/2305.17608"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb) cli_printer.print(cb) # print(cb)

View File

@@ -160,17 +160,8 @@ class LatexPaperSplit():
if not res: break if not res: break
before = res.string[:res.span()[0]] before = res.string[:res.span()[0]]
this = res.group(0) this = res.group(0)
# core = res.group(1)
after = res.string[res.span()[1]:] after = res.string[res.span()[1]:]
# ====== # ======
if before.endswith('\n'):
this = '\n' + this
before = before[:-1]
if after.startswith('\n'):
# move \n
this = this + '\n'
after = after[1:]
# ======
lt.string = before lt.string = before
tmp = lt.next tmp = lt.next
# ====== # ======
@@ -210,17 +201,13 @@ class LatexPaperSplit():
return False return False
# ====== # ======
# search for first encounter of \begin \end pair with less than 25 lines in the middle # search for first encounter of \begin \end pair with less than 25 lines in the middle
this = search_with_line_limit(target_string) ps = search_with_line_limit(target_string)
if not this: break if not ps: break
before, after = target_string.split(this) res = re.search(re.escape(ps), target_string, flags)
# ====== if not res: assert False
if before.endswith('\n'): before = res.string[:res.span()[0]]
this = '\n' + this this = res.group(0)
before = before[:-1] after = res.string[res.span()[1]:]
if after.startswith('\n'):
# move \n
this = this + '\n'
after = after[1:]
# ====== # ======
lt.string = before lt.string = before
tmp = lt.next tmp = lt.next
@@ -356,7 +343,7 @@ class LatexPaperSplit():
show_html = node.string.replace('\n','<br/>') show_html = node.string.replace('\n','<br/>')
if not node.preserve: if not node.preserve:
res_to_t.append(node.string) res_to_t.append(node.string)
f.write(f'<p style="color:black;">{show_html}</p>') f.write(f'<p style="color:black;">#{show_html}#</p>')
else: else:
f.write(f'<p style="color:red;">{show_html}</p>') f.write(f'<p style="color:red;">{show_html}</p>')
node = node.next node = node.next