修正一些正则匹配bug
This commit is contained in:
parent
85b838b302
commit
7f5be93c1d
@ -65,8 +65,10 @@ def move_project(project_folder, arxiv_id=None):
|
|||||||
new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
|
new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
|
||||||
else:
|
else:
|
||||||
new_workfolder = f'gpt_log/{gen_time_str()}'
|
new_workfolder = f'gpt_log/{gen_time_str()}'
|
||||||
try: shutil.rmtree(new_workfolder)
|
try:
|
||||||
except: pass
|
shutil.rmtree(new_workfolder)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
shutil.copytree(src=project_folder, dst=new_workfolder)
|
shutil.copytree(src=project_folder, dst=new_workfolder)
|
||||||
return new_workfolder
|
return new_workfolder
|
||||||
|
|
||||||
@ -134,7 +136,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
|
|||||||
try:
|
try:
|
||||||
import glob, os, time
|
import glob, os, time
|
||||||
os.system(f'pdflatex -version')
|
os.system(f'pdflatex -version')
|
||||||
from .latex_utils import Latex精细分解与转化, 编译Latex差别
|
from .latex_utils import Latex精细分解与转化, 编译Latex
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
chatbot.append([ f"解析项目: {txt}",
|
chatbot.append([ f"解析项目: {txt}",
|
||||||
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
|
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
|
||||||
@ -172,7 +174,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
|
|||||||
|
|
||||||
|
|
||||||
# <-------------- compile PDF ------------->
|
# <-------------- compile PDF ------------->
|
||||||
success = yield from 编译Latex差别(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread',
|
success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread',
|
||||||
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
|
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
|
||||||
|
|
||||||
|
|
||||||
@ -204,7 +206,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
|
|||||||
try:
|
try:
|
||||||
import glob, os, time
|
import glob, os, time
|
||||||
os.system(f'pdflatex -version')
|
os.system(f'pdflatex -version')
|
||||||
from .latex_utils import Latex精细分解与转化, 编译Latex差别
|
from .latex_utils import Latex精细分解与转化, 编译Latex
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
chatbot.append([ f"解析项目: {txt}",
|
chatbot.append([ f"解析项目: {txt}",
|
||||||
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
|
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
|
||||||
@ -247,7 +249,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
|
|||||||
|
|
||||||
|
|
||||||
# <-------------- compile PDF ------------->
|
# <-------------- compile PDF ------------->
|
||||||
success = yield from 编译Latex差别(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh',
|
success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh',
|
||||||
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
|
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
|
||||||
|
|
||||||
# <-------------- zip PDF ------------->
|
# <-------------- zip PDF ------------->
|
||||||
|
@ -186,12 +186,12 @@ def test_Latex():
|
|||||||
txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/papery"
|
txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/papery"
|
||||||
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-14-57-06"
|
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-14-57-06"
|
||||||
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-15-40-20"
|
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-15-40-20"
|
||||||
txt = r"https://arxiv.org/abs/1902.03185"
|
|
||||||
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40"
|
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40"
|
||||||
txt = r"https://arxiv.org/abs/2305.18290"
|
txt = r"https://arxiv.org/abs/1902.03185"
|
||||||
txt = r"https://arxiv.org/abs/2305.17608"
|
# txt = r"https://arxiv.org/abs/2305.18290"
|
||||||
# txt = r"https://arxiv.org/abs/2306.00324"
|
# txt = r"https://arxiv.org/abs/2305.17608"
|
||||||
txt = r"https://arxiv.org/abs/2211.16068"
|
# txt = r"https://arxiv.org/abs/2211.16068" # ACE
|
||||||
|
# txt = r"C:\Users\fuqingxu\arxiv_cache\2211.16068\workfolder" # ACE
|
||||||
|
|
||||||
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
||||||
cli_printer.print(cb) # print(cb)
|
cli_printer.print(cb) # print(cb)
|
||||||
|
@ -2,8 +2,76 @@ from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界
|
|||||||
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
|
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
|
||||||
import os, shutil
|
import os, shutil
|
||||||
import re
|
import re
|
||||||
|
import numpy as np
|
||||||
pj = os.path.join
|
pj = os.path.join
|
||||||
|
|
||||||
|
"""
|
||||||
|
========================================================================
|
||||||
|
第一部分
|
||||||
|
Latex 文件切分到一个链表中
|
||||||
|
========================================================================
|
||||||
|
"""
|
||||||
|
# Per-character mask states used while splitting the LaTeX source.
PRESERVE = 0   # character belongs to a segment that is kept as-is
TRANSFORM = 1  # character belongs to a segment that may be processed


def split_worker(text, mask, pattern, flags=0):
    """
    Flag every match of a regular expression as PRESERVE.

    text:    the string that is searched.
    mask:    per-character state array, indexed like ``text``; it is updated
             in place (slice assignment of a scalar, so a numpy array is
             expected rather than a plain list).
    pattern: regular expression source.
    flags:   ``re`` flags forwarded to the search (e.g. ``re.DOTALL``).

    Returns the pair ``(text, mask)`` -- the same two objects that were
    passed in.
    """
    for hit in re.finditer(pattern, text, flags):
        start, stop = hit.span()
        # The whole match, not just its capture group, is frozen.
        mask[start:stop] = PRESERVE
    return text, mask
|
||||||
|
|
||||||
|
def split_worker_reverse_caption(text, mask, pattern, flags=0):
    """
    Flag the first capture group of every match as TRANSFORM.

    Only the span of group 1 is touched; the rest of each match keeps
    whatever state it already had in ``mask``.

    text:    the string that is searched.
    mask:    per-character state array, indexed like ``text``; updated in
             place by scalar slice assignment.
    pattern: regular expression source; it must define a group 1.
    flags:   ``re`` flags forwarded to the search.

    Returns the pair ``(text, mask)`` -- the same two objects that were
    passed in.
    """
    for hit in re.finditer(pattern, text, flags):
        # span(1) is (-1, -1) when group 1 did not take part in the match,
        # which makes the slice below empty.
        start, stop = hit.span(1)
        mask[start:stop] = TRANSFORM
    return text, mask
|
||||||
|
|
||||||
|
def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=25):
    """
    Flag short begin/end environments as PRESERVE, descending into long ones.

    ``pattern`` must capture the environment name as group 1 and the text
    between the begin and end tags as group 2.  For each match:

    * if the name is in the descend list below, or the enclosed text holds
      at least 42 newlines, the enclosed text is searched again with the
      same pattern and only the nested results are written to ``mask``;
    * otherwise the whole match, tags included, is set to PRESERVE.

    NOTE(review): ``limit_n_lines`` is accepted but never read -- the
    threshold is the literal 42 in the code. Confirm which value is intended
    before wiring the parameter in.

    Returns the pair ``(text, mask)``; ``mask`` is updated in place.
    """
    # Environment names whose content is always searched again instead of
    # being frozen as a whole.
    descend_into = ('document', 'abstract', 'lemma', 'definition', 'sproof',
                    'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate')
    matcher = re.compile(pattern, flags)

    def _scan(segment, segment_mask):
        for hit in matcher.finditer(segment):
            env_name = hit.group(1)
            inner = hit.group(2)
            if env_name not in descend_into and inner.count('\n') < 42:
                # Short environment: freeze it together with its tags.
                outer_lo, outer_hi = hit.span()
                segment_mask[outer_lo:outer_hi] = PRESERVE
                continue
            # Positions found by the nested search are relative to `inner`,
            # so it works on the matching slice of the mask and the result
            # is written back to the same range.
            inner_lo, inner_hi = hit.span(2)
            _, inner_mask = _scan(inner, segment_mask[inner_lo:inner_hi])
            segment_mask[inner_lo:inner_hi] = inner_mask
        return segment, segment_mask

    return _scan(text, mask)
|
||||||
|
|
||||||
|
class LinkedListNode:
    """
    Linked-list cell holding one piece of text and its preserve flag.
    """

    def __init__(self, string, preserve=True) -> None:
        # Payload: the text of this cell and whether it is marked as preserved.
        self.string, self.preserve = string, preserve
        # Following cell; None until another cell is attached.
        self.next = None
        # Position fields, both initialised to zero here.
        self.begin_line = self.begin_char = 0
|
||||||
|
|
||||||
|
def convert_to_linklist(text, mask):
    """
    Turn a text and its per-character mask into a chain of LinkedListNode.

    Consecutive characters that share the same state form one node.  The
    returned root is created empty with ``preserve=True``; a leading run of
    PRESERVE characters is stored in the root itself, every other run gets
    a node of its own appended to the chain.

    text: the string to cut up.
    mask: iterable of per-character states, aligned with ``text``.  A value
          equal to PRESERVE yields a preserved node; any other value is
          treated as transformable.  If the two inputs differ in length,
          the surplus of the longer one is ignored.

    Returns the root node of the chain.
    """
    from itertools import groupby  # local import keeps this block self-contained

    root = LinkedListNode("", preserve=True)
    tail = root
    # Group by "is preserved" so each run is joined once, instead of growing
    # a node's string one character at a time.
    runs = groupby(zip(text, mask), key=lambda pair: bool(pair[1] == PRESERVE))
    for keep, run in runs:
        chunk = "".join(ch for ch, _ in run)
        if keep == tail.preserve:
            # Runs alternate, so this only happens for a leading preserved
            # run, which is absorbed by the (still empty) root.
            tail.string += chunk
        else:
            tail.next = LinkedListNode(chunk, preserve=keep)
            tail = tail.next
    return root
|
||||||
|
"""
|
||||||
|
========================================================================
|
||||||
|
Latex 文件融合
|
||||||
|
========================================================================
|
||||||
|
"""
|
||||||
|
|
||||||
def 寻找Latex主文件(file_manifest, mode):
|
def 寻找Latex主文件(file_manifest, mode):
|
||||||
"""
|
"""
|
||||||
在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。
|
在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。
|
||||||
@ -71,19 +139,15 @@ def merge_tex_files(project_foler, main_file, mode):
|
|||||||
return main_file
|
return main_file
|
||||||
|
|
||||||
|
|
||||||
class LinkedListNode():
|
|
||||||
"""
|
|
||||||
链表单元
|
|
||||||
"""
|
|
||||||
def __init__(self, string, preserve=True) -> None:
|
|
||||||
self.string = string
|
|
||||||
self.preserve = preserve
|
|
||||||
self.next = None
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
========================================================================
|
||||||
|
后处理
|
||||||
|
========================================================================
|
||||||
|
"""
|
||||||
def mod_inbraket(match):
|
def mod_inbraket(match):
|
||||||
"""
|
"""
|
||||||
为啥chatgpt会把cite里面的逗号换成中文逗号呀 艹
|
为啥chatgpt会把cite里面的逗号换成中文逗号呀
|
||||||
"""
|
"""
|
||||||
# get the matched string
|
# get the matched string
|
||||||
cmd = match.group(1)
|
cmd = match.group(1)
|
||||||
@ -98,19 +162,24 @@ def fix_content(final_tex, node_string):
|
|||||||
"""
|
"""
|
||||||
Fix common GPT errors to increase success rate
|
Fix common GPT errors to increase success rate
|
||||||
"""
|
"""
|
||||||
final_tex = final_tex.replace('%', r'\%')
|
final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
|
||||||
final_tex = final_tex.replace(r'\%', r'\\%')
|
|
||||||
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
|
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
|
||||||
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
|
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
|
||||||
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
|
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
|
||||||
|
|
||||||
|
if node_string.count('\\begin') != final_tex.count('\\begin'):
|
||||||
|
final_tex = node_string # 出问题了,还原原文
|
||||||
|
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
|
||||||
|
# walk and replace any _ without \
|
||||||
|
final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
|
||||||
if node_string.count('{') != node_string.count('}'):
|
if node_string.count('{') != node_string.count('}'):
|
||||||
if final_tex.count('{') != node_string.count('{'):
|
if final_tex.count('{') != node_string.count('{'):
|
||||||
final_tex = node_string # 出问题了,还原原文
|
final_tex = node_string # 出问题了,还原原文
|
||||||
if final_tex.count('}') != node_string.count('}'):
|
if final_tex.count('}') != node_string.count('}'):
|
||||||
final_tex = node_string # 出问题了,还原原文
|
final_tex = node_string # 出问题了,还原原文
|
||||||
|
|
||||||
return final_tex
|
return final_tex
|
||||||
|
|
||||||
|
|
||||||
class LatexPaperSplit():
|
class LatexPaperSplit():
|
||||||
"""
|
"""
|
||||||
将Latex文档分解到一个链表中,每个链表节点用preserve的标志位提示它是否应当被GPT处理
|
将Latex文档分解到一个链表中,每个链表节点用preserve的标志位提示它是否应当被GPT处理
|
||||||
@ -155,131 +224,62 @@ class LatexPaperSplit():
|
|||||||
"""
|
"""
|
||||||
将Latex文档分解到一个链表中,每个链表节点用preserve的标志位提示它是否应当被GPT处理
|
将Latex文档分解到一个链表中,每个链表节点用preserve的标志位提示它是否应当被GPT处理
|
||||||
"""
|
"""
|
||||||
root = LinkedListNode(txt, False)
|
text = txt
|
||||||
def split_worker(root, pattern, flags=0):
|
mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
|
||||||
lt = root
|
|
||||||
cnt = 0
|
|
||||||
pattern_compile = re.compile(pattern, flags)
|
|
||||||
while True:
|
|
||||||
if not lt.preserve:
|
|
||||||
while True:
|
|
||||||
res = pattern_compile.search(lt.string)
|
|
||||||
if not res: break
|
|
||||||
before = res.string[:res.span()[0]]
|
|
||||||
this = res.group(0)
|
|
||||||
after = res.string[res.span()[1]:]
|
|
||||||
# ======
|
|
||||||
lt.string = before
|
|
||||||
tmp = lt.next
|
|
||||||
# ======
|
|
||||||
mid = LinkedListNode(this, True)
|
|
||||||
lt.next = mid
|
|
||||||
# ======
|
|
||||||
aft = LinkedListNode(after, False)
|
|
||||||
mid.next = aft
|
|
||||||
aft.next = tmp
|
|
||||||
# ======
|
|
||||||
lt = aft
|
|
||||||
lt = lt.next
|
|
||||||
cnt += 1
|
|
||||||
# print(cnt)
|
|
||||||
if lt is None: break
|
|
||||||
|
|
||||||
def split_worker_begin_end(root, pattern, flags=0, limit_n_lines=25):
|
# 吸收title与作者以上的部分
|
||||||
lt = root
|
text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL)
|
||||||
cnt = 0
|
|
||||||
pattern_compile = re.compile(pattern, flags)
|
|
||||||
while True:
|
|
||||||
if not lt.preserve:
|
|
||||||
while True:
|
|
||||||
target_string = lt.string
|
|
||||||
|
|
||||||
def search_with_line_limit(target_string):
|
|
||||||
for res in pattern_compile.finditer(target_string):
|
|
||||||
cmd = res.group(1) # begin{what}
|
|
||||||
this = res.group(2) # content between begin and end
|
|
||||||
white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof', 'em', 'emph', 'textit', 'textbf']
|
|
||||||
if cmd in white_list or this.count('\n') > 25:
|
|
||||||
sub_res = search_with_line_limit(this)
|
|
||||||
if not sub_res: continue
|
|
||||||
else: return sub_res
|
|
||||||
else:
|
|
||||||
return res.group(0)
|
|
||||||
return False
|
|
||||||
# ======
|
|
||||||
# search for first encounter of \begin \end pair with less than 25 lines in the middle
|
|
||||||
ps = search_with_line_limit(target_string)
|
|
||||||
if not ps: break
|
|
||||||
res = re.search(re.escape(ps), target_string, flags)
|
|
||||||
if not res: assert False
|
|
||||||
before = res.string[:res.span()[0]]
|
|
||||||
this = res.group(0)
|
|
||||||
after = res.string[res.span()[1]:]
|
|
||||||
# ======
|
|
||||||
lt.string = before
|
|
||||||
tmp = lt.next
|
|
||||||
# ======
|
|
||||||
mid = LinkedListNode(this, True)
|
|
||||||
lt.next = mid
|
|
||||||
# ======
|
|
||||||
aft = LinkedListNode(after, False)
|
|
||||||
mid.next = aft
|
|
||||||
aft.next = tmp
|
|
||||||
# ======
|
|
||||||
lt = aft
|
|
||||||
lt = lt.next
|
|
||||||
cnt += 1
|
|
||||||
# print(cnt)
|
|
||||||
if lt is None: break
|
|
||||||
|
|
||||||
|
|
||||||
# root 是链表的头
|
|
||||||
print('正在分解Latex源文件,构建链表结构')
|
|
||||||
# 删除iffalse注释
|
# 删除iffalse注释
|
||||||
split_worker(root, r"\\iffalse(.*?)\\fi", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
|
||||||
# 吸收在25行以内的begin-end组合
|
# 吸收在25行以内的begin-end组合
|
||||||
split_worker_begin_end(root, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
|
text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
|
||||||
# 吸收匿名公式
|
# 吸收匿名公式
|
||||||
split_worker(root, r"\$\$(.*?)\$\$", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
|
||||||
# 吸收其他杂项
|
# 吸收其他杂项
|
||||||
split_worker(root, r"(.*?)\\maketitle", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\section\{(.*?)\}")
|
||||||
split_worker(root, r"\\section\{(.*?)\}")
|
text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}")
|
||||||
split_worker(root, r"\\section\*\{(.*?)\}")
|
text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}")
|
||||||
split_worker(root, r"\\subsection\{(.*?)\}")
|
text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}")
|
||||||
split_worker(root, r"\\subsubsection\{(.*?)\}")
|
text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}")
|
||||||
split_worker(root, r"\\bibliography\{(.*?)\}")
|
text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}")
|
||||||
split_worker(root, r"\\bibliographystyle\{(.*?)\}")
|
text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
|
text, mask = split_worker(text, mask, r"\\item ")
|
||||||
split_worker(root, r"\\item ")
|
text, mask = split_worker(text, mask, r"\\label\{(.*?)\}")
|
||||||
split_worker(root, r"\\label\{(.*?)\}")
|
text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}")
|
||||||
split_worker(root, r"\\begin\{(.*?)\}")
|
text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}")
|
||||||
split_worker(root, r"\\vspace\{(.*?)\}")
|
text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}")
|
||||||
split_worker(root, r"\\hspace\{(.*?)\}")
|
text, mask = split_worker(text, mask, r"\\end\{(.*?)\}")
|
||||||
split_worker(root, r"\\end\{(.*?)\}")
|
# text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
|
||||||
|
root = convert_to_linklist(text, mask)
|
||||||
node = root
|
# 将分解结果返回 res_to_t
|
||||||
while True:
|
with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
|
||||||
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
|
res_to_t = []
|
||||||
if len(node.string.strip('\n').strip(''))<50: node.preserve = True
|
node = root
|
||||||
node = node.next
|
while True:
|
||||||
if node is None: break
|
show_html = node.string.replace('\n','<br/>')
|
||||||
|
if not node.preserve:
|
||||||
|
res_to_t.append(node.string)
|
||||||
|
f.write(f'<p style="color:black;">#{show_html}#</p>')
|
||||||
|
else:
|
||||||
|
f.write(f'<p style="color:red;">{show_html}</p>')
|
||||||
|
node = node.next
|
||||||
|
if node is None: break
|
||||||
# 修复括号
|
# 修复括号
|
||||||
node = root
|
node = root
|
||||||
while True:
|
while True:
|
||||||
@ -295,7 +295,7 @@ class LatexPaperSplit():
|
|||||||
str_stack.append('{')
|
str_stack.append('{')
|
||||||
elif c == '}':
|
elif c == '}':
|
||||||
if len(str_stack) == 1:
|
if len(str_stack) == 1:
|
||||||
print('stack kill')
|
print('stack fix')
|
||||||
return i
|
return i
|
||||||
str_stack.pop(-1)
|
str_stack.pop(-1)
|
||||||
else:
|
else:
|
||||||
@ -322,7 +322,7 @@ class LatexPaperSplit():
|
|||||||
node = root
|
node = root
|
||||||
while True:
|
while True:
|
||||||
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
|
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
|
||||||
if len(node.string.strip('\n').strip(''))<50: node.preserve = True
|
if len(node.string.strip('\n').strip(''))<42: node.preserve = True
|
||||||
node = node.next
|
node = node.next
|
||||||
if node is None: break
|
if node is None: break
|
||||||
|
|
||||||
@ -418,7 +418,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
|
|||||||
maintex = 寻找Latex主文件(file_manifest, mode)
|
maintex = 寻找Latex主文件(file_manifest, mode)
|
||||||
chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
|
chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||||
time.sleep(5)
|
time.sleep(3)
|
||||||
|
|
||||||
# <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ---------->
|
# <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ---------->
|
||||||
main_tex_basename = os.path.basename(maintex)
|
main_tex_basename = os.path.basename(maintex)
|
||||||
@ -529,7 +529,7 @@ def compile_latex_with_timeout(command, timeout=60):
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def 编译Latex差别(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder):
|
def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder):
|
||||||
import os, time
|
import os, time
|
||||||
current_dir = os.getcwd()
|
current_dir = os.getcwd()
|
||||||
n_fix = 1
|
n_fix = 1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user