avoid most compile failure
This commit is contained in:
parent
70ee810133
commit
b745074160
@ -1,6 +1,6 @@
|
|||||||
from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面
|
from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面
|
||||||
from toolbox import zip_folder, objdump, objload
|
from toolbox import zip_folder, objdump, objload
|
||||||
import os
|
import os, shutil
|
||||||
import re
|
import re
|
||||||
pj = os.path.join
|
pj = os.path.join
|
||||||
|
|
||||||
@ -96,6 +96,12 @@ def fix_content(final_tex, node_string):
|
|||||||
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
|
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
|
||||||
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
|
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
|
||||||
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
|
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
|
||||||
|
if node_string.count('{') != node_string.count('}'):
|
||||||
|
if final_tex.count('{') != node_string.count('{'):
|
||||||
|
final_tex = node_string # 出问题了,还原原文
|
||||||
|
if final_tex.count('}') != node_string.count('}'):
|
||||||
|
final_tex = node_string # 出问题了,还原原文
|
||||||
|
|
||||||
return final_tex
|
return final_tex
|
||||||
|
|
||||||
class LatexPaperSplit():
|
class LatexPaperSplit():
|
||||||
@ -138,7 +144,7 @@ class LatexPaperSplit():
|
|||||||
pass
|
pass
|
||||||
return result_string
|
return result_string
|
||||||
|
|
||||||
def split(self, txt):
|
def split(self, txt, project_folder):
|
||||||
"""
|
"""
|
||||||
将Latex文档分解到一个链表中,每个链表节点用preserve的标志位提示它是否应当被GPT处理
|
将Latex文档分解到一个链表中,每个链表节点用preserve的标志位提示它是否应当被GPT处理
|
||||||
"""
|
"""
|
||||||
@ -146,10 +152,11 @@ class LatexPaperSplit():
|
|||||||
def split_worker(root, pattern, flags=0):
|
def split_worker(root, pattern, flags=0):
|
||||||
lt = root
|
lt = root
|
||||||
cnt = 0
|
cnt = 0
|
||||||
|
pattern_compile = re.compile(pattern, flags)
|
||||||
while True:
|
while True:
|
||||||
if not lt.preserve:
|
if not lt.preserve:
|
||||||
while True:
|
while True:
|
||||||
res = re.search(pattern, lt.string, flags)
|
res = pattern_compile.search(lt.string)
|
||||||
if not res: break
|
if not res: break
|
||||||
before = res.string[:res.span()[0]]
|
before = res.string[:res.span()[0]]
|
||||||
this = res.group(0)
|
this = res.group(0)
|
||||||
@ -180,20 +187,93 @@ class LatexPaperSplit():
|
|||||||
# print(cnt)
|
# print(cnt)
|
||||||
if lt is None: break
|
if lt is None: break
|
||||||
|
|
||||||
|
def split_worker_begin_end(root, pattern, flags=0, limit_n_lines=25):
|
||||||
|
lt = root
|
||||||
|
cnt = 0
|
||||||
|
pattern_compile = re.compile(pattern, flags)
|
||||||
|
while True:
|
||||||
|
if not lt.preserve:
|
||||||
|
while True:
|
||||||
|
target_string = lt.string
|
||||||
|
|
||||||
|
def search_with_line_limit(target_string):
|
||||||
|
for res in pattern_compile.finditer(target_string):
|
||||||
|
cmd = res.group(1) # begin{what}
|
||||||
|
this = res.group(2) # content between begin and end
|
||||||
|
white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof']
|
||||||
|
if cmd in white_list or this.count('\n') > 25:
|
||||||
|
sub_res = search_with_line_limit(this)
|
||||||
|
if not sub_res: continue
|
||||||
|
else: return sub_res
|
||||||
|
else:
|
||||||
|
return res.group(0)
|
||||||
|
return False
|
||||||
|
# ======
|
||||||
|
# search for first encounter of \begin \end pair with less than 25 lines in the middle
|
||||||
|
this = search_with_line_limit(target_string)
|
||||||
|
if not this: break
|
||||||
|
before, after = target_string.split(this)
|
||||||
|
# ======
|
||||||
|
if before.endswith('\n'):
|
||||||
|
this = '\n' + this
|
||||||
|
before = before[:-1]
|
||||||
|
if after.startswith('\n'):
|
||||||
|
# move \n
|
||||||
|
this = this + '\n'
|
||||||
|
after = after[1:]
|
||||||
|
# ======
|
||||||
|
lt.string = before
|
||||||
|
tmp = lt.next
|
||||||
|
# ======
|
||||||
|
mid = LinkedListNode(this, True)
|
||||||
|
lt.next = mid
|
||||||
|
# ======
|
||||||
|
aft = LinkedListNode(after, False)
|
||||||
|
mid.next = aft
|
||||||
|
aft.next = tmp
|
||||||
|
# ======
|
||||||
|
lt = aft
|
||||||
|
lt = lt.next
|
||||||
|
cnt += 1
|
||||||
|
# print(cnt)
|
||||||
|
if lt is None: break
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# root 是链表的头
|
# root 是链表的头
|
||||||
print('正在分解Latex源文件,构建链表结构')
|
print('正在分解Latex源文件,构建链表结构')
|
||||||
|
|
||||||
|
split_worker_begin_end(root, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
|
||||||
|
# 将分解结果返回 res_to_t
|
||||||
|
with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
|
||||||
|
res_to_t = []
|
||||||
|
node = root
|
||||||
|
while True:
|
||||||
|
show_html = node.string.replace('\n','<br/>')
|
||||||
|
if not node.preserve:
|
||||||
|
res_to_t.append(node.string)
|
||||||
|
f.write(f'<p style="color:black;">{show_html}</p>')
|
||||||
|
else:
|
||||||
|
f.write(f'<p style="color:red;">{show_html}</p>')
|
||||||
|
node = node.next
|
||||||
|
if node is None: break
|
||||||
|
|
||||||
split_worker(root, r"(.*?)\\maketitle", re.DOTALL)
|
split_worker(root, r"(.*?)\\maketitle", re.DOTALL)
|
||||||
split_worker(root, r"\\section\{(.*?)\}")
|
split_worker(root, r"\\section\{(.*?)\}")
|
||||||
|
split_worker(root, r"\\section\*\{(.*?)\}")
|
||||||
split_worker(root, r"\\subsection\{(.*?)\}")
|
split_worker(root, r"\\subsection\{(.*?)\}")
|
||||||
split_worker(root, r"\\subsubsection\{(.*?)\}")
|
split_worker(root, r"\\subsubsection\{(.*?)\}")
|
||||||
split_worker(root, r"\\bibliography\{(.*?)\}")
|
split_worker(root, r"\\bibliography\{(.*?)\}")
|
||||||
split_worker(root, r"\\bibliographystyle\{(.*?)\}")
|
split_worker(root, r"\\bibliographystyle\{(.*?)\}")
|
||||||
split_worker(root, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
|
split_worker(root, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
|
||||||
|
split_worker(root, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
|
split_worker(root, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
|
split_worker(root, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
|
split_worker(root, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
|
split_worker(root, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
|
split_worker(root, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
|
||||||
|
split_worker(root, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
|
||||||
|
split_worker(root, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
|
split_worker(root, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
|
split_worker(root, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
|
||||||
split_worker(root, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
|
split_worker(root, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
|
||||||
@ -207,17 +287,62 @@ class LatexPaperSplit():
|
|||||||
split_worker(root, r"\\begin\{(.*?)\}")
|
split_worker(root, r"\\begin\{(.*?)\}")
|
||||||
split_worker(root, r"\\end\{(.*?)\}")
|
split_worker(root, r"\\end\{(.*?)\}")
|
||||||
|
|
||||||
res = []
|
|
||||||
node = root
|
node = root
|
||||||
while True:
|
while True:
|
||||||
res.append((node.string, node.preserve))
|
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
|
||||||
|
if len(node.string.strip('\n').strip(''))<50: node.preserve = True
|
||||||
|
node = node.next
|
||||||
|
if node is None: break
|
||||||
|
|
||||||
|
# 修复括号
|
||||||
|
node = root
|
||||||
|
while True:
|
||||||
|
string = node.string
|
||||||
|
if node.preserve:
|
||||||
|
node = node.next
|
||||||
|
if node is None: break
|
||||||
|
continue
|
||||||
|
def break_check(string):
|
||||||
|
str_stack = [""] # (lv, index)
|
||||||
|
for i, c in enumerate(string):
|
||||||
|
if c == '{':
|
||||||
|
str_stack.append('{')
|
||||||
|
elif c == '}':
|
||||||
|
if len(str_stack) == 1:
|
||||||
|
print('stack kill')
|
||||||
|
return i
|
||||||
|
str_stack.pop(-1)
|
||||||
|
else:
|
||||||
|
str_stack[-1] += c
|
||||||
|
return -1
|
||||||
|
bp = break_check(string)
|
||||||
|
|
||||||
|
if bp == -1:
|
||||||
|
pass
|
||||||
|
elif bp == 0:
|
||||||
|
node.string = string[:1]
|
||||||
|
q = LinkedListNode(string[1:], False)
|
||||||
|
q.next = node.next
|
||||||
|
node.next = q
|
||||||
|
else:
|
||||||
|
node.string = string[:bp]
|
||||||
|
q = LinkedListNode(string[bp:], False)
|
||||||
|
q.next = node.next
|
||||||
|
node.next = q
|
||||||
|
|
||||||
|
node = node.next
|
||||||
|
if node is None: break
|
||||||
|
|
||||||
|
node = root
|
||||||
|
while True:
|
||||||
|
|
||||||
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
|
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
|
||||||
if len(node.string.strip('\n').strip(''))<50: node.preserve = True
|
if len(node.string.strip('\n').strip(''))<50: node.preserve = True
|
||||||
node = node.next
|
node = node.next
|
||||||
if node is None: break
|
if node is None: break
|
||||||
|
|
||||||
# 将分解结果返回 res_to_t
|
# 将分解结果返回 res_to_t
|
||||||
with open('debug_log.html', 'w', encoding='utf8') as f:
|
with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
|
||||||
res_to_t = []
|
res_to_t = []
|
||||||
node = root
|
node = root
|
||||||
while True:
|
while True:
|
||||||
@ -299,6 +424,15 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
|
|||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
# <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ---------->
|
# <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ---------->
|
||||||
|
main_tex_basename = os.path.basename(maintex)
|
||||||
|
assert main_tex_basename.endswith('.tex')
|
||||||
|
main_tex_basename_bare = main_tex_basename[:-4]
|
||||||
|
may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl')
|
||||||
|
if os.path.exists(may_exist_bbl):
|
||||||
|
shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl'))
|
||||||
|
shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl'))
|
||||||
|
shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl'))
|
||||||
|
|
||||||
with open(maintex, 'r', encoding='utf-8', errors='replace') as f:
|
with open(maintex, 'r', encoding='utf-8', errors='replace') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
merged_content = merge_tex_files(project_folder, content, mode)
|
merged_content = merge_tex_files(project_folder, content, mode)
|
||||||
@ -308,7 +442,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
|
|||||||
|
|
||||||
# <-------- 精细切分latex文件 ---------->
|
# <-------- 精细切分latex文件 ---------->
|
||||||
lps = LatexPaperSplit()
|
lps = LatexPaperSplit()
|
||||||
res = lps.split(merged_content)
|
res = lps.split(merged_content, project_folder)
|
||||||
|
|
||||||
# <-------- 拆分过长的latex片段 ---------->
|
# <-------- 拆分过长的latex片段 ---------->
|
||||||
pfg = LatexPaperFileGroup()
|
pfg = LatexPaperFileGroup()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user