修正误判latex模板文件的bug
This commit is contained in:
parent
d841d13b04
commit
b8560b7510
@ -190,7 +190,6 @@ def test_Latex():
|
|||||||
# txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
|
# txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
|
||||||
# txt = r"https://arxiv.org/abs/2002.09253"
|
# txt = r"https://arxiv.org/abs/2002.09253"
|
||||||
# txt = r"https://arxiv.org/abs/2306.07831"
|
# txt = r"https://arxiv.org/abs/2306.07831"
|
||||||
# txt = r"C:\Users\fuqingxu\Desktop\2023-06-18-13-56-57-result"
|
|
||||||
# txt = r"https://arxiv.org/abs/2212.10156"
|
# txt = r"https://arxiv.org/abs/2212.10156"
|
||||||
txt = r"https://arxiv.org/abs/2211.11559"
|
txt = r"https://arxiv.org/abs/2211.11559"
|
||||||
|
|
||||||
|
@ -110,19 +110,41 @@ Latex Merge File
|
|||||||
def 寻找Latex主文件(file_manifest, mode):
    """
    Find the main file of a multi-file LaTeX project.

    A file qualifies as a candidate when it contains the ``\\documentclass``
    keyword. Files whose basename starts with ``merge`` are skipped.

    If several candidates exist (e.g. a publisher's LaTeX template was shipped
    next to the paper), each one is scored: +1 for every keyword typical of a
    real paper body, -1 for every keyword typical of a template/guideline
    document. The highest-scoring candidate is returned; on a tie, the one
    that appears first in ``file_manifest`` wins.

    Args:
        file_manifest: iterable of paths to the ``.tex`` files to inspect.
        mode: not used by this function; kept so existing callers keep working.

    Returns:
        The path (taken from ``file_manifest``) of the selected main file.

    Raises:
        RuntimeError: if no file contains ``\\documentclass``.
    """
    # (path, content) pairs; the content is kept so scoring does not have to
    # read every candidate from disk a second time.
    candidates = []
    for texf in file_manifest:
        if os.path.basename(texf).startswith('merge'):
            continue
        # errors='replace': only ASCII keywords are searched for, so a file in
        # another encoding should not abort the whole search.
        with open(texf, 'r', encoding='utf8', errors='replace') as f:
            file_content = f.read()
        if r'\documentclass' in file_content:
            candidates.append((texf, file_content))

    if not candidates:
        raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
    if len(candidates) == 1:
        return candidates[0][0]

    # Two or more candidates: penalise words that are common in LaTeX
    # templates but rare in a paper body, reward commands a real paper uses.
    # Raw strings are required here: in a normal literal '\ref' starts with a
    # carriage return ('\r') and would never match the LaTeX command.
    unexpected_words = (r'\LaTeX', 'manuscript', 'Guidelines', 'font',
                        'citations', 'rejected', 'blind review', 'reviewers')
    expected_words = (r'\input', r'\ref', r'\cite')

    def score_of(content):
        bonus = sum(1 for word in expected_words if word in content)
        penalty = sum(1 for word in unexpected_words if word in content)
        return bonus - penalty

    # max() returns the first maximal item, so ties resolve to the earliest
    # candidate in manifest order.
    best_path, _ = max(candidates, key=lambda item: score_of(item[1]))
    return best_path
|
||||||
|
|
||||||
def rm_comments(main_file):
|
def rm_comments(main_file):
|
||||||
new_file_remove_comment_lines = []
|
new_file_remove_comment_lines = []
|
||||||
for l in main_file.splitlines():
|
for l in main_file.splitlines():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user