From b8560b75101437f7ab13e478c63d6a412d815790 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Sun, 25 Jun 2023 22:46:16 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E8=AF=AF=E5=88=A4latex?=
 =?UTF-8?q?=E6=A8=A1=E6=9D=BF=E6=96=87=E4=BB=B6=E7=9A=84bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/crazy_functions_test.py |  1 -
 crazy_functions/latex_utils.py          | 28 ++++++++++++++++++++++---
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 6e17fb3..7edd04f 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -190,7 +190,6 @@ def test_Latex():
     # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
     # txt = r"https://arxiv.org/abs/2002.09253"
     # txt = r"https://arxiv.org/abs/2306.07831"
-    # txt = r"C:\Users\fuqingxu\Desktop\2023-06-18-13-56-57-result"
     # txt = r"https://arxiv.org/abs/2212.10156"
     txt = r"https://arxiv.org/abs/2211.11559"
 
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 48df10b..def4be2 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -110,19 +110,41 @@ Latex Merge File
 def 寻找Latex主文件(file_manifest, mode):
     """
     在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。
-    P.S. 但愿没人把latex模板放在里面传进来
+    P.S. A LaTeX template may be bundled with the paper: when several files qualify, template-like ones are scored down and the best-scoring file is returned (added 6.25).
     """
+    canidates = []
     for texf in file_manifest:
         if os.path.basename(texf).startswith('merge'):
             continue
         with open(texf, 'r', encoding='utf8') as f:
             file_content = f.read()
         if r'\documentclass' in file_content:
-            return texf
+            canidates.append(texf)
         else:
             continue
-    raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
 
+    if len(canidates) == 0:
+        raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
+    elif len(canidates) == 1:
+        return canidates[0]
+    else:  # len(canidates) >= 2: penalize words common in LaTeX templates (but rare in a paper body), reward words typical of real papers, return the best-scoring file
+        canidates_score = []
+        # Words that hint at a template document; each one found costs the file a point
+        unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
+        expected_words = [r'\input', r'\ref', r'\cite']  # raw strings are required: a plain '\ref' is a carriage return followed by 'ef'
+        for texf in canidates:
+            canidates_score.append(0)
+            with open(texf, 'r', encoding='utf8') as f:
+                file_content = f.read()
+            for uw in unexpected_words:
+                if uw in file_content:
+                    canidates_score[-1] -= 1
+            for uw in expected_words:
+                if uw in file_content:
+                    canidates_score[-1] += 1
+        select = np.argmax(canidates_score)  # index of the highest-scoring candidate (first one wins ties)
+        return canidates[select]
+
 def rm_comments(main_file):
     new_file_remove_comment_lines = []
     for l in main_file.splitlines():