修正误判latex模板文件的bug

2023-06-25 22:46:16 +08:00 · 2023-06-25 22:46:16 +08:00 · b8560b7510
commit b8560b7510
parent d841d13b04
2 changed files with 25 additions and 4 deletions
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@ -190,7 +190,6 @@ def test_Latex():
    # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  #  ACE
    # txt = r"https://arxiv.org/abs/2002.09253"
    # txt = r"https://arxiv.org/abs/2306.07831"
    # txt = r"C:\Users\fuqingxu\Desktop\2023-06-18-13-56-57-result"
    # txt = r"https://arxiv.org/abs/2212.10156"
    txt = r"https://arxiv.org/abs/2211.11559"
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@ -110,19 +110,41 @@ Latex Merge File
 def 寻找Latex主文件(file_manifest, mode):
     """
     Find the main file of a multi-file LaTeX project.

     A file is a candidate when it contains ``\\documentclass`` and its
     basename does not start with ``merge``. With exactly one candidate that
     file is returned. With several candidates (for example when a publisher
     template is shipped next to the paper), each one is scored and the
     highest-scoring file is returned; on a tie the earliest candidate in
     ``file_manifest`` wins.

     Scoring: -1 for every word from ``unexpected_words`` found in the file
     (words common in LaTeX templates), +1 for every command from
     ``expected_words`` found in the file (commands common in a paper body).

     Args:
         file_manifest: iterable of paths to the ``.tex`` files to inspect;
             each file is read as UTF-8.
         mode: not read by this function; kept so existing call sites work.

     Returns:
         The path (an element of ``file_manifest``) chosen as the main file.

     Raises:
         RuntimeError: if no file contains ``\\documentclass``.
     """
     # Raw strings are required here: in a normal literal '\ref' starts with a
     # carriage return ('\r'), so the "\ref" bonus could never match.
     unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font',
                         'citations', 'rejected', 'blind review', 'reviewers']
     expected_words = [r'\input', r'\ref', r'\cite']

     canidates = []
     canidates_score = []
     for texf in file_manifest:
         if os.path.basename(texf).startswith('merge'):
             continue
         with open(texf, 'r', encoding='utf8') as f:
             file_content = f.read()
         if r'\documentclass' not in file_content:
             continue
         # Score while the content is still in memory, so each file is read once.
         penalty = sum(1 for word in unexpected_words if word in file_content)
         bonus = sum(1 for word in expected_words if word in file_content)
         canidates.append(texf)
         canidates_score.append(bonus - penalty)

     if not canidates:
         raise RuntimeError('无法找到一个主Tex文件（包含documentclass关键字）')

     # max() over indices returns the first index holding the highest score,
     # which also covers the single-candidate case.
     select = max(range(len(canidates)), key=canidates_score.__getitem__)
     return canidates[select]
 def rm_comments(main_file):
    new_file_remove_comment_lines = []
    for l in main_file.splitlines():