From b8560b75101437f7ab13e478c63d6a412d815790 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Sun, 25 Jun 2023 22:46:16 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E8=AF=AF=E5=88=A4latex?=
 =?UTF-8?q?=E6=A8=A1=E6=9D=BF=E6=96=87=E4=BB=B6=E7=9A=84bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/crazy_functions_test.py |  1 -
 crazy_functions/latex_utils.py          | 28 ++++++++++++++++++++++---
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 6e17fb3..7edd04f 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -190,7 +190,6 @@ def test_Latex():
     # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  #  ACE
     # txt = r"https://arxiv.org/abs/2002.09253"
     # txt = r"https://arxiv.org/abs/2306.07831"
-    # txt = r"C:\Users\fuqingxu\Desktop\2023-06-18-13-56-57-result"
     # txt = r"https://arxiv.org/abs/2212.10156"
     txt = r"https://arxiv.org/abs/2211.11559"
     
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 48df10b..def4be2 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -110,19 +110,41 @@ Latex Merge File
 def 寻找Latex主文件(file_manifest, mode):
     """
     在多Tex文档中，寻找主文件，必须包含documentclass，返回找到的第一个。
-    P.S. 但愿没人把latex模板放在里面传进来
+    P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码)
     """
+    canidates = []
     for texf in file_manifest:
         if os.path.basename(texf).startswith('merge'):
             continue
         with open(texf, 'r', encoding='utf8') as f:
             file_content = f.read()
         if r'\documentclass' in file_content:
-            return texf
+            canidates.append(texf)
         else:
             continue
-    raise RuntimeError('无法找到一个主Tex文件（包含documentclass关键字）')
 
+    if len(canidates) == 0:
+        raise RuntimeError('无法找到一个主Tex文件（包含documentclass关键字）')
+    elif len(canidates) == 1:
+        return canidates[0]
+    else: # if len(canidates) >= 2 通过一些Latex模板中常见（但通常不会出现在正文）的单词，对不同latex源文件扣分，取评分最高者返回
+        canidates_score = []
+        # 给出一些判定模板文档的词作为扣分项
+        unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
+        expected_words = [r'\input', r'\ref', r'\cite']
+        for texf in canidates:
+            canidates_score.append(0)
+            with open(texf, 'r', encoding='utf8') as f:
+                file_content = f.read()
+            for uw in unexpected_words:
+                if uw in file_content:
+                    canidates_score[-1] -= 1
+            for uw in expected_words:
+                if uw in file_content:
+                    canidates_score[-1] += 1
+        select = np.argmax(canidates_score) # 取评分最高者返回
+        return canidates[select]
+    
 def rm_comments(main_file):
     new_file_remove_comment_lines = []
     for l in main_file.splitlines():