增强Latex修复能力
This commit is contained in:
		
							parent
							
								
									067bc97cce
								
							
						
					
					
						commit
						4f4c09a5f3
					
				@ -157,7 +157,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
 | 
				
			|||||||
    try:
 | 
					    try:
 | 
				
			||||||
        import glob, os, time, subprocess
 | 
					        import glob, os, time, subprocess
 | 
				
			||||||
        subprocess.Popen(['pdflatex', '-version'])
 | 
					        subprocess.Popen(['pdflatex', '-version'])
 | 
				
			||||||
        from .latex_utils import Latex精细分解与转化, 编译Latex
 | 
					        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
 | 
				
			||||||
    except Exception as e:
 | 
					    except Exception as e:
 | 
				
			||||||
        chatbot.append([ f"解析项目: {txt}",
 | 
					        chatbot.append([ f"解析项目: {txt}",
 | 
				
			||||||
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
 | 
					            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
 | 
				
			||||||
@ -234,7 +234,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
 | 
				
			|||||||
    try:
 | 
					    try:
 | 
				
			||||||
        import glob, os, time, subprocess
 | 
					        import glob, os, time, subprocess
 | 
				
			||||||
        subprocess.Popen(['pdflatex', '-version'])
 | 
					        subprocess.Popen(['pdflatex', '-version'])
 | 
				
			||||||
        from .latex_utils import Latex精细分解与转化, 编译Latex
 | 
					        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
 | 
				
			||||||
    except Exception as e:
 | 
					    except Exception as e:
 | 
				
			||||||
        chatbot.append([ f"解析项目: {txt}",
 | 
					        chatbot.append([ f"解析项目: {txt}",
 | 
				
			||||||
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
 | 
					            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
 | 
				
			||||||
 | 
				
			|||||||
@ -195,9 +195,10 @@ def test_Latex():
 | 
				
			|||||||
    # txt = r"https://arxiv.org/abs/2303.08774"
 | 
					    # txt = r"https://arxiv.org/abs/2303.08774"
 | 
				
			||||||
    # txt = r"https://arxiv.org/abs/2303.12712"
 | 
					    # txt = r"https://arxiv.org/abs/2303.12712"
 | 
				
			||||||
    # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
 | 
					    # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
 | 
				
			||||||
    txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误!
 | 
					    # txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误!
 | 
				
			||||||
    
 | 
					    # txt = "https://arxiv.org/abs/2205.14135"
 | 
				
			||||||
 | 
					    # txt = r"C:\Users\fuqingxu\arxiv_cache\2205.14135\workfolder"
 | 
				
			||||||
 | 
					    txt = r"C:\Users\fuqingxu\arxiv_cache\2205.14135\workfolder"
 | 
				
			||||||
    for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
 | 
					    for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
 | 
				
			||||||
        cli_printer.print(cb)   #  print(cb)
 | 
					        cli_printer.print(cb)   #  print(cb)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -240,7 +241,7 @@ if __name__ == "__main__":
 | 
				
			|||||||
    # test_数学动画生成manim()
 | 
					    # test_数学动画生成manim()
 | 
				
			||||||
    # test_Langchain知识库()
 | 
					    # test_Langchain知识库()
 | 
				
			||||||
    # test_Langchain知识库读取()
 | 
					    # test_Langchain知识库读取()
 | 
				
			||||||
    # test_Latex()
 | 
					    test_Latex()
 | 
				
			||||||
    test_chatglm_finetune()
 | 
					    # test_chatglm_finetune()
 | 
				
			||||||
    input("程序完成,回车退出。")
 | 
					    input("程序完成,回车退出。")
 | 
				
			||||||
    print("退出。")
 | 
					    print("退出。")
 | 
				
			||||||
@ -1,320 +1,16 @@
 | 
				
			|||||||
from toolbox import update_ui, update_ui_lastest_msg    # 刷新Gradio前端界面
 | 
					from toolbox import update_ui, update_ui_lastest_msg    # 刷新Gradio前端界面
 | 
				
			||||||
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
 | 
					from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
 | 
				
			||||||
 | 
					from .latex_toolbox import PRESERVE, TRANSFORM
 | 
				
			||||||
 | 
					from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
 | 
				
			||||||
 | 
					from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
 | 
				
			||||||
 | 
					from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os, shutil
 | 
					import os, shutil
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import numpy as np
 | 
					import numpy as np
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pj = os.path.join
 | 
					pj = os.path.join
 | 
				
			||||||
 | 
					buggy_line_surgery_n_lines = 10
 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
========================================================================
 | 
					 | 
				
			||||||
Part One
 | 
					 | 
				
			||||||
Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
 | 
					 | 
				
			||||||
========================================================================
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
PRESERVE = 0
 | 
					 | 
				
			||||||
TRANSFORM = 1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def set_forbidden_text(text, mask, pattern, flags=0):
    """
    Mark every regex match in ``text`` as a preserved (untouchable) area.

    ``pattern`` is either one regex string or a list of regex strings; a list
    is merged into a single alternation with ``|``. For each match, the
    corresponding slice of ``mask`` is overwritten with ``PRESERVE``.

    e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
    everything from "\begin{algorithm}" to "\end{algorithm}" (inclusive)
    is masked out so that GPT will not touch it.

    ``mask`` is modified in place and must accept a scalar assigned to a
    slice (a plain list would not; presumably a numpy array - confirm with
    the caller). Returns the ``(text, mask)`` pair.
    """
    combined = '|'.join(pattern) if isinstance(pattern, list) else pattern
    regex = re.compile(combined, flags)
    for found in regex.finditer(text):
        start, stop = found.span()
        mask[start:stop] = PRESERVE
    return text, mask
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
    """
    Move matched areas out of the preserve area (make text editable for GPT).

    ``pattern`` is one regex string or a list of them (joined with ``|``).

    * ``forbid_wrapper=False``: the whole match becomes ``TRANSFORM``.
    * ``forbid_wrapper=True``: only capture group 1 becomes ``TRANSFORM``;
      the text of the match before and after group 1 is set to ``PRESERVE``.

    e.g. for r"\\begin\{abstract\}(.*?)\\end\{abstract\}" the abstract body
    becomes editable while "\begin{abstract}" and "\end{abstract}" stay
    protected.

    ``mask`` is modified in place; returns ``(text, mask)``.
    """
    if isinstance(pattern, list):
        pattern = '|'.join(pattern)
    for found in re.compile(pattern, flags).finditer(text):
        whole_start, whole_stop = found.span(0)
        if not forbid_wrapper:
            mask[whole_start:whole_stop] = TRANSFORM
            continue
        inner_start, inner_stop = found.span(1)
        mask[whole_start:inner_start] = PRESERVE   # opening wrapper, e.g. '\begin{abstract}'
        mask[inner_start:inner_stop] = TRANSFORM   # wrapped content
        mask[inner_stop:whole_stop] = PRESERVE     # closing wrapper, e.g. '\end{abstract}'
    return text, mask
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
    """
    Add a preserve text area in this paper (text become untouchable for GPT).

    For every match of ``pattern`` the text is scanned forward from the start
    of the match while counting braces, so that the preserved area ends at the
    ``}`` that closes the first ``{`` of the match, even when further brace
    pairs are nested inside.
    e.g.
    \caption{blablablablabla\texbf{blablabla}blablabla.}

    The scan looks at most 16384 characters ahead and never reads past the end
    of ``text``; if no closing brace is found, everything scanned is preserved.

    ``mask`` is modified in place; returns ``(text, mask)``.
    """
    pattern_compile = re.compile(pattern, flags)
    max_scan = 1024 * 16
    text_len = len(text)
    for res in pattern_compile.finditer(text):
        # Start at -1 so that the first '{' of the command brings the level to 0;
        # the '}' seen at level 0 is then the one closing the command argument.
        brace_level = -1
        begin = res.start()
        p = begin
        # Bound the scan by the text length: unbalanced braces must not make
        # text[p] run off the end of the string.
        scan_stop = min(text_len, begin + max_scan)
        while p < scan_stop:
            ch = text[p]
            if ch == '}':
                if brace_level == 0:
                    break
                brace_level -= 1
            elif ch == '{':
                brace_level += 1
            p += 1
        end = p + 1  # include the closing brace itself
        mask[begin:end] = PRESERVE
    return text, mask
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
    """
    Move area out of preserve area (make text editable for GPT).

    For every match of ``pattern`` the text is scanned forward from the start
    of capture group 1 while counting braces, so that the editable area ends
    right before the ``}`` that closes the command argument, even when further
    brace pairs are nested inside.
    e.g.
    \caption{blablablablabla\texbf{blablabla}blablabla.}

    With ``forbid_wrapper=True`` the parts of the regex match that lie before
    and after the editable area are set back to ``PRESERVE``.

    The scan looks at most 16384 characters ahead and never reads past the end
    of ``text``.

    ``mask`` is modified in place; returns ``(text, mask)``.
    """
    pattern_compile = re.compile(pattern, flags)
    max_scan = 1024 * 16
    text_len = len(text)
    for res in pattern_compile.finditer(text):
        brace_level = 0
        begin = res.start(1)
        p = begin
        # Bound the scan by the text length: unbalanced braces must not make
        # text[p] run off the end of the string.
        scan_stop = min(text_len, begin + max_scan)
        while p < scan_stop:
            ch = text[p]
            if ch == '}':
                if brace_level == 0:
                    break
                brace_level -= 1
            elif ch == '{':
                brace_level += 1
            p += 1
        end = p  # the closing brace itself is not part of the editable area
        mask[begin:end] = TRANSFORM
        if forbid_wrapper:
            mask[res.start(0):begin] = PRESERVE
            mask[end:res.end(0)] = PRESERVE
    return text, mask
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
    """
    Preserve every short \\begin{...} ... \\end{...} block.

    ``pattern`` must expose the environment name as group 1 and the text
    between begin and end as group 2. A matched block is added to the preserve
    area as a whole unless its name is on the white list below or its content
    has ``limit_n_lines`` or more line breaks; in those two cases the block
    itself is left alone and its content is searched recursively instead.

    ``mask`` is modified in place; returns ``(text, mask)``.
    """
    regex = re.compile(pattern, flags)
    # Environments that are always searched recursively rather than preserved.
    descend_into = ['document', 'abstract', 'lemma', 'definition', 'sproof',
                    'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']

    def _scan(segment, segment_mask):
        for found in regex.finditer(segment):
            env_name = found.group(1)   # begin{what}
            body = found.group(2)       # content between begin and end
            if env_name not in descend_into and body.count('\n') < limit_n_lines:
                whole_start, whole_stop = found.span(0)
                segment_mask[whole_start:whole_stop] = PRESERVE
                continue
            body_start, body_stop = found.span(2)
            _, body_mask = _scan(body, segment_mask[body_start:body_stop])
            segment_mask[body_start:body_stop] = body_mask
        return segment, segment_mask

    return _scan(text, mask)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class LinkedListNode():
    """
    One segment of the document in a singly linked list.

    string:   the text held by this segment
    preserve: True when the segment is in the preserve area, False when it
              is in the transform area
    next:     the following node, or None at the end of the list
    """
    def __init__(self, string, preserve=True) -> None:
        self.next = None
        self.preserve = preserve
        self.string = string
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def convert_to_linklist(text, mask):
    """
    Split ``text`` into a linked list of runs that share the same mask value.

    The list starts with an empty preserved root node. Characters are walked
    together with their mask entries: a character whose mask agrees with the
    current node is appended to it, otherwise a new node is started.
    Returns the root node.
    """
    head = LinkedListNode("", preserve=True)
    tail = head
    for char, flag in zip(text, mask):
        same_kind = (flag == PRESERVE and tail.preserve) \
            or (flag == TRANSFORM and not tail.preserve)
        if not same_kind:
            # the mask value changed: open a new segment
            tail.next = LinkedListNode(char, preserve=(flag == PRESERVE))
            tail = tail.next
            continue
        tail.string += char
    return head
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
========================================================================
 | 
					 | 
				
			||||||
Latex Merge File
 | 
					 | 
				
			||||||
========================================================================
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def 寻找Latex主文件(file_manifest, mode):
    """
    Find the main file among several Tex files.

    The main file must contain ``\\documentclass``. Files whose base name
    starts with ``merge`` are ignored. When exactly one candidate exists it is
    returned; when there are several (e.g. a latex template was shipped along
    with the paper) each candidate is scored and the best one is returned,
    the first one winning ties.

    file_manifest: iterable of paths to tex files
    mode:          not used by this function
    Raises RuntimeError when no file contains ``\\documentclass``.
    """
    canidates = []  # (path, content) of every file containing \documentclass
    for texf in file_manifest:
        if os.path.basename(texf).startswith('merge'):
            continue
        with open(texf, 'r', encoding='utf8', errors='ignore') as f:
            file_content = f.read()
        if r'\documentclass' in file_content:
            canidates.append((texf, file_content))

    if len(canidates) == 0:
        raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
    if len(canidates) == 1:
        return canidates[0][0]

    # Two or more candidates: words that are common in latex templates (but
    # usually absent from a real paper body) cost one point each, commands
    # typical for a real paper earn one point each.
    # Raw strings are required here: a plain '\ref' would be a carriage return
    # followed by "ef" and would never match.
    unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
    expected_words = [r'\input', r'\ref', r'\cite']
    canidates_score = []
    for _, file_content in canidates:
        score = 0
        score -= sum(1 for uw in unexpected_words if uw in file_content)
        score += sum(1 for ew in expected_words if ew in file_content)
        canidates_score.append(score)
    # index of the highest score; max() keeps the first one on ties
    select = max(range(len(canidates_score)), key=canidates_score.__getitem__)
    return canidates[select][0]
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
def rm_comments(main_file):
    """
    Strip latex comments from the string ``main_file``.

    First every line whose first non-blank character is ``%`` is dropped
    entirely, then on the remaining lines everything from an unescaped ``%``
    (one not preceded by a backslash) to the end of the line is removed.
    Returns the cleaned string.
    """
    kept_lines = [
        line for line in main_file.splitlines()
        if not line.lstrip().startswith("%")   # drop whole-line comments
    ]
    without_full_line_comments = '\n'.join(kept_lines)
    # remove trailing (half-line) comments
    return re.sub(r'(?<!\\)%.*', '', without_full_line_comments)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def find_tex_file_ignore_case(fp):
    """
    Resolve the path of a tex file, tolerating a wrong letter case.

    ``.tex`` is appended to ``fp`` when it is missing. If that exact path
    exists it is returned. Otherwise the ``*.tex`` files of the same directory
    are searched for one whose name equals the requested name when letter case
    is ignored.

    Returns the path of the file found, or None when there is no such file.
    """
    dir_name = os.path.dirname(fp)
    base_name = os.path.basename(fp)
    if not base_name.endswith('.tex'):
        base_name += '.tex'
    exact_path = os.path.join(dir_name, base_name)
    if os.path.exists(exact_path):
        return exact_path
    # go case in-sensitive
    import glob
    wanted = base_name.lower()
    # os.path.join keeps the search inside the current directory when dir_name
    # is empty; glob.escape protects directories containing glob characters.
    for f in glob.glob(os.path.join(glob.escape(dir_name), '*.tex')):
        # compare the name of the file actually found on disk
        if os.path.basename(f).lower() == wanted:
            return f
    return None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def merge_tex_files_(project_foler, main_file, mode):
    """
    Merge Tex project recursively.

    Comments are removed from ``main_file`` (a string holding tex source),
    then every ``\\input{name}`` is replaced by the content of the referenced
    file, which is itself merged the same way first. The file is looked up
    relative to ``project_foler`` with ``find_tex_file_ignore_case``.

    Returns the merged source.
    Raises RuntimeError when a referenced file cannot be found.
    """
    main_file = rm_comments(main_file)
    # Substitute from the last match to the first so that the spans of the
    # matches not yet processed stay valid while the string is rewritten.
    for s in reversed(list(re.finditer(r"\\input\{(.*?)\}", main_file, re.M))):
        requested = os.path.join(project_foler, s.group(1))
        fp = find_tex_file_ignore_case(requested)
        if not fp:
            # report the path that was asked for; fp itself is None here
            raise RuntimeError(f'找不到{requested},Tex源文件缺失!')
        with open(fp, 'r', encoding='utf-8', errors='replace') as fx:
            c = fx.read()
        c = merge_tex_files_(project_foler, c, mode)
        main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
    return main_file
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def merge_tex_files(project_foler, main_file, mode):
    """
    Merge Tex project recursively.

    P.S. the latex comments are removed along the way.
    P.S. for ``mode == 'translate_zh'`` the ctex package is inserted right
         after the documentclass line to support Chinese (plus the url package
         when ``{url}`` does not occur in the source yet), and the options
         ``fontset=windows,UTF8`` are added to the documentclass statement.

    Returns the merged source.
    Raises AssertionError (only in 'translate_zh' mode) when no documentclass
    line or no abstract can be found in the merged source.
    """
    main_file = merge_tex_files_(project_foler, main_file, mode)
    main_file = rm_comments(main_file)

    if mode == 'translate_zh':
        # find paper documentclass
        pattern = re.compile(r'\\documentclass.*\n')
        match = pattern.search(main_file)
        # Raised explicitly (instead of an assert statement) so that the check
        # is still performed when python runs with -O.
        if match is None:
            raise AssertionError("Cannot find documentclass statement!")
        position = match.end()
        add_ctex = '\\usepackage{ctex}\n'
        add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
        main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
        # fontset=windows
        main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}", main_file)
        main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}", main_file)
        # find paper abstract: either \begin{abstract} or \abstract{...}
        pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
        pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
        match_opt1 = pattern_opt1.search(main_file)
        match_opt2 = pattern_opt2.search(main_file)
        if match_opt1 is None and match_opt2 is None:
            raise AssertionError("Cannot find paper abstract section!")
    return main_file
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
========================================================================
 | 
					 | 
				
			||||||
Post process
 | 
					 | 
				
			||||||
========================================================================
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
def mod_inbraket(match):
    """
    Undo punctuation damage inside the braces of a latex command.

    The language model sometimes turns the commas inside e.g. ``\\cite{...}``
    into Chinese (full-width) commas. ``match`` must expose the command name
    as group 1 and the brace content as group 2; the command is rebuilt as
    ``\\cmd{content}`` with full-width colons and commas converted back to
    their ASCII counterparts.
    """
    # get the matched string
    cmd = match.group(1)
    str_to_modify = match.group(2)
    # modify the matched string; the full-width characters are written as
    # escapes so that they cannot be normalised to ASCII by accident
    str_to_modify = str_to_modify.replace('\uff1a', ':')    # full-width colon -> ASCII colon
    str_to_modify = str_to_modify.replace('\uff0c', ',')    # full-width comma -> ASCII comma
    return "\\" + cmd + "{" + str_to_modify + "}"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def fix_content(final_tex, node_string):
    """
    Fix common GPT errors to increase success rate.

    final_tex:   the text returned by the language model
    node_string: the original text the model was given

    Steps, in order:
    1. escape every ``%`` that is not already preceded by a backslash;
    2. remove a stray space in ``\\cmd {`` and ``\\ cmd{``;
    3. normalise the content of every ``\\cmd{...}`` with ``mod_inbraket``;
    4. fall back to the original text when the result contains both
       "Traceback" and "[Local Message]", or when the number of ``\\begin``
       differs from the original;
    5. when the original contains ``\\_`` and the result contains fewer of
       them, escape every underscore not preceded by a backslash;
    6. when the net brace level differs from the original, keep the result
       only up to the last brace that can be paired with the original and
       take the rest from the original.

    Returns the repaired text.
    """
    final_tex = re.sub(r"(?<!\\)%", r"\\%", final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)

    if "Traceback" in final_tex and "[Local Message]" in final_tex:
        final_tex = node_string  # something went wrong, restore the original text
    if node_string.count('\\begin') != final_tex.count('\\begin'):
        final_tex = node_string  # something went wrong, restore the original text
    # r'\_' is a backslash followed by an underscore (a raw string avoids the
    # invalid escape sequence '\_')
    escaped_underscores = node_string.count(r'\_')
    if escaped_underscores > 0 and escaped_underscores > final_tex.count(r'\_'):
        # walk and replace any _ without \
        final_tex = re.sub(r"(?<!\\)_", r"\\_", final_tex)

    def compute_brace_level(string):
        # net brace level: number of { minus number of }
        brace_level = 0
        for c in string:
            if c == "{":
                brace_level += 1
            elif c == "}":
                brace_level -= 1
        return brace_level

    def join_most(tex_t, tex_o):
        # Join translated string and original string when something goes wrong:
        # walk through the braces of the original in order, look up the next
        # identical brace in the translation, and stop as soon as either side
        # runs out. The result is the translation up to the last brace matched
        # followed by the original after that brace.
        p_t = 0
        p_o = 0

        def find_next(string, chars, begin):
            # position and value of the first character of `chars` at or after
            # `begin`, or (None, None)
            p = begin
            while p < len(string):
                if string[p] in chars:
                    return p, string[p]
                p += 1
            return None, None

        while True:
            res1, char = find_next(tex_o, ['{', '}'], p_o)
            if res1 is None:
                break
            res2, char = find_next(tex_t, [char], p_t)
            if res2 is None:
                break
            p_o = res1 + 1
            p_t = res2 + 1
        return tex_t[:p_t] + tex_o[p_o:]

    if compute_brace_level(final_tex) != compute_brace_level(node_string):
        # something went wrong: restore part of the original so braces stay correct
        final_tex = join_most(final_tex, node_string)
    return final_tex
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def split_subprocess(txt, project_folder, return_dict, opts):
 | 
					def split_subprocess(txt, project_folder, return_dict, opts):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
@ -356,77 +52,9 @@ def split_subprocess(txt, project_folder, return_dict, opts):
 | 
				
			|||||||
    text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
 | 
					    text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
 | 
				
			||||||
    root = convert_to_linklist(text, mask)
 | 
					    root = convert_to_linklist(text, mask)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # 修复括号
 | 
					    # 最后一步处理,增强稳健性
 | 
				
			||||||
    node = root
 | 
					    root = post_process(root)
 | 
				
			||||||
    while True:
 | 
					 | 
				
			||||||
        string = node.string
 | 
					 | 
				
			||||||
        if node.preserve: 
 | 
					 | 
				
			||||||
            node = node.next
 | 
					 | 
				
			||||||
            if node is None: break
 | 
					 | 
				
			||||||
            continue
 | 
					 | 
				
			||||||
        def break_check(string):
 | 
					 | 
				
			||||||
            str_stack = [""] # (lv, index)
 | 
					 | 
				
			||||||
            for i, c in enumerate(string):
 | 
					 | 
				
			||||||
                if c == '{':
 | 
					 | 
				
			||||||
                    str_stack.append('{')
 | 
					 | 
				
			||||||
                elif c == '}':
 | 
					 | 
				
			||||||
                    if len(str_stack) == 1:
 | 
					 | 
				
			||||||
                        print('stack fix')
 | 
					 | 
				
			||||||
                        return i
 | 
					 | 
				
			||||||
                    str_stack.pop(-1)
 | 
					 | 
				
			||||||
                else:
 | 
					 | 
				
			||||||
                    str_stack[-1] += c
 | 
					 | 
				
			||||||
            return -1
 | 
					 | 
				
			||||||
        bp = break_check(string)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if bp == -1:
 | 
					 | 
				
			||||||
            pass
 | 
					 | 
				
			||||||
        elif bp == 0:
 | 
					 | 
				
			||||||
            node.string = string[:1]
 | 
					 | 
				
			||||||
            q = LinkedListNode(string[1:], False)
 | 
					 | 
				
			||||||
            q.next = node.next
 | 
					 | 
				
			||||||
            node.next = q
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            node.string = string[:bp]
 | 
					 | 
				
			||||||
            q = LinkedListNode(string[bp:], False)
 | 
					 | 
				
			||||||
            q.next = node.next
 | 
					 | 
				
			||||||
            node.next = q
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        node = node.next
 | 
					 | 
				
			||||||
        if node is None: break
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # 屏蔽空行和太短的句子
 | 
					 | 
				
			||||||
    node = root
 | 
					 | 
				
			||||||
    while True:
 | 
					 | 
				
			||||||
        if len(node.string.strip('\n').strip(''))==0: node.preserve = True
 | 
					 | 
				
			||||||
        if len(node.string.strip('\n').strip(''))<42: node.preserve = True
 | 
					 | 
				
			||||||
        node = node.next
 | 
					 | 
				
			||||||
        if node is None: break
 | 
					 | 
				
			||||||
    node = root
 | 
					 | 
				
			||||||
    while True:
 | 
					 | 
				
			||||||
        if node.next and node.preserve and node.next.preserve:
 | 
					 | 
				
			||||||
            node.string += node.next.string
 | 
					 | 
				
			||||||
            node.next = node.next.next
 | 
					 | 
				
			||||||
        node = node.next
 | 
					 | 
				
			||||||
        if node is None: break
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # 将前后断行符脱离
 | 
					 | 
				
			||||||
    node = root
 | 
					 | 
				
			||||||
    prev_node = None
 | 
					 | 
				
			||||||
    while True:
 | 
					 | 
				
			||||||
        if not node.preserve:
 | 
					 | 
				
			||||||
            lstriped_ = node.string.lstrip().lstrip('\n')
 | 
					 | 
				
			||||||
            if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
 | 
					 | 
				
			||||||
                prev_node.string += node.string[:-len(lstriped_)]
 | 
					 | 
				
			||||||
                node.string = lstriped_
 | 
					 | 
				
			||||||
            rstriped_ = node.string.rstrip().rstrip('\n')
 | 
					 | 
				
			||||||
            if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
 | 
					 | 
				
			||||||
                node.next.string = node.string[len(rstriped_):] + node.next.string
 | 
					 | 
				
			||||||
                node.string = rstriped_
 | 
					 | 
				
			||||||
        # =====
 | 
					 | 
				
			||||||
        prev_node = node
 | 
					 | 
				
			||||||
        node = node.next
 | 
					 | 
				
			||||||
        if node is None: break
 | 
					 | 
				
			||||||
    # 输出html调试文件,用红色标注处保留区(PRESERVE),用黑色标注转换区(TRANSFORM)
 | 
					    # 输出html调试文件,用红色标注处保留区(PRESERVE),用黑色标注转换区(TRANSFORM)
 | 
				
			||||||
    with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
 | 
					    with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
 | 
				
			||||||
        segment_parts_for_gpt = []
 | 
					        segment_parts_for_gpt = []
 | 
				
			||||||
@ -437,7 +65,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
 | 
				
			|||||||
            show_html = node.string.replace('\n','<br/>')
 | 
					            show_html = node.string.replace('\n','<br/>')
 | 
				
			||||||
            if not node.preserve:
 | 
					            if not node.preserve:
 | 
				
			||||||
                segment_parts_for_gpt.append(node.string)
 | 
					                segment_parts_for_gpt.append(node.string)
 | 
				
			||||||
                f.write(f'<p style="color:black;">#{show_html}#</p>')
 | 
					                f.write(f'<p style="color:black;">#{node.range}{show_html}#</p>')
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                f.write(f'<p style="color:red;">{show_html}</p>')
 | 
					                f.write(f'<p style="color:red;">{show_html}</p>')
 | 
				
			||||||
            node = node.next
 | 
					            node = node.next
 | 
				
			||||||
@ -448,8 +76,6 @@ def split_subprocess(txt, project_folder, return_dict, opts):
 | 
				
			|||||||
    return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
 | 
					    return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
 | 
				
			||||||
    return return_dict
 | 
					    return return_dict
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class LatexPaperSplit():
 | 
					class LatexPaperSplit():
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    break down latex file to a linked list,
 | 
					    break down latex file to a linked list,
 | 
				
			||||||
@ -464,18 +90,32 @@ class LatexPaperSplit():
 | 
				
			|||||||
        # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者)
 | 
					        # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者)
 | 
				
			||||||
        self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" 
 | 
					        self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def merge_result(self, arr, mode, msg):
 | 
					
 | 
				
			||||||
 | 
					    def merge_result(self, arr, mode, msg, buggy_lines=[]):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        Merge the result after the GPT process completed
 | 
					        Merge the result after the GPT process completed
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        result_string = ""
 | 
					        result_string = ""
 | 
				
			||||||
        p = 0
 | 
					        node_cnt = 0
 | 
				
			||||||
 | 
					        line_cnt = 0
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
        for node in self.nodes:
 | 
					        for node in self.nodes:
 | 
				
			||||||
            if node.preserve:
 | 
					            if node.preserve:
 | 
				
			||||||
 | 
					                line_cnt += node.string.count('\n')
 | 
				
			||||||
                result_string += node.string
 | 
					                result_string += node.string
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                result_string += fix_content(arr[p], node.string)
 | 
					                translated_txt = fix_content(arr[node_cnt], node.string)
 | 
				
			||||||
                p += 1
 | 
					                begin_line = line_cnt
 | 
				
			||||||
 | 
					                end_line = line_cnt + translated_txt.count('\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                # reverse translation if any error
 | 
				
			||||||
 | 
					                if any([begin_line-buggy_line_surgery_n_lines <= b_line <= end_line+buggy_line_surgery_n_lines for b_line in buggy_lines]):
 | 
				
			||||||
 | 
					                    translated_txt = node.string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                result_string += translated_txt
 | 
				
			||||||
 | 
					                node_cnt += 1
 | 
				
			||||||
 | 
					                line_cnt += translated_txt.count('\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if mode == 'translate_zh':
 | 
					        if mode == 'translate_zh':
 | 
				
			||||||
            pattern = re.compile(r'\\begin\{abstract\}.*\n')
 | 
					            pattern = re.compile(r'\\begin\{abstract\}.*\n')
 | 
				
			||||||
            match = pattern.search(result_string)
 | 
					            match = pattern.search(result_string)
 | 
				
			||||||
@ -490,6 +130,7 @@ class LatexPaperSplit():
 | 
				
			|||||||
            result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
 | 
					            result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
 | 
				
			||||||
        return result_string
 | 
					        return result_string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def split(self, txt, project_folder, opts): 
 | 
					    def split(self, txt, project_folder, opts): 
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        break down latex file to a linked list,
 | 
					        break down latex file to a linked list,
 | 
				
			||||||
@ -511,7 +152,6 @@ class LatexPaperSplit():
 | 
				
			|||||||
        return self.sp
 | 
					        return self.sp
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
class LatexPaperFileGroup():
 | 
					class LatexPaperFileGroup():
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    use tokenizer to break down text according to max_token_limit
 | 
					    use tokenizer to break down text according to max_token_limit
 | 
				
			||||||
@ -539,7 +179,7 @@ class LatexPaperFileGroup():
 | 
				
			|||||||
                self.sp_file_index.append(index)
 | 
					                self.sp_file_index.append(index)
 | 
				
			||||||
                self.sp_file_tag.append(self.file_paths[index])
 | 
					                self.sp_file_tag.append(self.file_paths[index])
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
 | 
					                from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
 | 
				
			||||||
                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
 | 
					                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
 | 
				
			||||||
                for j, segment in enumerate(segments):
 | 
					                for j, segment in enumerate(segments):
 | 
				
			||||||
                    self.sp_file_contents.append(segment)
 | 
					                    self.sp_file_contents.append(segment)
 | 
				
			||||||
@ -560,41 +200,14 @@ class LatexPaperFileGroup():
 | 
				
			|||||||
                f.write(res)
 | 
					                f.write(res)
 | 
				
			||||||
        return manifest
 | 
					        return manifest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
    """
    Write a two-column HTML report pairing each original segment with its result.

    sp_file_contents / sp_file_result are iterated in lockstep; surplus items of the
    longer one are ignored. The report is saved through construct_html().save_file,
    copied from ./gpt_log/ into project_folder, and offered to the user via
    promote_file_to_downloadzone. This is best-effort: any ordinary failure is
    printed and swallowed so that the surrounding workflow is not interrupted.
    """
    try:
        import shutil
        from .crazy_utils import construct_html
        from toolbox import gen_time_str
        ch = construct_html()
        # one table row per (original, result) pair
        for orig, trans in zip(sp_file_contents, sp_file_result):
            ch.add_row(a=orig, b=trans)
        create_report_file_name = f"{gen_time_str()}.trans.html"
        # NOTE(review): save_file presumably writes below ./gpt_log/ - the copy underneath relies on that
        ch.save_file(create_report_file_name)
        shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
        promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
        from toolbox import trimmed_format_exc
        print('writing html result failed:', trimmed_format_exc())
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
 | 
					def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
 | 
				
			||||||
    import time, os, re
 | 
					    import time, os, re
 | 
				
			||||||
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
 | 
					    from ..crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
 | 
				
			||||||
    from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件
 | 
					    from .latex_actions import LatexPaperFileGroup, LatexPaperSplit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #  <-------- 寻找主tex文件 ----------> 
 | 
					    #  <-------- 寻找主tex文件 ----------> 
 | 
				
			||||||
    maintex = 寻找Latex主文件(file_manifest, mode)
 | 
					    maintex = find_main_tex_file(file_manifest, mode)
 | 
				
			||||||
    chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
 | 
					    chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
 | 
				
			||||||
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 | 
					    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 | 
				
			||||||
    time.sleep(3)
 | 
					    time.sleep(3)
 | 
				
			||||||
@ -668,54 +281,51 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
 | 
				
			|||||||
    #  <-------- 写出文件 ----------> 
 | 
					    #  <-------- 写出文件 ----------> 
 | 
				
			||||||
    msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
 | 
					    msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
 | 
				
			||||||
    final_tex = lps.merge_result(pfg.file_result, mode, msg)
 | 
					    final_tex = lps.merge_result(pfg.file_result, mode, msg)
 | 
				
			||||||
 | 
					    objdump((lps, pfg.file_result, mode, msg), file=pj(project_folder,'merge_result.pkl'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
 | 
					    with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
 | 
				
			||||||
        if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)
 | 
					        if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #  <-------- 整理结果, 退出 ----------> 
 | 
					    #  <-------- 整理结果, 退出 ----------> 
 | 
				
			||||||
    chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF'))
 | 
					    chatbot.append((f"完成了吗?", 'GPT结果已输出, 即将编译PDF'))
 | 
				
			||||||
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 | 
					    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #  <-------- 返回 ----------> 
 | 
					    #  <-------- 返回 ----------> 
 | 
				
			||||||
    return project_folder + f'/merge_{mode}.tex'
 | 
					    return project_folder + f'/merge_{mode}.tex'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified, fixed_line=None):
					    """
					    Rebuild the merged tex file with the segment around the first reported error reverted.

					    The compile log at log_path is scanned for "<tex_name>:<line>:" entries (the format
					    requested with pdflatex -file-line-error). The smallest reported line, minus one, is
					    appended to fixed_line; the state stored in merge_result.pkl inside
					    work_folder_modified is reloaded and merge_result is re-run with buggy_lines=fixed_line.
					    The result is written to "<tex_name_pure>_fix_<n_fix>.tex" in work_folder_modified.

					    file_path is accepted for interface compatibility and is not read.
					    fixed_line is mutated in place so the caller can accumulate lines across retries;
					    when omitted, a fresh list is used for this call only.

					    Returns (True, new_tex_name_without_extension, sorted_buggy_lines) on success and
					    (False, -1, [-1]) on any failure: unreadable log, no error line found, the same
					    line reported again after it was already handled, or a merge/write error.
					    """
					    # A literal [] default would be one list shared by every call that omits the argument
					    if fixed_line is None:
					        fixed_line = []
					    try:
					        import re
					        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
					            log = f.read()
					        # escape the file name so that '.' in "xxx.tex" only matches a literal dot
					        buggy_lines = sorted(int(l) for l in re.findall(re.escape(tex_name) + ':([0-9]{1,5}):', log))
					        # IndexError here (no error line in the log) is handled by the except below
					        buggy_line = buggy_lines[0] - 1
					        print("reversing tex line that has errors", buggy_line)

					        # Re-assemble the document, reverting the paragraph that caused the error.
					        # Seeing the same line twice means the previous revert did not help: give up.
					        if buggy_line in fixed_line: raise RuntimeError
					        fixed_line.append(buggy_line)

					        # NOTE(review): objload presumably unpickles a file this program wrote earlier - confirm it never reads untrusted data
					        lps, file_result, mode, msg = objload(file=pj(work_folder_modified,'merge_result.pkl'))
					        final_tex = lps.merge_result(file_result, mode, msg, buggy_lines=fixed_line)

					        with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
					            f.write(final_tex)

					        return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
					    except Exception:
					        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
					        print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
					        return False, -1, [-1]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def compile_latex_with_timeout(command, cwd, timeout=60):
    """
    Run a shell command inside cwd and give up after timeout seconds.

    Returns True when the command ended within the time limit (its exit status is
    not inspected) and False when it had to be killed. Captured output is discarded.
    """
    import subprocess
    proc = subprocess.Popen(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=cwd,
    )
    finished_in_time = True
    try:
        proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        finished_in_time = False
        proc.kill()
        # drain the pipes and reap the killed process
        proc.communicate()
        print("Process timed out!")
    return finished_in_time
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
 | 
					def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
 | 
				
			||||||
    import os, time
 | 
					    import os, time
 | 
				
			||||||
    current_dir = os.getcwd()
 | 
					 | 
				
			||||||
    n_fix = 1
 | 
					    n_fix = 1
 | 
				
			||||||
 | 
					    fixed_line = []
 | 
				
			||||||
    max_try = 32
 | 
					    max_try = 32
 | 
				
			||||||
    chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
 | 
					    chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
 | 
				
			||||||
    chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
 | 
					    chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
 | 
				
			||||||
@ -723,6 +333,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    while True:
 | 
					    while True:
 | 
				
			||||||
        import os
 | 
					        import os
 | 
				
			||||||
 | 
					        may_exist_bbl = pj(work_folder_modified, f'merge.bbl')
 | 
				
			||||||
 | 
					        target_bbl = pj(work_folder_modified, f'{main_file_modified}.bbl')
 | 
				
			||||||
 | 
					        if os.path.exists(may_exist_bbl) and not os.path.exists(target_bbl):
 | 
				
			||||||
 | 
					            shutil.copyfile(may_exist_bbl, target_bbl)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
 | 
					        # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
 | 
				
			||||||
        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history)   # 刷新Gradio前端界面
 | 
					        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history)   # 刷新Gradio前端界面
 | 
				
			||||||
@ -756,7 +370,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
 | 
				
			|||||||
                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
 | 
					                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
 | 
				
			||||||
                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
 | 
					                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
        # <---------- 检查结果 ----------->
 | 
					        # <---------- 检查结果 ----------->
 | 
				
			||||||
        results_ = ""
 | 
					        results_ = ""
 | 
				
			||||||
        original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
 | 
					        original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
 | 
				
			||||||
@ -783,7 +396,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
 | 
				
			|||||||
                    concat_pdf = pj(work_folder_modified, f'comparison.pdf')
 | 
					                    concat_pdf = pj(work_folder_modified, f'comparison.pdf')
 | 
				
			||||||
                    merge_pdfs(origin_pdf, result_pdf, concat_pdf)
 | 
					                    merge_pdfs(origin_pdf, result_pdf, concat_pdf)
 | 
				
			||||||
                    promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
 | 
					                    promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
 | 
				
			||||||
                except:
 | 
					                except Exception as e:
 | 
				
			||||||
                    pass
 | 
					                    pass
 | 
				
			||||||
            return True # 成功啦
 | 
					            return True # 成功啦
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
@ -796,6 +409,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
 | 
				
			|||||||
                tex_name_pure=f'{main_file_modified}',
 | 
					                tex_name_pure=f'{main_file_modified}',
 | 
				
			||||||
                n_fix=n_fix,
 | 
					                n_fix=n_fix,
 | 
				
			||||||
                work_folder_modified=work_folder_modified,
 | 
					                work_folder_modified=work_folder_modified,
 | 
				
			||||||
 | 
					                fixed_line=fixed_line
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
            yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history)   # 刷新Gradio前端界面
 | 
					            yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history)   # 刷新Gradio前端界面
 | 
				
			||||||
            if not can_retry: break
 | 
					            if not can_retry: break
 | 
				
			||||||
@ -803,14 +417,15 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
 | 
				
			|||||||
    return False # 失败啦
 | 
					    return False # 失败啦
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def merge_pdfs(pdf1_path, pdf2_path, output_path):
 | 
					def merge_pdfs(pdf1_path, pdf2_path, output_path):
 | 
				
			||||||
    import PyPDF2
 | 
					    import PyPDF2
 | 
				
			||||||
    # Open the first PDF file
 | 
					    # Open the first PDF file
 | 
				
			||||||
    with open(pdf1_path, 'rb') as pdf1_file:
 | 
					    with open(pdf1_path, 'rb') as pdf1_file:
 | 
				
			||||||
        pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
 | 
					        pdf1_reader = PyPDF2.PdfReader(pdf1_file)
 | 
				
			||||||
        # Open the second PDF file
 | 
					        # Open the second PDF file
 | 
				
			||||||
        with open(pdf2_path, 'rb') as pdf2_file:
 | 
					        with open(pdf2_path, 'rb') as pdf2_file:
 | 
				
			||||||
            pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
 | 
					            pdf2_reader = PyPDF2.PdfReader(pdf2_file)
 | 
				
			||||||
            # Create a new PDF file to store the merged pages
 | 
					            # Create a new PDF file to store the merged pages
 | 
				
			||||||
            output_writer = PyPDF2.PdfFileWriter()
 | 
					            output_writer = PyPDF2.PdfFileWriter()
 | 
				
			||||||
            # Determine the number of pages in each PDF file
 | 
					            # Determine the number of pages in each PDF file
 | 
				
			||||||
@ -838,3 +453,32 @@ def merge_pdfs(pdf1_path, pdf2_path, output_path):
 | 
				
			|||||||
            # Save the merged PDF file
 | 
					            # Save the merged PDF file
 | 
				
			||||||
            with open(output_path, 'wb') as output_file:
 | 
					            with open(output_path, 'wb') as output_file:
 | 
				
			||||||
                output_writer.write(output_file)
 | 
					                output_writer.write(output_file)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
					    """
					    Write a two-column HTML report pairing each original segment with its result.

					    sp_file_contents / sp_file_result are iterated in lockstep; surplus items of the
					    longer one are ignored. The report is saved through construct_html().save_file,
					    copied from ./gpt_log/ into project_folder, and offered to the user via
					    promote_file_to_downloadzone. This is best-effort: any ordinary failure is
					    printed and swallowed so that the surrounding workflow is not interrupted.
					    """
					    try:
					        import shutil
					        from ..crazy_utils import construct_html
					        from toolbox import gen_time_str
					        ch = construct_html()
					        # one table row per (original, result) pair
					        for orig, trans in zip(sp_file_contents, sp_file_result):
					            ch.add_row(a=orig, b=trans)
					        create_report_file_name = f"{gen_time_str()}.trans.html"
					        # NOTE(review): save_file presumably writes below ./gpt_log/ - the copy underneath relies on that
					        ch.save_file(create_report_file_name)
					        shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
					        promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
					    except Exception:
					        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
					        from toolbox import trimmed_format_exc
					        print('writing html result failed:', trimmed_format_exc())
 | 
				
			||||||
							
								
								
									
										417
									
								
								crazy_functions/latex_fns/latex_toolbox.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										417
									
								
								crazy_functions/latex_fns/latex_toolbox.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,417 @@
 | 
				
			|||||||
 | 
					import os, shutil
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					import numpy as np
 | 
				
			||||||
 | 
					PRESERVE = 0
 | 
				
			||||||
 | 
					TRANSFORM = 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pj = os.path.join
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class LinkedListNode():
					    """
					    One text segment of a document, chained to the following segment.
					    """
					    def __init__(self, string, preserve=True) -> None:
					        # successor in the chain; None marks the tail
					        self.next = None
					        # line-range annotation, left unset at construction time
					        self.range = None
					        # True: keep this text verbatim, False: the text may be transformed
					        self.preserve = preserve
					        # the text carried by this segment
					        self.string = string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def convert_to_linklist(text, mask):
					    """
					    Turn text plus a per-character mask into a linked list of segments.

					    Consecutive characters whose mask value agrees with the current segment
					    (PRESERVE for a preserved segment, TRANSFORM for a non-preserved one) are
					    appended to it; otherwise a new segment is started. The list always begins
					    with a preserved root node, which stays empty when the first character is
					    not masked PRESERVE. Iteration stops at the shorter of text and mask.
					    """
					    root = LinkedListNode("", preserve=True)
					    tail = root
					    for char, flag in zip(text, mask):
					        if tail.preserve:
					            same_kind = (flag == PRESERVE)
					        else:
					            same_kind = (flag == TRANSFORM)
					        if same_kind:
					            # extend the running segment
					            tail.string += char
					            continue
					        # mask changed: open a new segment with this character
					        tail.next = LinkedListNode(char, preserve=(flag==PRESERVE))
					        tail = tail.next
					    return root
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def post_process(root):
					    """
					    Clean up a segment linked list in place and return its root.

					    Passes, in order:
					      1. Brace repair: a non-preserved segment containing a '}' with no matching
					         '{' before it is split at that brace (prints 'stack fix' when this happens).
					      2. Segments whose text, ignoring leading/trailing newlines, is shorter than
					         42 characters are marked as preserved.
					      3. Every run of adjacent preserved segments is merged into one node.
					      4. Leading/trailing whitespace of a non-preserved segment is moved into the
					         neighbouring preserved segment, when there is one.
					      5. Each node gets node.range = [first_line - 2, last_line + 2], counted in
					         newline characters from the start of the list.
					    """
					    min_transform_len = 42  # shorter segments are kept untouched
					    expansion = 2           # extra lines added on both sides of node.range

					    def first_unmatched_close(string):
					        # index of the first '}' that has no '{' open before it, or -1
					        depth = 0
					        for idx, ch in enumerate(string):
					            if ch == '{':
					                depth += 1
					            elif ch == '}':
					                if depth == 0:
					                    print('stack fix')
					                    return idx
					                depth -= 1
					        return -1

					    # pass 1: repair brackets
					    node = root
					    while node is not None:
					        if not node.preserve:
					            string = node.string
					            bp = first_unmatched_close(string)
					            if bp != -1:
					                # a leading '}' becomes a one-character node of its own; otherwise cut
					                # right before the brace so the next iteration meets it at index 0
					                cut = 1 if bp == 0 else bp
					                node.string = string[:cut]
					                q = LinkedListNode(string[cut:], False)
					                q.next = node.next
					                node.next = q
					        node = node.next

					    # pass 2: shield empty lines and sentences that are too short
					    node = root
					    while node is not None:
					        if len(node.string.strip('\n')) < min_transform_len:
					            node.preserve = True
					        node = node.next

					    # pass 3: merge runs of preserved nodes; the inner loop keeps absorbing
					    # successors so runs of three or more collapse completely
					    node = root
					    while node is not None:
					        while node.next is not None and node.preserve and node.next.preserve:
					            node.string += node.next.string
					            node.next = node.next.next
					        node = node.next

					    # pass 4: detach leading / trailing line breaks and blanks
					    node = root
					    prev_node = None
					    while node is not None:
					        if not node.preserve:
					            lstriped_ = node.string.lstrip()
					            # count from the front: slicing with -len() would yield '' for an
					            # all-whitespace node and lose its text
					            n_lead = len(node.string) - len(lstriped_)
					            if (prev_node is not None) and (prev_node.preserve) and n_lead:
					                prev_node.string += node.string[:n_lead]
					                node.string = lstriped_
					            rstriped_ = node.string.rstrip()
					            if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
					                node.next.string = node.string[len(rstriped_):] + node.next.string
					                node.string = rstriped_
					        prev_node = node
					        node = node.next

					    # pass 5: annotate each node with its line range (the span reverted on failure)
					    node = root
					    n_line = 0
					    while node is not None:
					        n_l = node.string.count('\n')
					        node.range = [n_line-expansion, n_line+n_l+expansion]
					        n_line = n_line+n_l
					        node = node.next
					    return root
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 | 
				
			||||||
 | 
					Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
 | 
				
			||||||
 | 
					=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def set_forbidden_text(text, mask, pattern, flags=0):
					    r"""
					    Mark every regex match in `text` as PRESERVE in `mask`.

					    `pattern` is a regex string, or a list of regex strings that are
					    OR-ed together with '|'. For example, with
					    pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
					    everything from "\begin{algorithm}" up to and including
					    "\end{algorithm}" is masked so that it is left untouched by GPT.

					    `mask` is updated in place by assigning the scalar PRESERVE to a
					    slice (presumably a numpy array aligned with `text` character by
					    character -- confirm against the caller).

					    Returns the pair (text, mask).
					    """
					    joined = '|'.join(pattern) if isinstance(pattern, list) else pattern
					    regex = re.compile(joined, flags)
					    for hit in regex.finditer(text):
					        lo, hi = hit.span()
					        mask[lo:hi] = PRESERVE
					    return text, mask
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
					    r"""
					    Move matched areas out of the preserve area (make text editable for GPT).

					    `pattern` is a regex string, or a list of regex strings OR-ed with '|'.
					    e.g. for
					    \begin{abstract} blablablablablabla. \end{abstract}

					    With forbid_wrapper=True, the span of capture group 1 becomes
					    TRANSFORM while the rest of the match around it (the wrapper, e.g.
					    "\begin{abstract}" and "\end{abstract}") becomes PRESERVE.
					    With forbid_wrapper=False, the whole match becomes TRANSFORM.

					    `mask` is updated in place by slice assignment; returns (text, mask).
					    """
					    joined = '|'.join(pattern) if isinstance(pattern, list) else pattern
					    regex = re.compile(joined, flags)
					    for hit in regex.finditer(text):
					        if forbid_wrapper:
					            outer_lo, outer_hi = hit.regs[0]
					            inner_lo, inner_hi = hit.regs[1]
					            mask[outer_lo:inner_lo] = PRESERVE    # leading wrapper, e.g. '\begin{abstract}'
					            mask[inner_lo:inner_hi] = TRANSFORM   # the captured content
					            mask[inner_hi:outer_hi] = PRESERVE    # trailing wrapper, e.g. '\end{abstract}'
					        else:
					            lo, hi = hit.span()
					            mask[lo:hi] = TRANSFORM
					    return text, mask
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
					    r"""
					    Add a preserve text area in this paper (text becomes untouchable for GPT).
					    Counts braces so that the complete brace group is caught, e.g.
					    \caption{blablablablabla\texbf{blablabla}blablabla.}

					    For every match of `pattern`, the text is scanned from the start of
					    the match. The first '{' opens the group and the '}' that brings the
					    nesting back to that level closes it. Everything from the start of
					    the match through that closing brace is set to PRESERVE in `mask`
					    (in place, by slice assignment).

					    The scan covers at most 16384 characters and never runs past the end
					    of `text`, so an unbalanced brace no longer raises IndexError; the
					    area then simply extends to where the scan stopped.

					    Returns the pair (text, mask).
					    """
					    pattern_compile = re.compile(pattern, flags)
					    text_len = len(text)
					    for res in pattern_compile.finditer(text):
					        begin = res.start()
					        # -1 so that the command's own opening brace brings the level to 0
					        brace_level = -1
					        p = begin
					        for _ in range(min(1024 * 16, text_len - begin)):
					            ch = text[p]
					            if ch == '}':
					                if brace_level == 0:
					                    break
					                brace_level -= 1
					            elif ch == '{':
					                brace_level += 1
					            p += 1
					        # +1 so that the closing brace itself is preserved as well
					        mask[begin:p + 1] = PRESERVE
					    return text, mask
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
					    r"""
					    Move area out of preserve area (make text editable for GPT).
					    Counts braces so that the complete brace group is caught, e.g.
					    \caption{blablablablabla\texbf{blablabla}blablabla.}

					    For every match of `pattern`, the scan starts where capture group 1
					    starts and stops at the first '}' that is not closing a brace opened
					    during the scan. The scanned range becomes TRANSFORM in `mask`.
					    With forbid_wrapper=True, the parts of the regex match before and
					    after that range are set to PRESERVE.

					    The scan covers at most 16384 characters and never runs past the end
					    of `text`, so an unbalanced brace no longer raises IndexError.

					    `mask` is updated in place by slice assignment; returns (text, mask).
					    """
					    pattern_compile = re.compile(pattern, flags)
					    text_len = len(text)
					    for res in pattern_compile.finditer(text):
					        match_begin, match_end = res.regs[0]
					        begin = res.regs[1][0]
					        brace_level = 0
					        p = begin
					        for _ in range(min(1024 * 16, text_len - begin)):
					            ch = text[p]
					            if ch == '}':
					                if brace_level == 0:
					                    break
					                brace_level -= 1
					            elif ch == '{':
					                brace_level += 1
					            p += 1
					        end = p  # the closing brace itself is not part of the editable range
					        mask[begin:end] = TRANSFORM
					        if forbid_wrapper:
					            mask[match_begin:begin] = PRESERVE
					            mask[end:match_end] = PRESERVE
					    return text, mask
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
					    r"""
					    Find all \begin{} ... \end{} text blocks with fewer than
					    limit_n_lines lines and add them to the preserve area.

					    `pattern` must expose the environment name as group 1 and the
					    content between begin and end as group 2. A match is descended into
					    (searched again, recursively, inside its content) when its name is on
					    the white list below or when its content has at least limit_n_lines
					    newlines; otherwise the whole match is set to PRESERVE in `mask`.

					    Returns the pair (text, mask).
					    """
					    regex = re.compile(pattern, flags)
					    # environments that are never preserved as a whole
					    descend_into = ['document', 'abstract', 'lemma', 'definition', 'sproof',
					                    'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']

					    def _scan(sub_text, sub_mask):
					        for hit in regex.finditer(sub_text):
					            env_name = hit.group(1)   # the "what" of begin{what}
					            inner = hit.group(2)      # content between begin and end
					            inner_lo, inner_hi = hit.regs[2]
					            inner_mask = sub_mask[inner_lo:inner_hi]
					            if (env_name in descend_into) or inner.count('\n') >= limit_n_lines:
					                inner, inner_mask = _scan(inner, inner_mask)
					                sub_mask[inner_lo:inner_hi] = inner_mask
					            else:
					                outer_lo, outer_hi = hit.regs[0]
					                sub_mask[outer_lo:outer_hi] = PRESERVE
					        return sub_text, sub_mask

					    return _scan(text, mask)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 | 
				
			||||||
 | 
					Latex Merge File
 | 
				
			||||||
 | 
					=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def find_main_tex_file(file_manifest, mode):
					    r"""
					    Find the main file of a multi-file Tex project.

					    A candidate must contain the keyword \documentclass; files whose base
					    name starts with 'merge' are ignored. With exactly one candidate it
					    is returned directly. With several candidates each one is scored:
					    -1 for every word that is typical of LaTeX templates, +1 for every
					    command that is typical of a real paper body. The highest score wins
					    and ties go to the candidate that comes first in `file_manifest`.

					    `mode` is accepted for interface compatibility and is not used here.

					    Raises RuntimeError when no file contains \documentclass.
					    """
					    # (path, content) pairs, so that each file is read only once
					    candidates = []
					    for texf in file_manifest:
					        if os.path.basename(texf).startswith('merge'):
					            continue
					        with open(texf, 'r', encoding='utf8', errors='ignore') as f:
					            file_content = f.read()
					        if r'\documentclass' in file_content:
					            candidates.append((texf, file_content))

					    if not candidates:
					        raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
					    if len(candidates) == 1:
					        return candidates[0][0]

					    # Raw strings are required here: in a normal literal '\ref' is a
					    # carriage return followed by "ef" and would never match.
					    unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations',
					                        'rejected', 'blind review', 'reviewers']
					    expected_words = [r'\input', r'\ref', r'\cite']

					    scores = []
					    for _, content in candidates:
					        bonus = sum(1 for word in expected_words if word in content)
					        penalty = sum(1 for word in unexpected_words if word in content)
					        scores.append(bonus - penalty)

					    # index of the first maximal score
					    best = max(range(len(scores)), key=scores.__getitem__)
					    return candidates[best][0]
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					def rm_comments(main_file):
					    r"""
					    Strip LaTeX comments from a source string.

					    Lines whose first non-blank character is '%' are dropped entirely.
					    The remaining lines are joined with '\n', and then everything from an
					    unescaped '%' (one not directly preceded by a backslash) to the end
					    of its line is removed.
					    """
					    kept_lines = [line for line in main_file.splitlines()
					                  if not line.lstrip().startswith("%")]
					    without_full_line_comments = '\n'.join(kept_lines)
					    # trailing (half-line) comments: cut from the unescaped % to end of line
					    return re.sub(r'(?<!\\)%.*', '', without_full_line_comments)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def find_tex_file_ignore_case(fp):
					    """
					    Resolve `fp` to an existing .tex file, ignoring letter case if needed.

					    '.tex' is appended when the base name does not already end with it.
					    If that exact path exists it is returned. Otherwise the .tex files in
					    the same directory are compared with the wanted name case-insensitively
					    and the first one that matches is returned.

					    Returns the path found, or None when there is no such file.
					    """
					    import glob
					    dir_name = os.path.dirname(fp)
					    base_name = os.path.basename(fp)
					    if not base_name.endswith('.tex'):
					        base_name += '.tex'
					    exact_path = os.path.join(dir_name, base_name)
					    if os.path.exists(exact_path):
					        return exact_path
					    # go case in-sensitive: compare each file found on disk with the wanted name
					    wanted = base_name.lower()
					    # glob.escape keeps special characters in the directory name literal;
					    # os.path.join avoids globbing the filesystem root when dir_name is ''
					    for candidate in glob.glob(os.path.join(glob.escape(dir_name), '*.tex')):
					        if os.path.basename(candidate).lower() == wanted:
					            return candidate
					    return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def merge_tex_files_(project_foler, main_file, mode):
					    r"""
					    Merge a Tex project recursively.

					    Comments are removed from `main_file` first. Every \input{...} is then
					    replaced by the content of the file it names, looked up relative to
					    `project_foler`, and that content is merged recursively in the same
					    way. Matches are handled from last to first so that the spans of the
					    earlier matches stay valid while the string is being edited.

					    `mode` is only passed along to the recursive calls.

					    Returns the merged source string.
					    Raises RuntimeError when a file named by \input cannot be found.
					    """
					    main_file = rm_comments(main_file)
					    for s in reversed(list(re.finditer(r"\\input\{(.*?)\}", main_file, re.M))):
					        requested = os.path.join(project_foler, s.group(1))
					        resolved = find_tex_file_ignore_case(requested)
					        if not resolved:
					            # report the requested path; the lookup result is None here
					            raise RuntimeError(f'找不到{requested},Tex源文件缺失!')
					        with open(resolved, 'r', encoding='utf-8', errors='replace') as fx:
					            c = fx.read()
					        c = merge_tex_files_(project_foler, c, mode)
					        begin, end = s.span()
					        main_file = main_file[:begin] + c + main_file[end:]
					    return main_file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def merge_tex_files(project_foler, main_file, mode):
					    r"""
					    Merge a Tex project recursively.
					    P.S. CTEX is inserted along the way to support Chinese.
					    P.S. LaTeX comments are removed along the way.

					    After merging, when mode == 'translate_zh':
					      * \usepackage{ctex} (and \usepackage{url} unless '{url}' already
					        occurs in the source) is inserted right after the first line that
					        contains \documentclass;
					      * 'fontset=windows,UTF8' is added to the \documentclass options;
					      * the source is checked for an abstract, written either as
					        \begin{abstract} or as \abstract{...}.

					    Returns the merged source string.
					    Raises AssertionError (mode 'translate_zh' only) when no
					    \documentclass line or no abstract is found.
					    """
					    main_file = merge_tex_files_(project_foler, main_file, mode)
					    main_file = rm_comments(main_file)

					    if mode == 'translate_zh':
					        # find paper documentclass
					        match = re.search(r'\\documentclass.*\n', main_file)
					        assert match is not None, "Cannot find documentclass statement!"
					        position = match.end()
					        add_ctex = '\\usepackage{ctex}\n'
					        add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
					        main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
					        # fontset=windows: the first substitution handles a documentclass that
					        # already has options, the second one a documentclass without any
					        main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}", main_file)
					        main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}", main_file)
					        # find paper abstract
					        match_opt1 = re.search(r'\\begin\{abstract\}.*\n', main_file)
					        match_opt2 = re.search(r"\\abstract\{(.*?)\}", main_file, flags=re.DOTALL)
					        assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
					    return main_file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 | 
				
			||||||
 | 
					Post process
 | 
				
			||||||
 | 
					=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					def mod_inbraket(match):
					    r"""
					    Regex replacement callback that normalises punctuation inside the
					    braces of a command (ChatGPT tends to turn the commas inside \cite
					    into Chinese commas).

					    `match` must expose the command name as group 1 and the brace content
					    as group 2. Full-width colons and commas in the content are replaced
					    by their ASCII counterparts.

					    Returns the rebuilt "\cmd{content}" string.
					    """
					    cmd = match.group(1)
					    str_to_modify = match.group(2)
					    # Written as escapes so the full-width characters cannot be lost to
					    # encoding or normalisation of this source file.
					    str_to_modify = str_to_modify.replace('\uff1a', ':')    # full-width (Chinese) colon -> ASCII colon
					    str_to_modify = str_to_modify.replace('\uff0c', ',')    # full-width (Chinese) comma -> ASCII comma
					    return "\\" + cmd + "{" + str_to_modify + "}"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def fix_content(final_tex, node_string):
					    r"""
					    Fix common GPT errors to increase the success rate.

					    `final_tex` is the produced fragment and `node_string` the original
					    fragment it is compared against. Steps, in order:
					      1. escape every '%' that is not preceded by a backslash;
					      2. remove a stray space in "\cmd {" and "\ cmd{";
					      3. run mod_inbraket on every "\cmd{...}" whose content has no '}';
					      4. fall back to the original when the result contains both
					         "Traceback" and "[Local Message]", or when the number of
					         "\begin" occurrences changed;
					      5. when escaped underscores ("\_") were lost, escape every bare '_';
					      6. when the brace balance differs from the original, splice the
					         original's tail onto the result.

					    Returns the repaired fragment.
					    """
					    repaired = re.sub(r"(?<!\\)%", "\\%", final_tex)
					    repaired = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=repaired)
					    repaired = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=repaired)
					    repaired = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=repaired)

					    if "Traceback" in repaired and "[Local Message]" in repaired:
					        repaired = node_string  # something went wrong, restore the original
					    if node_string.count('\\begin') != repaired.count('\\begin'):
					        repaired = node_string  # something went wrong, restore the original
					    escaped_in_original = node_string.count('\\_')
					    if escaped_in_original > 0 and escaped_in_original > repaired.count('\\_'):
					        # walk and replace any _ without \
					        repaired = re.sub(r"(?<!\\)_", "\\_", repaired)

					    def brace_balance(s):
					        # number of '{' minus number of '}'
					        return s.count("{") - s.count("}")

					    def splice_on_braces(translated, original):
					        # Walk the braces of `original` in order and look for the same
					        # brace, in the same order, in `translated`. Stop at the first
					        # brace that cannot be found, then keep `translated` up to the
					        # last brace found and append the rest of `original`.
					        pos_t = 0
					        pos_o = 0
					        while True:
					            hits = [i for i in (original.find("{", pos_o), original.find("}", pos_o)) if i != -1]
					            if not hits:
					                break
					            next_o = min(hits)
					            next_t = translated.find(original[next_o], pos_t)
					            if next_t == -1:
					                break
					            pos_o = next_o + 1
					            pos_t = next_t + 1
					        return translated[:pos_t] + original[pos_o:]

					    if brace_balance(repaired) != brace_balance(node_string):
					        # something went wrong: restore part of the original so braces match
					        repaired = splice_on_braces(repaired, node_string)
					    return repaired
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					def compile_latex_with_timeout(command, cwd, timeout=60):
					    """
					    Run `command` through the shell in directory `cwd`, with a time limit.

					    stdout and stderr of the process are captured and discarded. When the
					    process does not finish within `timeout` seconds it is killed and
					    "Process timed out!" is printed.

					    Returns False on timeout and True otherwise; the exit status of the
					    command is not inspected.
					    """
					    import subprocess
					    proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
					    try:
					        proc.communicate(timeout=timeout)
					    except subprocess.TimeoutExpired:
					        proc.kill()
					        # collect whatever output remains so the pipes are drained and closed
					        proc.communicate()
					        print("Process timed out!")
					        return False
					    else:
					        return True
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user