fix subprocess bug in Windows
commit 8ef7344101
parent 40da1b0afe
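Background on why the change fixes Windows (context, not part of the committed code): on Windows, `multiprocessing` can only start children with the `spawn` method, which pickles the `Process` target to hand it to the new interpreter. A `lambda` has no importable name and cannot be pickled, so the old `target=lambda lps, ...` in `LatexPaperSplit.split` crashed on Windows even though it worked under `fork` on Linux. Moving `split_subprocess` to module level gives `Process` a target that pickles as a module-qualified name. A minimal standalone sketch of the difference (illustrative names, not repo code):

    import multiprocessing
    import pickle

    def module_level_worker():
        pass  # importable by name, so pickle stores a module-qualified reference

    if __name__ == '__main__':
        # 'spawn' is the only start method available on Windows
        multiprocessing.set_start_method('spawn', force=True)

        pickle.dumps(module_level_worker)   # works: serialized as a name reference
        try:
            pickle.dumps(lambda: None)      # what 'spawn' must do to a lambda target
        except Exception as e:
            print(type(e).__name__, e)      # lambdas are not picklable -> the Windows crash

        # the pattern this commit switches to: a module-level function as the target
        p = multiprocessing.Process(target=module_level_worker)
        p.start()
        p.join()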
@@ -192,61 +192,7 @@ def fix_content(final_tex, node_string):
         final_tex = node_string  # something went wrong; restore the original text
     return final_tex
 
-class LatexPaperSplit():
-    """
-    break down latex file to a linked list,
-    each node uses a preserve flag to indicate whether it should
-    be processed by GPT.
-    """
-    def __init__(self) -> None:
-        self.nodes = None
-        self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
-            "版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \
-            "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
-        # Please do not remove or modify this warning unless you are the original author of the paper (if you are, feel free to contact the developers via the QQ group in the README)
-        self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
-
-    def merge_result(self, arr, mode, msg):
-        """
-        Merge the result after the GPT process completed
-        """
-        result_string = ""
-        p = 0
-        for node in self.nodes:
-            if node.preserve:
-                result_string += node.string
-            else:
-                result_string += fix_content(arr[p], node.string)
-                p += 1
-        if mode == 'translate_zh':
-            pattern = re.compile(r'\\begin\{abstract\}.*\n')
-            match = pattern.search(result_string)
-            position = match.end()
-            result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
-        return result_string
-
-    def split(self, txt, project_folder):
-        """
-        break down latex file to a linked list,
-        each node uses a preserve flag to indicate whether it should
-        be processed by GPT.
-        P.S. use multiprocessing to avoid timeout error
-        """
-        import multiprocessing
-        manager = multiprocessing.Manager()
-        return_dict = manager.dict()
-        p = multiprocessing.Process(
-            target=lambda lps, txt, project_folder, return_dict:
-                lps.split_subprocess(txt, project_folder, return_dict),
-            args=(self, txt, project_folder, return_dict))
-        p.start()
-        p.join()
-        self.nodes = return_dict['nodes']
-        self.sp = return_dict['segment_parts_for_gpt']
-        return self.sp
-
-    def split_subprocess(self, txt, project_folder, return_dict):
+def split_subprocess(txt, project_folder, return_dict):
     """
     break down latex file to a linked list,
     each node uses a preserve flag to indicate whether it should
@@ -388,6 +334,62 @@ class LatexPaperSplit():
     return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
     return return_dict
 
+
+class LatexPaperSplit():
+    """
+    break down latex file to a linked list,
+    each node uses a preserve flag to indicate whether it should
+    be processed by GPT.
+    """
+    def __init__(self) -> None:
+        self.nodes = None
+        self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
+            "版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \
+            "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
+        # Please do not remove or modify this warning unless you are the original author of the paper (if you are, feel free to contact the developers via the QQ group in the README)
+        self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
+
+    def merge_result(self, arr, mode, msg):
+        """
+        Merge the result after the GPT process completed
+        """
+        result_string = ""
+        p = 0
+        for node in self.nodes:
+            if node.preserve:
+                result_string += node.string
+            else:
+                result_string += fix_content(arr[p], node.string)
+                p += 1
+        if mode == 'translate_zh':
+            pattern = re.compile(r'\\begin\{abstract\}.*\n')
+            match = pattern.search(result_string)
+            position = match.end()
+            result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
+        return result_string
+
+    def split(self, txt, project_folder):
+        """
+        break down latex file to a linked list,
+        each node uses a preserve flag to indicate whether it should
+        be processed by GPT.
+        P.S. use multiprocessing to avoid timeout error
+        """
+        import multiprocessing
+        manager = multiprocessing.Manager()
+        return_dict = manager.dict()
+        p = multiprocessing.Process(
+            target=split_subprocess,
+            args=(txt, project_folder, return_dict))
+        p.start()
+        p.join()
+        self.nodes = return_dict['nodes']
+        self.sp = return_dict['segment_parts_for_gpt']
+        return self.sp
+
+
 class LatexPaperFileGroup():
     """
     use tokenizer to break down text according to max_token_limit
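How the results get back to the parent after the fix: `manager.dict()` returns a proxy backed by a separate manager server process, so assignments the child makes (`return_dict['nodes']`, `return_dict['segment_parts_for_gpt']`) are visible to the parent once `p.join()` returns. A small standalone sketch with placeholder payloads (not repo code):

    import multiprocessing

    def child(return_dict):
        # writes go through the dict proxy to the manager's server process
        return_dict['nodes'] = ['node-a', 'node-b']        # placeholder payload
        return_dict['segment_parts_for_gpt'] = ['part-1']  # placeholder payload

    if __name__ == '__main__':
        manager = multiprocessing.Manager()
        return_dict = manager.dict()
        p = multiprocessing.Process(target=child, args=(return_dict,))
        p.start()
        p.join()
        # after join(), the parent reads what the child stored
        print(return_dict['nodes'], return_dict['segment_parts_for_gpt'])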