Merge pull request #1255 from xiangsam/master
[Feature] 更新精准翻译PDF文档(NOUGAT)插件
This commit is contained in:
commit
fcf04554c6
@@ -748,7 +748,7 @@ class nougat_interface():
|
|||||||
|
|
||||||
yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
|
yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
|
||||||
chatbot=chatbot, history=history, delay=0)
|
chatbot=chatbot, history=history, delay=0)
|
||||||
self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
|
self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}" --recompute --no-skipping --markdown --batchsize 8', os.getcwd(), timeout=3600)
|
||||||
res = glob.glob(os.path.join(dst,'*.mmd'))
|
res = glob.glob(os.path.join(dst,'*.mmd'))
|
||||||
if len(res) == 0:
|
if len(res) == 0:
|
||||||
self.threadLock.release()
|
self.threadLock.release()
|
||||||
|
@@ -57,6 +57,12 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
|||||||
"批量翻译PDF文档。函数插件贡献者: Binary-Husky"])
|
"批量翻译PDF文档。函数插件贡献者: Binary-Husky"])
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||||
|
|
||||||
|
# 清空历史,以免输入溢出
|
||||||
|
history = []
|
||||||
|
|
||||||
|
from .crazy_utils import get_files_from_everything
|
||||||
|
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
|
||||||
|
if len(file_manifest) > 0:
|
||||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||||
try:
|
try:
|
||||||
import nougat
|
import nougat
|
||||||
@@ -67,12 +73,11 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
|||||||
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade nougat-ocr tiktoken```。")
|
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade nougat-ocr tiktoken```。")
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||||
return
|
return
|
||||||
|
success_mmd, file_manifest_mmd, _ = get_files_from_everything(txt, type='.mmd')
|
||||||
# 清空历史,以免输入溢出
|
success = success or success_mmd
|
||||||
history = []
|
file_manifest += file_manifest_mmd
|
||||||
|
chatbot.append(["文件列表:", ", ".join([e.split('/')[-1] for e in file_manifest])]);
|
||||||
from .crazy_utils import get_files_from_everything
|
yield from update_ui( chatbot=chatbot, history=history)
|
||||||
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
|
|
||||||
# 检测输入参数,如没有给定输入参数,直接退出
|
# 检测输入参数,如没有给定输入参数,直接退出
|
||||||
if not success:
|
if not success:
|
||||||
if txt == "": txt = '空空如也的输入栏'
|
if txt == "": txt = '空空如也的输入栏'
|
||||||
@@ -101,9 +106,13 @@ def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwa
|
|||||||
from crazy_functions.pdf_fns.report_gen_html import construct_html
|
from crazy_functions.pdf_fns.report_gen_html import construct_html
|
||||||
nougat_handle = nougat_interface()
|
nougat_handle = nougat_interface()
|
||||||
for index, fp in enumerate(file_manifest):
|
for index, fp in enumerate(file_manifest):
|
||||||
|
if fp.endswith('pdf'):
|
||||||
chatbot.append(["当前进度:", f"正在解析论文,请稍候。(第一次运行时,需要花费较长时间下载NOUGAT参数)"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
chatbot.append(["当前进度:", f"正在解析论文,请稍候。(第一次运行时,需要花费较长时间下载NOUGAT参数)"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||||
fpp = yield from nougat_handle.NOUGAT_parse_pdf(fp, chatbot, history)
|
fpp = yield from nougat_handle.NOUGAT_parse_pdf(fp, chatbot, history)
|
||||||
promote_file_to_downloadzone(fpp, rename_file=os.path.basename(fpp)+'.nougat.mmd', chatbot=chatbot)
|
promote_file_to_downloadzone(fpp, rename_file=os.path.basename(fpp)+'.nougat.mmd', chatbot=chatbot)
|
||||||
|
else:
|
||||||
|
chatbot.append(["当前论文无需解析:", fp]); yield from update_ui( chatbot=chatbot, history=history)
|
||||||
|
fpp = fp
|
||||||
with open(fpp, 'r', encoding='utf8') as f:
|
with open(fpp, 'r', encoding='utf8') as f:
|
||||||
article_content = f.readlines()
|
article_content = f.readlines()
|
||||||
article_dict = markdown_to_dict(article_content)
|
article_dict = markdown_to_dict(article_content)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user