Merge pull request #1255 from xiangsam/master
[Feature] 更新精准翻译PDF文档(NOUGAT)插件
This commit is contained in:
commit
fcf04554c6
@ -748,7 +748,7 @@ class nougat_interface():
|
||||
|
||||
yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
|
||||
chatbot=chatbot, history=history, delay=0)
|
||||
self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
|
||||
self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}" --recompute --no-skipping --markdown --batchsize 8', os.getcwd(), timeout=3600)
|
||||
res = glob.glob(os.path.join(dst,'*.mmd'))
|
||||
if len(res) == 0:
|
||||
self.threadLock.release()
|
||||
|
@ -57,22 +57,27 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
||||
"批量翻译PDF文档。函数插件贡献者: Binary-Husky"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||
try:
|
||||
import nougat
|
||||
import tiktoken
|
||||
except:
|
||||
report_execption(chatbot, history,
|
||||
a=f"解析项目: {txt}",
|
||||
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade nougat-ocr tiktoken```。")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
|
||||
# 清空历史,以免输入溢出
|
||||
history = []
|
||||
|
||||
from .crazy_utils import get_files_from_everything
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
|
||||
if len(file_manifest) > 0:
|
||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||
try:
|
||||
import nougat
|
||||
import tiktoken
|
||||
except:
|
||||
report_execption(chatbot, history,
|
||||
a=f"解析项目: {txt}",
|
||||
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade nougat-ocr tiktoken```。")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
success_mmd, file_manifest_mmd, _ = get_files_from_everything(txt, type='.mmd')
|
||||
success = success or success_mmd
|
||||
file_manifest += file_manifest_mmd
|
||||
chatbot.append(["文件列表:", ", ".join([e.split('/')[-1] for e in file_manifest])]);
|
||||
yield from update_ui( chatbot=chatbot, history=history)
|
||||
# 检测输入参数,如没有给定输入参数,直接退出
|
||||
if not success:
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
@ -101,9 +106,13 @@ def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwa
|
||||
from crazy_functions.pdf_fns.report_gen_html import construct_html
|
||||
nougat_handle = nougat_interface()
|
||||
for index, fp in enumerate(file_manifest):
|
||||
chatbot.append(["当前进度:", f"正在解析论文,请稍候。(第一次运行时,需要花费较长时间下载NOUGAT参数)"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
fpp = yield from nougat_handle.NOUGAT_parse_pdf(fp, chatbot, history)
|
||||
promote_file_to_downloadzone(fpp, rename_file=os.path.basename(fpp)+'.nougat.mmd', chatbot=chatbot)
|
||||
if fp.endswith('pdf'):
|
||||
chatbot.append(["当前进度:", f"正在解析论文,请稍候。(第一次运行时,需要花费较长时间下载NOUGAT参数)"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
fpp = yield from nougat_handle.NOUGAT_parse_pdf(fp, chatbot, history)
|
||||
promote_file_to_downloadzone(fpp, rename_file=os.path.basename(fpp)+'.nougat.mmd', chatbot=chatbot)
|
||||
else:
|
||||
chatbot.append(["当前论文无需解析:", fp]); yield from update_ui( chatbot=chatbot, history=history)
|
||||
fpp = fp
|
||||
with open(fpp, 'r', encoding='utf8') as f:
|
||||
article_content = f.readlines()
|
||||
article_dict = markdown_to_dict(article_content)
|
||||
|
Loading…
x
Reference in New Issue
Block a user