diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index a23c732..ce7a2e3 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -748,7 +748,7 @@ class nougat_interface(): yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)", chatbot=chatbot, history=history, delay=0) - self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600) + self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}" --recompute --no-skipping --markdown --batchsize 8', os.getcwd(), timeout=3600) res = glob.glob(os.path.join(dst,'*.mmd')) if len(res) == 0: self.threadLock.release() diff --git a/crazy_functions/批量翻译PDF文档_NOUGAT.py b/crazy_functions/批量翻译PDF文档_NOUGAT.py index 3e50c93..16dfd6b 100644 --- a/crazy_functions/批量翻译PDF文档_NOUGAT.py +++ b/crazy_functions/批量翻译PDF文档_NOUGAT.py @@ -57,22 +57,27 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst "批量翻译PDF文档。函数插件贡献者: Binary-Husky"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - # 尝试导入依赖,如果缺少依赖,则给出安装建议 - try: - import nougat - import tiktoken - except: - report_execption(chatbot, history, - a=f"解析项目: {txt}", - b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade nougat-ocr tiktoken```。") - yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - return - # 清空历史,以免输入溢出 history = [] from .crazy_utils import get_files_from_everything success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf') + if len(file_manifest) > 0: + # 尝试导入依赖,如果缺少依赖,则给出安装建议 + try: + import nougat + import tiktoken + except: + report_execption(chatbot, history, + a=f"解析项目: {txt}", + b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade nougat-ocr tiktoken```。") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + success_mmd, file_manifest_mmd, _ = get_files_from_everything(txt, type='.mmd') + success = success or success_mmd + file_manifest += file_manifest_mmd + chatbot.append(["文件列表:", ", ".join([e.split('/')[-1] for e in file_manifest])]); + yield from update_ui( chatbot=chatbot, history=history) # 检测输入参数,如没有给定输入参数,直接退出 if not success: if txt == "": txt = '空空如也的输入栏' @@ -101,9 +106,13 @@ def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwa from crazy_functions.pdf_fns.report_gen_html import construct_html nougat_handle = nougat_interface() for index, fp in enumerate(file_manifest): - chatbot.append(["当前进度:", f"正在解析论文,请稍候。(第一次运行时,需要花费较长时间下载NOUGAT参数)"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - fpp = yield from nougat_handle.NOUGAT_parse_pdf(fp, chatbot, history) - promote_file_to_downloadzone(fpp, rename_file=os.path.basename(fpp)+'.nougat.mmd', chatbot=chatbot) + if fp.endswith('pdf'): + chatbot.append(["当前进度:", f"正在解析论文,请稍候。(第一次运行时,需要花费较长时间下载NOUGAT参数)"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + fpp = yield from nougat_handle.NOUGAT_parse_pdf(fp, chatbot, history) + promote_file_to_downloadzone(fpp, rename_file=os.path.basename(fpp)+'.nougat.mmd', chatbot=chatbot) + else: + chatbot.append(["当前论文无需解析:", fp]); yield from update_ui( chatbot=chatbot, history=history) + fpp = fp with open(fpp, 'r', encoding='utf8') as f: article_content = f.readlines() article_dict = markdown_to_dict(article_content)