From 13c9606af7cdee8f95eca79641bf6968a494c79e Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 8 Sep 2023 09:47:29 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=B8=8B=E8=BD=BDPDF?= =?UTF-8?q?=E5=A4=B1=E8=B4=A5=E6=97=B6=E4=BA=A7=E7=94=9F=E7=9A=84=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/crazy_utils.py | 5 ++++- crazy_functions/pdf_fns/parse_pdf.py | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 8069703..5a314b3 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -593,7 +593,10 @@ def get_files_from_everything(txt, type): # type='.md' from toolbox import get_conf from toolbox import get_log_folder, gen_time_str proxies, = get_conf('proxies') - r = requests.get(txt, proxies=proxies) + try: + r = requests.get(txt, proxies=proxies) + except: + raise ConnectionRefusedError(f"无法下载资源{txt},请检查。") path = os.path.join(get_log_folder(plugin_name='web_download'), gen_time_str()+type) with open(path, 'wb+') as f: f.write(r.content) project_folder = get_log_folder(plugin_name='web_download') diff --git a/crazy_functions/pdf_fns/parse_pdf.py b/crazy_functions/pdf_fns/parse_pdf.py index 00016be..8a7117a 100644 --- a/crazy_functions/pdf_fns/parse_pdf.py +++ b/crazy_functions/pdf_fns/parse_pdf.py @@ -20,6 +20,11 @@ def get_avail_grobid_url(): def parse_pdf(pdf_path, grobid_url): import scipdf # pip install scipdf_parser if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/') - article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url) + try: + article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url) + except GROBID_OFFLINE_EXCEPTION: + raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。") + except: + raise RuntimeError("解析PDF失败,请检查PDF是否损坏。") return article_dict