修正下载PDF失败时产生的错误提示

This commit is contained in:
qingxu fu 2023-09-08 09:47:29 +08:00
parent bac6810e75
commit 13c9606af7
2 changed files with 10 additions and 2 deletions

View File

@@ -593,7 +593,10 @@ def get_files_from_everything(txt, type): # type='.md'
from toolbox import get_conf from toolbox import get_conf
from toolbox import get_log_folder, gen_time_str from toolbox import get_log_folder, gen_time_str
proxies, = get_conf('proxies') proxies, = get_conf('proxies')
r = requests.get(txt, proxies=proxies) try:
r = requests.get(txt, proxies=proxies)
except:
raise ConnectionRefusedError(f"无法下载资源{txt},请检查。")
path = os.path.join(get_log_folder(plugin_name='web_download'), gen_time_str()+type) path = os.path.join(get_log_folder(plugin_name='web_download'), gen_time_str()+type)
with open(path, 'wb+') as f: f.write(r.content) with open(path, 'wb+') as f: f.write(r.content)
project_folder = get_log_folder(plugin_name='web_download') project_folder = get_log_folder(plugin_name='web_download')

View File

@@ -20,6 +20,11 @@ def get_avail_grobid_url():
def parse_pdf(pdf_path, grobid_url): def parse_pdf(pdf_path, grobid_url):
import scipdf # pip install scipdf_parser import scipdf # pip install scipdf_parser
if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/') if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url) try:
article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
except GROBID_OFFLINE_EXCEPTION:
raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用请修改config中的GROBID_URL可修改成本地GROBID服务。")
except:
raise RuntimeError("解析PDF失败请检查PDF是否损坏。")
return article_dict return article_dict