From 0844b6e9cfda2d55a70ca60d89954fd8b91ef81f Mon Sep 17 00:00:00 2001 From: binary-husky Date: Wed, 27 Sep 2023 15:40:55 +0800 Subject: [PATCH] =?UTF-8?q?GROBID=E6=9C=8D=E5=8A=A1=E4=BB=A3=E7=90=86?= =?UTF-8?q?=E8=AE=BF=E9=97=AE=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 2 +- crazy_functions/pdf_fns/parse_pdf.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/config.py b/config.py index 88e0df2..e4b23a1 100644 --- a/config.py +++ b/config.py @@ -184,7 +184,7 @@ PATH_PRIVATE_UPLOAD = "private_upload" # 日志文件夹的位置,请勿修改 PATH_LOGGING = "gpt_log" # 除了连接OpenAI之外,还有哪些场合允许使用代理,请勿修改 -WHEN_TO_USE_PROXY = ["Download_LLM", "Download_Gradio_Theme"] +WHEN_TO_USE_PROXY = ["Download_LLM", "Download_Gradio_Theme", "Connect_Grobid"] """ diff --git a/crazy_functions/pdf_fns/parse_pdf.py b/crazy_functions/pdf_fns/parse_pdf.py index a726aa2..a047efc 100644 --- a/crazy_functions/pdf_fns/parse_pdf.py +++ b/crazy_functions/pdf_fns/parse_pdf.py @@ -2,6 +2,8 @@ from functools import lru_cache from toolbox import gen_time_str from toolbox import promote_file_to_downloadzone from toolbox import write_history_to_file, promote_file_to_downloadzone +from toolbox import get_conf +from toolbox import ProxyNetworkActivate from colorful import * import requests import random @@ -12,13 +14,13 @@ import math class GROBID_OFFLINE_EXCEPTION(Exception): pass def get_avail_grobid_url(): - from toolbox import get_conf GROBID_URLS, = get_conf('GROBID_URLS') if len(GROBID_URLS) == 0: return None try: _grobid_url = random.choice(GROBID_URLS) # 随机负载均衡 if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/') - res = requests.get(_grobid_url+'/api/isalive') + with ProxyNetworkActivate('Connect_Grobid'): + res = requests.get(_grobid_url+'/api/isalive') if res.text=='true': return _grobid_url else: return None except: @@ -29,7 +31,8 @@ def parse_pdf(pdf_path, grobid_url): import scipdf # pip install scipdf_parser if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/') try: - article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url) + with ProxyNetworkActivate('Connect_Grobid'): + article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url) except GROBID_OFFLINE_EXCEPTION: raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。") except: