修正报错消息

This commit is contained in:
binary-husky 2023-09-10 16:52:35 +08:00
parent c45fa88684
commit 28d777a96b

View File

@@ -469,6 +469,7 @@ def read_and_clean_pdf_text(fp):
'- ', '') for t in text_areas['blocks'] if 'lines' in t] '- ', '') for t in text_areas['blocks'] if 'lines' in t]
############################## <第 2 步,获取正文主字体> ################################## ############################## <第 2 步,获取正文主字体> ##################################
try:
fsize_statiscs = {} fsize_statiscs = {}
for span in meta_span: for span in meta_span:
if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0 if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
@@ -476,7 +477,8 @@ def read_and_clean_pdf_text(fp):
main_fsize = max(fsize_statiscs, key=fsize_statiscs.get) main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
if REMOVE_FOOT_NOTE: if REMOVE_FOOT_NOTE:
give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
except:
raise RuntimeError(f'抱歉, 我们暂时无法解析此PDF文档: {fp}')
############################## <第 3 步,切分和重新整合> ################################## ############################## <第 3 步,切分和重新整合> ##################################
mega_sec = [] mega_sec = []
sec = [] sec = []