diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 5a314b3..1567172 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -469,14 +469,16 @@ def read_and_clean_pdf_text(fp): '- ', '') for t in text_areas['blocks'] if 'lines' in t] ############################## <第 2 步,获取正文主字体> ################################## - fsize_statiscs = {} - for span in meta_span: - if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0 - fsize_statiscs[span[1]] += span[2] - main_fsize = max(fsize_statiscs, key=fsize_statiscs.get) - if REMOVE_FOOT_NOTE: - give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT - + try: + fsize_statiscs = {} + for span in meta_span: + if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0 + fsize_statiscs[span[1]] += span[2] + main_fsize = max(fsize_statiscs, key=fsize_statiscs.get) + if REMOVE_FOOT_NOTE: + give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT + except: + raise RuntimeError(f'抱歉, 我们暂时无法解析此PDF文档: {fp}。') ############################## <第 3 步,切分和重新整合> ################################## mega_sec = [] sec = [] diff --git a/crazy_functions/latex_fns/latex_toolbox.py b/crazy_functions/latex_fns/latex_toolbox.py index 5adc7ea..e39ac2d 100644 --- a/crazy_functions/latex_fns/latex_toolbox.py +++ b/crazy_functions/latex_fns/latex_toolbox.py @@ -423,7 +423,7 @@ def compile_latex_with_timeout(command, cwd, timeout=60): def merge_pdfs(pdf1_path, pdf2_path, output_path): import PyPDF2 - Percent = 0.8 + Percent = 0.95 # Open the first PDF file with open(pdf1_path, 'rb') as pdf1_file: pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)