Merge branch 'master' of https://github.com/binary-husky/chatgpt_academic into master
This commit is contained in:
commit
e699b6b13f
@ -469,6 +469,7 @@ def read_and_clean_pdf_text(fp):
|
|||||||
'- ', '') for t in text_areas['blocks'] if 'lines' in t]
|
'- ', '') for t in text_areas['blocks'] if 'lines' in t]
|
||||||
|
|
||||||
############################## <第 2 步,获取正文主字体> ##################################
|
############################## <第 2 步,获取正文主字体> ##################################
|
||||||
|
try:
|
||||||
fsize_statiscs = {}
|
fsize_statiscs = {}
|
||||||
for span in meta_span:
|
for span in meta_span:
|
||||||
if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
|
if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
|
||||||
@ -476,7 +477,8 @@ def read_and_clean_pdf_text(fp):
|
|||||||
main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
|
main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
|
||||||
if REMOVE_FOOT_NOTE:
|
if REMOVE_FOOT_NOTE:
|
||||||
give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
|
give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
|
||||||
|
except:
|
||||||
|
raise RuntimeError(f'抱歉, 我们暂时无法解析此PDF文档: {fp}。')
|
||||||
############################## <第 3 步,切分和重新整合> ##################################
|
############################## <第 3 步,切分和重新整合> ##################################
|
||||||
mega_sec = []
|
mega_sec = []
|
||||||
sec = []
|
sec = []
|
||||||
|
@ -423,7 +423,7 @@ def compile_latex_with_timeout(command, cwd, timeout=60):
|
|||||||
|
|
||||||
def merge_pdfs(pdf1_path, pdf2_path, output_path):
|
def merge_pdfs(pdf1_path, pdf2_path, output_path):
|
||||||
import PyPDF2
|
import PyPDF2
|
||||||
Percent = 0.8
|
Percent = 0.95
|
||||||
# Open the first PDF file
|
# Open the first PDF file
|
||||||
with open(pdf1_path, 'rb') as pdf1_file:
|
with open(pdf1_path, 'rb') as pdf1_file:
|
||||||
pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
|
pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user