修复pdf分解bug
This commit is contained in:
parent
4c486f27c8
commit
d35d7710c1
@ -444,6 +444,7 @@ def read_and_clean_pdf_text(fp):
|
||||
pf = 998
|
||||
for l in t['lines']:
|
||||
txt_line = "".join([wtf['text'] for wtf in l['spans']])
|
||||
if len(txt_line) == 0: continue
|
||||
pf = primary_ffsize(l)
|
||||
meta_line.append([txt_line, pf, l['bbox'], l])
|
||||
for wtf in l['spans']: # for l in t['lines']:
|
||||
|
Loading…
x
Reference in New Issue
Block a user