File tree Expand file tree Collapse file tree 3 files changed +12
-1
lines changed Expand file tree Collapse file tree 3 files changed +12
-1
lines changed Original file line number Diff line number Diff line change @@ -53,6 +53,10 @@ def extract_words_for_page(
5353 )
5454
5555 df = pd .DataFrame (tokens )
56+
57+ if len (df ) == 0 :
58+ return Layout ()
59+
5660 df [["x0" , "x1" ]] = (
5761 df [["x0" , "x1" ]].clip (lower = 0 , upper = int (page .width )).astype ("float" )
5862 )
Original file line number Diff line number Diff line change @@ -78,4 +78,11 @@ def test_pdf():
7878 assert attr_name in page_layout .page_data
7979
8080 assert len (set (ele .type for ele in page_layout )) == 3
81- # Only three types of font show-up in the file
81+ # Only three types of font show-up in the file
82+
83+ def test_empty_pdf ():
84+ pdf_layout = load_pdf ("tests/fixtures/io/empty.pdf" )
85+ assert len (pdf_layout ) == 1 # Only one page
86+
87+ page_layout = pdf_layout [0 ]
88+ assert len (page_layout ) == 0 # No selectable tokens on the page
You can’t perform that action at this time.
0 commit comments