Skip to content

Commit ef4bd62

Browse files
Michael Aigner
authored and committed
add example
1 parent d74c492 commit ef4bd62

File tree

6 files changed

+20
-0
lines changed

6 files changed

+20
-0
lines changed

GraphConverter/GraphConverter.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def __init__(self, pdf, merge_boxes=False, regress_parameters=False,
2121
conv = PDFContentConverter(self.pdf).convert()
2222
self.loc_df = conv["content"]
2323
self.media_boxes = conv["media_boxes"]
24+
self.n = conv["page_count"]
2425
self.loc_df = PDFTextBoxMerging(data=self.loc_df,
2526
media_boxes=self.media_boxes).transform()
2627
if self.loc_df is not None:
@@ -317,3 +318,17 @@ def is_vertically_overlapping(node1, node2):
317318
min_height = min(node1["y_0"] - node1["y_1"], node2["y_0"] - node2["y_1"])
318319
overlap = min(node1["y_0"], node2["y_0"]) - max(node1["y_1"], node2["y_1"])
319320
return overlap / min_height >= constants.VERTICAL_OVERLAP_THRESHOLD
321+
322+
def get_media_boxes(self):
    """
    Return the media boxes held by this converter.

    ``__init__`` stores them in ``self.media_boxes`` from the
    ``"media_boxes"`` entry of the ``PDFContentConverter(...).convert()``
    result.

    :return: the stored ``media_boxes`` object itself (not a copy); its
        exact structure is defined by ``PDFContentConverter`` and is not
        visible from here.
    """
    return self.media_boxes
328+
329+
def get_page_count(self):
    """
    Return the page count held by this converter.

    ``__init__`` stores it in ``self.n`` from the ``"page_count"`` entry of
    the ``PDFContentConverter(...).convert()`` result.

    :return: the stored ``n`` value, returned unchanged (presumably an
        integer number of pages; confirm against ``PDFContentConverter``).
    """
    return self.n

GraphConverter/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
from GraphConverter import GraphConverter
3+
from GraphConverter.util import RectangleUtil, constants, StorageUtil
4+
from GraphConverter.document import DocumentMetaCharacteristics
5+
from GraphConverter.merging import PDFTextBoxMerging

GraphConverter/document/__init__.py

Whitespace-only changes.

GraphConverter/merging/__init__.py

Whitespace-only changes.

GraphConverter/util/__init__.py

Whitespace-only changes.

pdf/eu-001.pdf

66.5 KB
Binary file not shown.

0 commit comments

Comments
 (0)