Skip to content

Commit

Permalink
enhancement: warn if low dpi chipper (#171)
Browse files Browse the repository at this point in the history
Adds a warning if Chipper is used with DPI less than 300.
  • Loading branch information
qued authored Aug 15, 2023
1 parent 15bbc56 commit c337a95
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.5.11-dev0

* Add warning when chipper is used with < 300 DPI

## 0.5.10

* Implement full-page OCR
Expand Down
13 changes: 12 additions & 1 deletion test_unstructured_inference/inference/test_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import unstructured_inference.models.base as models
from unstructured_inference.inference import elements, layout, layoutelement
from unstructured_inference.inference.layout import create_image_output_dir
from unstructured_inference.models import detectron2, tesseract
from unstructured_inference.models import chipper, detectron2, tesseract
from unstructured_inference.models.unstructuredmodel import (
UnstructuredElementExtractionModel,
UnstructuredObjectDetectionModel,
Expand Down Expand Up @@ -866,3 +866,14 @@ def test_create_image_output_dir_no_ext():
assert os.path.isdir(output_dir)
assert os.path.isabs(output_dir)
assert output_dir == expected_output_dir


def test_warning_if_chipper_and_low_dpi(caplog):
    """Processing with the chipper model at a DPI below 300 should log a warning.

    ``DocumentLayout.from_file`` and ``UnstructuredChipperModel.initialize`` are
    patched out, so only the DPI check in ``process_file_with_model`` is exercised.
    """
    from_file_patcher = patch.object(layout.DocumentLayout, "from_file")
    initialize_patcher = patch.object(chipper.UnstructuredChipperModel, "initialize")
    with from_file_patcher as mock_from_file, initialize_patcher:
        layout.process_file_with_model("asdf", model_name="chipper", pdf_image_dpi=299)
        mock_from_file.assert_called_once()
        # The DPI warning is expected to be the first record captured.
        first_record = caplog.records[0]
        assert first_record.levelname == "WARNING"
        assert "DPI >= 300" in first_record.msg
5 changes: 3 additions & 2 deletions test_unstructured_inference/models/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ def test_get_model_warns_on_chipper(monkeypatch, caplog):
"UnstructuredChipperModel",
MockModel,
)
models.get_model("chipper")
assert caplog.records[0].levelname == "WARNING"
with mock.patch.object(models, "models", {}):
models.get_model("chipper")
assert caplog.records[0].levelname == "WARNING"


def test_raises_invalid_model():
Expand Down
2 changes: 1 addition & 1 deletion unstructured_inference/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.10" # pragma: no cover
__version__ = "0.5.11-dev0" # pragma: no cover
7 changes: 7 additions & 0 deletions unstructured_inference/inference/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,13 @@ def process_file_with_model(
) -> DocumentLayout:
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by
model_name."""

if (pdf_image_dpi < 300) and (model_name == "chipper"):
logger.warning(
"The Chipper model performs better when images are rendered with DPI >= 300 "
f"(currently {pdf_image_dpi}).",
)

model = get_model(model_name)
if isinstance(model, UnstructuredObjectDetectionModel):
detection_model = model
Expand Down

0 comments on commit c337a95

Please sign in to comment.