mindee · charlesmindee · Feb 14, 2022 · Jan 24, 2022 · Jan 25, 2022 · Jan 25, 2022
diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py
@@ -59,6 +59,8 @@ def forward(
         if any(page.ndim != 3 for page in pages):
             raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")
 
+        origin_page_shapes = [page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:] for page in pages]
+
         # Detect document rotation and rotate pages
         if self.straighten_pages:
             origin_page_orientations = [estimate_orientation(page) for page in pages]
@@ -85,8 +87,10 @@ def forward(
         if self.straighten_pages:
             boxes = [rotate_boxes(page_boxes,
                                   angle,
-                                  orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:]
-                                  ) for page_boxes, page, angle in zip(boxes, pages, origin_page_orientations)]
+                                  orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:],
+                                  target_shape=mask) for
+                     page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations,
+                                                          origin_page_shapes)]
 
         out = self.doc_builder(
             boxes,

diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py
@@ -83,8 +83,12 @@ def __call__(
 
         # Rotate back pages and boxes while keeping original image size
         if self.straighten_pages:
-            boxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2]) for
-                     page_boxes, page, angle in zip(boxes, pages, origin_page_orientations)]
+            boxes = [rotate_boxes(page_boxes,
+                                  angle,
+                                  orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:],
+                                  target_shape=mask) for
+                     page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations,
+                                                          origin_page_shapes)]
 
         out = self.doc_builder(boxes, text_preds, origin_page_shapes)  # type: ignore[misc]
         return out
diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py
@@ -4,7 +4,7 @@
 # See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.
 
 from math import ceil
-from typing import List, Tuple, Union
+from typing import List, Optional, Tuple, Union
 
 import cv2
 import numpy as np
@@ -127,11 +127,41 @@ def rotate_abs_geoms(
     return rotated_polys
 
 
+def remap_boxes(
+    loc_preds: np.ndarray,
+    orig_shape: Tuple[int, int],
+    dest_shape: Tuple[int, int]
+) -> np.ndarray:
+    """Re-express relative polygon coordinates, given for an image of `orig_shape`, in a `dest_shape` image.
+    The absolute size of the boxes is unchanged: points are offset by half the size difference on each axis
+    (i.e. the original content is treated as centered in the destination image) and divided by the new size.
+    Args:
+        loc_preds: (N, P, 2) array of RELATIVE (x, y) points, e.g. (N, 4, 2) rotated boxes
+        orig_shape: (height, width) of the origin image
+        dest_shape: (height, width) of the destination image
+    Returns:
+        A copy of loc_preds, same shape, with (x, y) expressed relatively to the destination image
+    Raises:
+        ValueError: if orig_shape or dest_shape does not have exactly 2 elements
+    """
+    if len(dest_shape) != 2:
+        raise ValueError(f"dest_shape length should be 2, was found at: {len(dest_shape)}")
+    if len(orig_shape) != 2:
+        raise ValueError(f"orig_shape length should be 2, was found at: {len(orig_shape)}")
+    orig_height, orig_width = orig_shape
+    dest_height, dest_width = dest_shape
+    mboxes = loc_preds.copy()
+    mboxes[:, :, 0] = ((loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width
+    mboxes[:, :, 1] = ((loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height
+    return mboxes
+
+
 def rotate_boxes(
     loc_preds: np.ndarray,
     angle: float,
     orig_shape: Tuple[int, int],
     min_angle: float = 1.,
+    target_shape: Optional[Tuple[int, int]] = None,
 ) -> np.ndarray:
     """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax, c) or rotated bounding boxes
     (4, 2) of an angle, if angle > min_angle, around the center of the page.
@@ -176,6 +206,11 @@ def rotate_boxes(
     rotated_boxes = np.stack(
         (rotated_points[:, :, 0] / orig_shape[1], rotated_points[:, :, 1] / orig_shape[0]), axis=-1
     )
+
+    # Remap the relative coordinates to the target shape if requested
+    if target_shape is not None:
+        rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape)
+
     return rotated_boxes
 
 

diff --git a/tests/common/test_models.py b/tests/common/test_models.py
@@ -94,7 +94,7 @@ def test_get_bitmap_angle(mock_bitmap):
     assert abs(angle - 30.) < 1.
 
 
-def test_estimate_orientation(mock_image):
+def test_estimate_orientation(mock_image, mock_tilted_payslip):
     assert estimate_orientation(mock_image * 0) == 0
 
     angle = estimate_orientation(mock_image)
@@ -103,3 +103,10 @@ def test_estimate_orientation(mock_image):
     rotated = geometry.rotate_image(mock_image, -angle)
     angle_rotated = estimate_orientation(rotated)
     assert abs(angle_rotated) < 1.
+
+    mock_tilted_payslip = reader.read_img_as_numpy(mock_tilted_payslip)
+    assert (estimate_orientation(mock_tilted_payslip) - 30.) < 1.
+
+    rotated = geometry.rotate_image(mock_tilted_payslip, -30, expand=True)
+    angle_rotated = estimate_orientation(rotated)
+    assert abs(angle_rotated) < 1.
diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py
@@ -1,3 +1,5 @@
+from math import hypot
+
 import numpy as np
 import pytest
 
@@ -28,6 +30,50 @@ def test_resolve_enclosing_rbbox():
     assert np.all(target1 - pred <= 1e-3) or np.all(target2 - pred <= 1e-3)
 
 
+def test_remap_boxes():
+    # A centered square in relative coordinates, shared by the cases below
+    square = np.asarray([[[.25, .25], [.25, .75], [.75, .25], [.75, .75]]])
+
+    # Doubling both dimensions halves the relative extent around the center
+    pred = geometry.remap_boxes(square, (10, 10), (20, 20))
+    target = np.asarray([[[.375, .375], [.375, .625], [.625, .375], [.625, .625]]])
+    assert np.allclose(pred, target)
+
+    # Doubling the height only leaves the x coordinates untouched
+    pred = geometry.remap_boxes(square, (10, 10), (20, 10))
+    target = np.asarray([[[0.25, 0.375],
+                          [0.25, 0.625],
+                          [0.75, 0.375],
+                          [0.75, 0.625]]])
+    assert np.allclose(pred, target)
+
+    # Shapes that do not have exactly two elements are rejected
+    with pytest.raises(ValueError):
+        geometry.remap_boxes(square, (80, 40, 150), (160, 40))
+    with pytest.raises(ValueError):
+        geometry.remap_boxes(square, (80, 40), (160,))
+
+    orig_dimension = (100, 100)
+    dest_dimensions = (200, 100)
+    # Unpack dimensions
+    height_o, width_o = orig_dimension
+    height_d, width_d = dest_dimensions
+
+    orig_box = np.asarray([[[0.25, 0.25],
+                            [0.25, 0.25],
+                            [0.75, 0.75],
+                            [0.75, 0.75]]])
+    pred = geometry.remap_boxes(orig_box, orig_dimension, dest_dimensions)
+
+    # Switch to absolute coords: the diagonal must keep its length and its angle
+    orig = np.stack((orig_box[:, :, 0] * width_o, orig_box[:, :, 1] * height_o), axis=2)[0]
+    dest = np.stack((pred[:, :, 0] * width_d, pred[:, :, 1] * height_d), axis=2)[0]
+    delta_orig, delta_dest = orig[0] - orig[2], dest[0] - dest[2]
+
+    assert np.isclose(hypot(*delta_orig), hypot(*delta_dest))
+    assert np.isclose(np.arctan(delta_orig[1] / delta_orig[0]), np.arctan(delta_dest[1] / delta_dest[0]))
+
+
 def test_rotate_boxes():
     boxes = np.array([[0.1, 0.1, 0.8, 0.3, 0.5]])
     rboxes = np.array([[0.1, 0.1], [0.8, 0.1], [0.8, 0.3], [0.1, 0.3]])

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -3,13 +3,17 @@
 import tempfile
 from io import BytesIO
 
+import cv2
 import fitz
 import hdf5storage
 import numpy as np
 import pytest
 import requests
 import scipy.io as sio
 
+from doctr.io import reader
+from doctr.utils import geometry
+
 
 @pytest.fixture(scope="session")
 def mock_vocab():
@@ -35,6 +39,26 @@ def mock_pdf(tmpdir_factory):
     return str(fn)
 
 
+@pytest.fixture(scope="session")
+def mock_payslip(tmpdir_factory):
+    """Fetch a sample payslip picture over HTTP and store it in a temporary folder; returns the file path."""
+    url = 'https://3.bp.blogspot.com/-Es0oHTCrVEk/UnYA-iW9rYI/AAAAAAAAAFI/hWExrXFbo9U/s1600/003.jpg'
+    payload = BytesIO(requests.get(url).content)
+    destination = str(tmpdir_factory.mktemp("data").join("mock_payslip.jpeg"))
+    with open(destination, 'wb') as stream:
+        stream.write(payload.getbuffer())
+    return destination
+
+
+@pytest.fixture(scope="session")
+def mock_tilted_payslip(mock_payslip, tmpdir_factory):
+    """Save a copy of the payslip rotated by 30 (rotate_image with expand=True) and return its path."""
+    tilted = geometry.rotate_image(reader.read_img_as_numpy(mock_payslip), 30, expand=True)
+    tmp_path = str(tmpdir_factory.mktemp("data").join("mock_tilted_payslip.jpg"))
+    cv2.imwrite(tmp_path, tilted)
+    return tmp_path
+
+
 @pytest.fixture(scope="session")
 def mock_text_box_stream():
     url = 'https://www.pngitem.com/pimgs/m/357-3579845_love-neon-loveislove-word-text-typography-freetoedit-picsart.png'

diff --git a/tests/tensorflow/test_models_zoo_tf.py b/tests/tensorflow/test_models_zoo_tf.py
@@ -5,9 +5,11 @@
 from doctr.io import Document, DocumentFile
 from doctr.models import detection, recognition
 from doctr.models.detection.predictor import DetectionPredictor
+from doctr.models.detection.zoo import detection_predictor
 from doctr.models.predictor import OCRPredictor
 from doctr.models.preprocessor import PreProcessor
 from doctr.models.recognition.predictor import RecognitionPredictor
+from doctr.models.recognition.zoo import recognition_predictor
 from doctr.utils.repr import NestedObject
 
 
@@ -60,6 +62,36 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         _ = predictor([input_page])
 
 
+def test_trained_ocr_predictor(mock_tilted_payslip):
+    """Run pretrained detection + recognition on the tilted payslip and check two reference words."""
+    doc = DocumentFile.from_images(mock_tilted_payslip)
+    predictor = OCRPredictor(
+        detection_predictor('db_resnet50', pretrained=True, batch_size=2, assume_straight_pages=True),
+        recognition_predictor('crnn_vgg16_bn', pretrained=True, batch_size=128),
+        assume_straight_pages=True,
+        straighten_pages=True,
+    )
+    page = predictor(doc).pages[0]
+
+    # First word of the first line of the first block
+    first_word = page.blocks[0].lines[0].words[0]
+    expected_mr = np.array([[0.08844472, 0.35763523],
+                            [0.11625107, 0.34320644],
+                            [0.12588427, 0.35771032],
+                            [0.09807791, 0.37213911]])
+    assert first_word.value == 'Mr.'
+    assert np.allclose(np.array(first_word.geometry), expected_mr)
+
+    # Last word of the first line of the second block
+    last_word = page.blocks[1].lines[0].words[-1]
+    expected_revised = np.array([[0.50422498, 0.19551784],
+                                 [0.55741975, 0.16791493],
+                                 [0.56705294, 0.18241881],
+                                 [0.51385817, 0.21002172]])
+    assert last_word.value == 'revised'
+    assert np.allclose(np.array(last_word.geometry), expected_revised)
+
+
 @pytest.mark.parametrize(
     "det_arch, reco_arch",
     [