mindspore-lab · hadipash · Jul 3, 2023 · Jun 27, 2023 · Jun 27, 2023 · Jun 27, 2023
diff --git a/configs/det/dbnet/README.md b/configs/det/dbnet/README.md
@@ -67,7 +67,7 @@ DBNet and DBNet++ were trained on the ICDAR2015, MSRA-TD500, SCUT-CTW1500, Total
 
 | **Model**           | **Context**    | **Backbone**  | **Pretrained** | **Recall** | **Precision** | **F-score** | **Train T.** | **Throughput** | **Recipe**                          | **Download**                                                                                                                                                                                              |
 |---------------------|----------------|---------------|----------------|------------|---------------|-------------|--------------|----------------|-------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| DBNet               | D910x1-MS2.0-G | MobileNetV3   | ImageNet       | 76.26%     | 78.22%        | 77.23%      | 10 s/epoch   | 100 img/s      | [yaml](db_mobilenetv3_icdar15.yaml) | [ckpt](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_mobilenetv3-62c44539.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_mobilenetv3-62c44539-f14c6a13.mindir) |
+| DBNet               | D910x1-MS2.0-G | MobileNetV3   | ImageNet       | 76.31%     | 78.27%        | 77.28%      | 10 s/epoch   | 100 img/s      | [yaml](db_mobilenetv3_icdar15.yaml) | [ckpt](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_mobilenetv3-62c44539.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_mobilenetv3-62c44539-f14c6a13.mindir) |
 | DBNet               | D910x1-MS2.0-G | ResNet-18     | ImageNet       | 80.12%     | 83.41%        | 81.73%      | 9.3 s/epoch  | 108 img/s      | [yaml](db_r18_icdar15.yaml)         | [ckpt](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet18-0c0c4cfa.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet18-0c0c4cfa-cf46eb8b.mindir)       |
 | DBNet               | D910x1-MS2.0-G | ResNet-50     | ImageNet       | 83.53%     | 86.62%        | 85.05%      | 13.3 s/epoch | 75.2 img/s       | [yaml](db_r50_icdar15.yaml)         | [ckpt](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet50-c3a4aa24.ckpt) \| [mindir](https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet50-c3a4aa24-fbf95c82.mindir)       |
 |                     |                |               |                |            |               |             |              |                |                                     |                                                                                                                                                                                                           |

diff --git a/configs/det/dbnet/db++_r50_icdar15.yaml b/configs/det/dbnet/db++_r50_icdar15.yaml
@@ -137,8 +137,6 @@ eval:
           img_mode: RGB
           to_float32: False
       - DetLabelEncode:
-      #- ScalePadImage:
-      #    target_size: [ 1152, 2048 ] # h, w
       - DetResize:
           target_size: [ 1152, 2048]
           keep_ratio: True

diff --git a/configs/det/dbnet/db_mobilenetv3_icdar15.yaml b/configs/det/dbnet/db_mobilenetv3_icdar15.yaml
@@ -140,12 +140,11 @@ eval:
           img_mode: RGB
           to_float32: False
       - DetLabelEncode:
-      - GridResize:
-          factor: 32
-      # GridResize already sets the evaluation size to [ 736, 1280 ].
-      # Uncomment ScalePadImage block for other resolutions.
-#      - ScalePadImage:
-#          target_size: [ 736, 1280 ] # h, w
+      - DetResize:  # replaces the former GridResize (factor: 32)
+          target_size: [ 736, 1280 ]
+          keep_ratio: False
+          limit_type: None  # NOTE(review): YAML reads `None` as the string "None", not null - confirm DetResize accepts it
+          divisor: 32
       - NormalizeImage:
           bgr_to_rgb: False
           is_hwc: True

diff --git a/configs/det/dbnet/db_r18_td500.yaml b/configs/det/dbnet/db_r18_td500.yaml
@@ -135,8 +135,12 @@ eval:
           img_mode: RGB
           to_float32: False
       - DetLabelEncode:
-      - ScalePadImage:
+      - DetResize:  # replaces the former ScalePadImage
           target_size: [ 800, 800 ] # h, w
+          keep_ratio: True
+          padding: True
+          limit_type: None  # NOTE(review): YAML reads `None` as the string "None", not null - confirm DetResize accepts it
+          force_divisable: False
       - NormalizeImage:
           bgr_to_rgb: False
           is_hwc: True

diff --git a/configs/det/dbnet/db_r50_icdar15.yaml b/configs/det/dbnet/db_r50_icdar15.yaml
@@ -133,16 +133,11 @@ eval:
           img_mode: RGB
           to_float32: False
       - DetLabelEncode:
-      #- GridResize:
-      #    factor: 32
-      # GridResize already sets the evaluation size to [ 736, 1280 ].
-      # Uncomment ScalePadImage block for other resolutions.
-#      - ScalePadImage:
-#          target_size: [ 736, 1280 ] # h, w
-      - DetResize:
+      - DetResize:  # replaces the former GridResize (factor: 32)
           target_size: [ 736, 1280 ]
           keep_ratio: False
-          force_divisable: True #  GridResize 32
+          limit_type: None
+          divisor: 32
       - NormalizeImage:
           bgr_to_rgb: False
           is_hwc: True
@@ -174,12 +169,11 @@ predict:
           img_mode: RGB
           to_float32: False
 #      - DetLabelEncode:
-      - GridResize:
-          factor: 32
-      # GridResize already sets the evaluation size to [ 736, 1280 ].
-      # Uncomment ScalePadImage block for other resolutions.
-#      - ScalePadImage:
-#          target_size: [ 736, 1280 ] # h, w
+      - DetResize:  # replaces the former GridResize (factor: 32)
+          target_size: [ 736, 1280 ]
+          keep_ratio: False
+          limit_type: None
+          divisor: 32
       - NormalizeImage:
           bgr_to_rgb: False
           is_hwc: True

diff --git a/configs/det/dbnet/db_r50_td500.yaml b/configs/det/dbnet/db_r50_td500.yaml
@@ -135,8 +135,12 @@ eval:
           img_mode: RGB
           to_float32: False
       - DetLabelEncode:
-      - ScalePadImage:
+      - DetResize:  # replaces the former ScalePadImage
           target_size: [ 800, 800] # h, w
+          keep_ratio: True
+          padding: True
+          limit_type: None
+          force_divisable: False
       - NormalizeImage:
           bgr_to_rgb: False
           is_hwc: True

diff --git a/deploy/py_infer/src/configs/det/ppocr/ch_PP-OCRv2_det_cml.yaml b/deploy/py_infer/src/configs/det/ppocr/ch_PP-OCRv2_det_cml.yaml
@@ -12,7 +12,11 @@ eval:
       - DecodeImage:
           img_mode: BGR
           channel_first: False
-      - ScalePadImage:
+      - DetResize:  # replaces the former ScalePadImage
+          keep_ratio: True
+          padding: True
+          limit_type: None
+          force_divisable: False
       - NormalizeImage:
           mean: [ 123.675, 116.28 , 103.53 ]
           std: [ 58.395, 57.12 , 57.375 ]

diff --git a/deploy/py_infer/src/configs/det/ppocr/ch_PP-OCRv3_det_cml.yaml b/deploy/py_infer/src/configs/det/ppocr/ch_PP-OCRv3_det_cml.yaml
@@ -12,7 +12,11 @@ eval:
       - DecodeImage: # load image
           img_mode: BGR
           channel_first: False
-      - ScalePadImage:
+      - DetResize:  # replaces the former ScalePadImage
+          keep_ratio: True
+          padding: True
+          limit_type: None
+          force_divisable: False
       - NormalizeImage:
           mean: [ 123.675, 116.28 , 103.53 ]
           std: [ 58.395, 57.12 , 57.375 ]

diff --git a/deploy/py_infer/src/configs/det/ppocr/ch_det_mv3_db_v2.0.yaml b/deploy/py_infer/src/configs/det/ppocr/ch_det_mv3_db_v2.0.yaml
@@ -12,7 +12,11 @@ eval:
       - DecodeImage:
           img_mode: BGR
           channel_first: False
-      - ScalePadImage:
+      - DetResize:  # replaces the former ScalePadImage
+          keep_ratio: True
+          padding: True
+          limit_type: None
+          force_divisable: False
       - NormalizeImage:
           mean: [ 123.675, 116.28 , 103.53 ]
           std: [ 58.395, 57.12 , 57.375 ]

diff --git a/deploy/py_infer/src/configs/det/ppocr/ch_det_res18_db_v2.0.yaml b/deploy/py_infer/src/configs/det/ppocr/ch_det_res18_db_v2.0.yaml
@@ -12,7 +12,11 @@ eval:
       - DecodeImage: # load image
           img_mode: BGR
           channel_first: False
-      - ScalePadImage:
+      - DetResize:  # replaces the former ScalePadImage
+          keep_ratio: True
+          padding: True
+          limit_type: None
+          force_divisable: False
       - NormalizeImage:
           mean: [ 123.675, 116.28 , 103.53 ]
           std: [ 58.395, 57.12 , 57.375 ]

diff --git a/deploy/py_infer/src/configs/det/ppocr/det_r50_vd_db.yaml b/deploy/py_infer/src/configs/det/ppocr/det_r50_vd_db.yaml
@@ -12,7 +12,11 @@ eval:
       - DecodeImage:
           img_mode: BGR
           channel_first: False
-      - ScalePadImage:
+      - DetResize:  # replaces the former ScalePadImage
+          keep_ratio: True
+          padding: True
+          limit_type: None
+          force_divisable: False
       - NormalizeImage:
           mean: [ 123.675, 116.28 , 103.53 ]
           std: [ 58.395, 57.12 , 57.375 ]

diff --git a/deploy/py_infer/src/configs/det/ppocr/det_r50_vd_east.yaml b/deploy/py_infer/src/configs/det/ppocr/det_r50_vd_east.yaml
@@ -12,7 +12,11 @@ eval:
       - DecodeImage:
           img_mode: BGR
           channel_first: False
-      - ScalePadImage:
+      - DetResize:  # replaces the former ScalePadImage
+          keep_ratio: True
+          padding: True
+          limit_type: None
+          force_divisable: False
       - NormalizeImage:
           mean: [ 123.675, 116.28 , 103.53 ]
           std: [ 58.395, 57.12 , 57.375 ]

diff --git a/deploy/py_infer/src/data_process/preprocess/preprocess_mapping.py b/deploy/py_infer/src/data_process/preprocess/preprocess_mapping.py
@@ -1,5 +1,3 @@
-import functools
-
 from . import transforms
 
 # other ops node will be skipped
@@ -8,8 +6,6 @@
     "DecodeImage": transforms.DecodeImage,
     "NormalizeImage": transforms.NormalizeImage,
     "ToCHWImage": transforms.ToCHWImage,
-    "GridResize": functools.partial(transforms.DetResize, keep_ratio=False, padding=False),
-    "ScalePadImage": transforms.ScalePadImage,
     # det
     "DetResize": transforms.DetResize,
     "DetResizeNormForInfer": transforms.DetResizeNormForInfer,

diff --git a/deploy/py_infer/src/data_process/preprocess/transforms/det_transforms.py b/deploy/py_infer/src/data_process/preprocess/transforms/det_transforms.py
@@ -11,7 +11,7 @@
 
 from mindocr.data.transforms import det_transforms  # noqa
 
-__all__ = ["DetResize", "ScalePadImage", "DetResizeNormForInfer"]
+__all__ = ["DetResize", "DetResizeNormForInfer"]
 
 
 class DetResize(det_transforms.DetResize):
@@ -43,19 +43,6 @@ def __call__(self, data):
         return super().__call__(data)
 
 
-class ScalePadImage(det_transforms.ScalePadImage):
-    def __init__(self, **kwargs):
-        skipped = ("target_size",)
-        [kwargs.pop(name, None) for name in skipped]
-
-        super().__init__(target_size=None, **kwargs)
-
-    # move 'target_size' to __call__ from __init__
-    def __call__(self, data: dict):
-        self.target_size = data["target_size"]
-        return super().__call__(data)
-
-
 class DetResizeNormForInfer(object):
     def __init__(
         self,

diff --git a/mindocr/data/transforms/det_transforms.py b/mindocr/data/transforms/det_transforms.py
@@ -24,8 +24,6 @@
     "RandomCropWithBBox",
     "RandomCropWithMask",
     "DetResize",
-    "GridResize",
-    "ScalePadImage",
 ]
 _logger = Logger("mindocr")
 
@@ -381,9 +379,7 @@ class DetResize:
     Note:
         1. The default choices limit_type=min, with large `limit_side_len` are recommended for inference in detection
         for better accuracy,
-        2. If target_size set, keep_ratio=True, limit_type=null, padding=True, this transform works the same as
-        ScalePadImage,
-        3. If inference speed is the first priority to guarantee, you can set limit_type=max with a small
+        2. If inference speed is the first priority to guarantee, you can set limit_type=max with a small
         `limit_side_len` like 960.
     """
 
@@ -529,45 +525,6 @@ def __call__(self, data: dict) -> dict:
         return data
 
 
-class GridResize(DetResize):
-    """
-    Resize image to make it divisible by a specified factor exactly.
-    Resize polygons correspondingly, if provided.
-
-    Args:
-        factor: by which an image should be divisible.
-    """
-
-    def __init__(self, factor: int = 32, **kwargs):
-        super().__init__(
-            target_size=None,
-            keep_ratio=False,
-            padding=False,
-            limit_type=None,
-            force_divisable=True,
-            divisor=factor,
-        )
-
-
-class ScalePadImage(DetResize):
-    """
-    Scale image and polys by the shorter side, then pad to the target_size.
-    input image format: hwc
-
-    Args:
-        target_size: [H, W] of the output image.
-    """
-
-    def __init__(self, target_size: list, **kwargs):
-        super().__init__(
-            target_size=target_size,
-            keep_ratio=True,
-            padding=True,
-            limit_type=None,
-            force_divisable=False,
-        )
-
-
 def expand_poly(poly, distance: float, joint_type=pyclipper.JT_ROUND) -> List[list]:
     offset = pyclipper.PyclipperOffset()
     offset.AddPath(poly, joint_type, pyclipper.ET_CLOSEDPOLYGON)

diff --git a/mindocr/data/transforms/transforms_factory.py b/mindocr/data/transforms/transforms_factory.py
@@ -116,8 +116,7 @@ def transforms_dbnet_icdar15(phase="train"):
         pipeline = [
             {"DecodeImage": {"img_mode": "RGB", "to_float32": False}},
             {"DetLabelEncode": None},
-            {"GridResize": {"factor": 32}},
-            {"ScalePadImage": {"target_size": [736, 1280]}},
+            {"DetResize": {"target_size": [736, 1280], "keep_ratio": False, "limit_type": None, "divisor": 32}},
             {
                 "NormalizeImage": {
                     "bgr_to_rgb": False,
@@ -131,8 +130,9 @@ def transforms_dbnet_icdar15(phase="train"):
     else:
         pipeline = [
             {"DecodeImage": {"img_mode": "RGB", "to_float32": False}},
-            {"GridResize": {"factor": 32}},
-            {"ScalePadImage": {"target_size": [736, 1280]}},
+            {
+                "DetResize": {"target_size": [736, 1280], "keep_ratio": False, "limit_type": None, "divisor": 32}
+            },  # replaces the former GridResize (factor 32)
             {
                 "NormalizeImage": {
                     "bgr_to_rgb": False,