[WIP] 5522 random crop port #5555

Open · wants to merge 5 commits into main
1 change: 1 addition & 0 deletions test/test_prototype_transforms.py
@@ -69,6 +69,7 @@ class TestSmoke:
        transforms.HorizontalFlip(),
        transforms.Resize([16, 16]),
        transforms.CenterCrop([16, 16]),
        transforms.RandomCrop([16, 16], pad_if_needed=True),
        transforms.ConvertImageDtype(),
    )
    def test_common(self, transform, input):
2 changes: 1 addition & 1 deletion torchvision/prototype/transforms/__init__.py
@@ -7,7 +7,7 @@
from ._augment import RandomErasing, RandomMixup, RandomCutmix
from ._auto_augment import RandAugment, TrivialAugmentWide, AutoAugment, AugMix
from ._container import Compose, RandomApply, RandomChoice, RandomOrder
from ._geometry import HorizontalFlip, Resize, CenterCrop, RandomResizedCrop, FiveCrop, TenCrop, BatchMultiCrop
from ._geometry import HorizontalFlip, Resize, CenterCrop, RandomResizedCrop, FiveCrop, TenCrop, BatchMultiCrop, RandomCrop
from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace
from ._misc import Identity, Normalize, ToDtype, Lambda
from ._presets import (
95 changes: 94 additions & 1 deletion torchvision/prototype/transforms/_geometry.py
@@ -1,7 +1,7 @@
import collections.abc
import math
import warnings
from typing import Any, Dict, List, Union, Sequence, Tuple, cast
from typing import Any, Dict, List, Union, Sequence, Tuple, cast, Literal, Optional

import PIL.Image
import torch
@@ -256,3 +256,96 @@ def apply_recursively(obj: Any) -> Any:
            return obj

    return apply_recursively(inputs if len(inputs) > 1 else inputs[0])


class RandomCrop(Transform):
    def __init__(
        self,
        size: Union[int, Sequence[int]],
        padding: Optional[Sequence[int]] = None,
        pad_if_needed: bool = False,
        fill: Union[int, str, Sequence[int]] = 0,
        padding_mode: Literal["constant", "edge", "reflect", "symmetric"] = "constant",
    ) -> None:
        super().__init__()
        self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")

        self.padding = padding
        self.pad_if_needed = pad_if_needed
        self.fill = fill
        self.padding_mode = padding_mode

    def _get_crop_parameters(self, image: Any) -> Dict[str, Any]:
        """Get parameters for ``crop`` for a random crop.

        Args:
            image (PIL Image, Tensor or features.Image): Image to be cropped.

        Returns:
            dict: Dict containing 'top', 'left', 'height', and 'width'.
        """
        _, h, w = get_image_dimensions(image)
        th, tw = self.size

        if h + 1 < th or w + 1 < tw:
            raise ValueError(f"Required crop size {(th, tw)} is larger than input image size {(h, w)}")

        if w == tw and h == th:
            return dict(top=0, left=0, height=h, width=w)

        # Sample the top-left corner of the crop uniformly over all valid positions.
        i = torch.randint(0, h - th + 1, size=(1,)).item()
        j = torch.randint(0, w - tw + 1, size=(1,)).item()
        return dict(top=i, left=j, height=th, width=tw)

    def _transform(self, input: Any, params: Dict[str, Any]) -> Any:
        # Apply the static padding and the pad_if_needed padding up front, so
        # that the crop parameters below are computed on the padded image.
        if isinstance(input, features.Image):
            output = F.random_pad_image_tensor(
                input,
                output_size=self.size,
                image_size=get_image_dimensions(input),
                padding=self.padding,
                pad_if_needed=self.pad_if_needed,
                fill=self.fill,
                padding_mode=self.padding_mode,
            )
            input = features.Image.new_like(input, output)
        elif isinstance(input, PIL.Image.Image):
            input = F.random_pad_image_pil(
                input,
                output_size=self.size,
                image_size=get_image_dimensions(input),
                padding=self.padding,
                pad_if_needed=self.pad_if_needed,
                fill=self.fill,
                padding_mode=self.padding_mode,
            )
        elif is_simple_tensor(input):
            input = F.random_pad_image_tensor(
                input,
                output_size=self.size,
                image_size=get_image_dimensions(input),
                padding=self.padding,
                pad_if_needed=self.pad_if_needed,
                fill=self.fill,  # TODO: should be converted to number
                padding_mode=self.padding_mode,
            )

        params.update(self._get_crop_parameters(input))

        if isinstance(input, features.Image):
            output = F.crop_image_tensor(input, **params)
            return features.Image.new_like(input, output)
        elif isinstance(input, PIL.Image.Image):
            return F.crop_image_pil(input, **params)
        elif is_simple_tensor(input):
            return F.crop_image_tensor(input, **params)
        else:
            return input

    def forward(self, *inputs: Any) -> Any:
        sample = inputs if len(inputs) > 1 else inputs[0]
        if has_any(sample, features.BoundingBox, features.SegmentationMask):
            raise TypeError(f"BoundingBox'es and SegmentationMask's are not supported by {type(self).__name__}()")

        return super().forward(sample)
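For reference, the new transform is exercised like the other prototype transforms. A minimal sketch of the intended behavior (the input shape and the printed result are illustrative assumptions, not part of this diff):

import torch
from torchvision.prototype import transforms

# Hypothetical usage: pad_if_needed guarantees the 16x16 crop even for
# inputs smaller than the crop size; a plain tensor is routed through the
# *_image_tensor kernels.
transform = transforms.RandomCrop([16, 16], pad_if_needed=True)
image = torch.randint(0, 256, (3, 12, 20), dtype=torch.uint8)
output = transform(image)
print(output.shape)  # torch.Size([3, 16, 16])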
2 changes: 2 additions & 0 deletions torchvision/prototype/transforms/functional/__init__.py
@@ -48,6 +48,8 @@
    center_crop_image_pil,
    resized_crop_image_tensor,
    resized_crop_image_pil,
    random_pad_image_tensor,
    random_pad_image_pil,
    affine_image_tensor,
    affine_image_pil,
    rotate_image_tensor,
112 changes: 111 additions & 1 deletion torchvision/prototype/transforms/functional/_geometry.py
@@ -1,5 +1,5 @@
import numbers
from typing import Tuple, List, Optional, Sequence, Union
from typing import Tuple, List, Optional, Sequence, Union, Literal

import PIL.Image
import torch
@@ -390,3 +390,113 @@ def ten_crop_image_pil(img: PIL.Image.Image, size: List[int], vertical_flip: bool
    tl_flip, tr_flip, bl_flip, br_flip, center_flip = five_crop_image_pil(img, size)

    return [tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip]


def random_crop_image_tensor(
    img: torch.Tensor,
    top: int,
    left: int,
    height: int,
    width: int,
    size: List[int],
    padding: Optional[List[int]] = None,
    pad_if_needed: bool = False,
    fill: int = 0,
    padding_mode: str = "constant",
) -> torch.Tensor:
    if padding is not None:
        img = pad_image_tensor(img, padding, fill, padding_mode)

    _, h, w = get_dimensions_image_tensor(img)

    # pad the width if needed
    if pad_if_needed and w < size[1]:
        padding = [size[1] - w, 0]
        img = pad_image_tensor(img, padding, fill, padding_mode)

    # pad the height if needed
    if pad_if_needed and h < size[0]:
        padding = [0, size[0] - h]
        img = pad_image_tensor(img, padding, fill, padding_mode)

    return crop_image_tensor(img, top, left, height, width)


def random_crop_image_pil(
    img: PIL.Image.Image,
    top: int,
    left: int,
    height: int,
    width: int,
    size: List[int],
    padding: Optional[List[int]] = None,
    pad_if_needed: bool = False,
    fill: int = 0,
    padding_mode: Literal["constant", "edge", "reflect", "symmetric"] = "constant",
) -> PIL.Image.Image:
    if padding is not None:
        img = pad_image_pil(img, padding, fill, padding_mode)

    _, h, w = get_dimensions_image_pil(img)

    # pad the width if needed
    if pad_if_needed and w < size[1]:
        padding = [size[1] - w, 0]
        img = pad_image_pil(img, padding, fill, padding_mode)

    # pad the height if needed
    if pad_if_needed and h < size[0]:
        padding = [0, size[0] - h]
        img = pad_image_pil(img, padding, fill, padding_mode)

    return crop_image_pil(img, top, left, height, width)
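Note that these two kernels take pre-sampled crop coordinates and only compose the padding with the final crop; the randomness stays in the transform. A hypothetical call into the tensor kernel (shapes and coordinates chosen arbitrarily for illustration):

import torch

# The caller supplies the crop coordinates (here simply (0, 0)); the kernel
# pads the 3x8x8 input up to 24x24 via pad_if_needed, then takes the fixed
# 16x16 crop.
img = torch.rand(3, 8, 8)
out = random_crop_image_tensor(
    img, top=0, left=0, height=16, width=16, size=[16, 16], pad_if_needed=True
)
print(out.shape)  # torch.Size([3, 16, 16])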


def random_pad_image_tensor(
    img: torch.Tensor,
    output_size: List[int],
    image_size: Tuple[int, int, int],
    padding: Optional[Sequence[int]] = None,
    pad_if_needed: bool = False,
    fill: int = 0,
    padding_mode: str = "constant",
) -> torch.Tensor:
    _, height, width = image_size

    if padding is not None:
        img = pad_image_tensor(img, padding, fill, padding_mode)
    # pad the width if needed
    if pad_if_needed and width < output_size[1]:
        padding = [output_size[1] - width, 0]
        img = pad_image_tensor(img, padding, fill, padding_mode)
    # pad the height if needed
    if pad_if_needed and height < output_size[0]:
        padding = [0, output_size[0] - height]
        img = pad_image_tensor(img, padding, fill, padding_mode)
    return img
pmeier (Collaborator):

Why do we need this? Shouldn't pad_image_tensor be able to handle this? In general, we don't have kernels for random functions. All randomness should be handled in the transform.

Author (Contributor):

Hi @pmeier, sorry, I am a bit new to this repo. The reasons I created a new function are:

  1. pad_image_tensor does not have the same logic as this function: this one takes the required output shape and the current image shape into account. I wasn't sure if that logic should live in forward.
  2. Looking at the current code, I felt that transform-related code should be in _geometry and that there should be separate functions for tensor and PIL inputs.
  3. _get_params in RandomCrop requires the output of this function, so adding this logic inside _transform wouldn't work, as the params in _transform would not be valid.

Please share any other approach you have in mind; I can incorporate those changes.

pmeier (Collaborator):

Sorry, I misjudged the situation. I was not aware that the forward actually modifies the image:

if self.padding is not None:
    img = F.pad(img, self.padding, self.fill, self.padding_mode)

_, height, width = F.get_dimensions(img)
# pad the width if needed
if self.pad_if_needed and width < self.size[1]:
    padding = [self.size[1] - width, 0]
    img = F.pad(img, padding, self.fill, self.padding_mode)
# pad the height if needed
if self.pad_if_needed and height < self.size[0]:
    padding = [0, self.size[0] - height]
    img = F.pad(img, padding, self.fill, self.padding_mode)

This makes things more complicated. cc @datumbox for awareness.

I would move this code into _transform. Although the structure is the same for all possible types, we still need to call different pad kernels. That would be a lot easier if we had the Pad transform from #5521 first. This way we could simply substitute pad_image_*(...) with pad(...) where pad is

pad = functools.partial(
    lambda image, padding: Pad(
        padding,
        fill=self.fill,
        padding_mode=self.padding_mode,
    )(image)
)

and not worry about the dispatch. Thoughts?

Author (Contributor):

Hi @pmeier, I can keep this PR on hold and work on #5521 first, if it helps.
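To make the reviewer's suggestion concrete, here is a minimal sketch of what _transform could look like once a type-dispatching Pad transform from #5521 exists. Pad's exact signature is an assumption, and the snippet only makes sense inside the RandomCrop class alongside the helpers this PR already uses (get_image_dimensions, is_simple_tensor, the crop kernels):

import functools
from typing import Any, Dict

def _transform(self, input: Any, params: Dict[str, Any]) -> Any:
    # One padding helper instead of three per-type pad kernels; Pad is
    # assumed to dispatch over tensors, PIL images, and features.Image.
    pad = functools.partial(
        lambda image, padding: Pad(padding, fill=self.fill, padding_mode=self.padding_mode)(image)
    )

    if self.padding is not None:
        input = pad(input, self.padding)

    _, height, width = get_image_dimensions(input)
    # pad the width, then the height, if the image is smaller than the crop
    if self.pad_if_needed and width < self.size[1]:
        input = pad(input, [self.size[1] - width, 0])
    if self.pad_if_needed and height < self.size[0]:
        input = pad(input, [0, self.size[0] - height])

    # With the padding done, the crop parameters are valid for every type.
    params.update(self._get_crop_parameters(input))
    if isinstance(input, features.Image):
        return features.Image.new_like(input, F.crop_image_tensor(input, **params))
    elif isinstance(input, PIL.Image.Image):
        return F.crop_image_pil(input, **params)
    elif is_simple_tensor(input):
        return F.crop_image_tensor(input, **params)
    return input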



def random_pad_image_pil(
    img: PIL.Image.Image,
    output_size: List[int],
    image_size: Tuple[int, int, int],
    padding: Optional[Sequence[int]] = None,
    pad_if_needed: bool = False,
    fill: Union[int, str, Sequence[int]] = 0,
    padding_mode: Literal["constant", "edge", "reflect", "symmetric"] = "constant",
) -> PIL.Image.Image:
    _, height, width = image_size

    if padding is not None:
        img = pad_image_pil(img, padding, fill, padding_mode)
    # pad the width if needed
    if pad_if_needed and width < output_size[1]:
        padding = [output_size[1] - width, 0]
        img = pad_image_pil(img, padding, fill, padding_mode)
    # pad the height if needed
    if pad_if_needed and height < output_size[0]:
        padding = [0, output_size[0] - height]
        img = pad_image_pil(img, padding, fill, padding_mode)
    return img