Skip to content

Commit

Permalink
[Typing][A-83] Add type annotations for `paddle/vision/transforms/functional.py` (PaddlePaddle#65560)
Browse files Browse the repository at this point in the history


---------

Co-authored-by: SigureMo <sigure.qaq@gmail.com>
Co-authored-by: gouzi <530971494@qq.com>
  • Loading branch information
3 people authored Jun 30, 2024
1 parent 5cc9895 commit 8ff2ac1
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 48 deletions.
1 change: 1 addition & 0 deletions python/paddle/_typing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
TensorIndex as TensorIndex,
TensorLike as TensorLike,
TensorOrTensors as TensorOrTensors,
unreached as unreached,
)

# Device
Expand Down
9 changes: 8 additions & 1 deletion python/paddle/_typing/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

import numpy as np
import numpy.typing as npt
from typing_extensions import TypeAlias
from typing_extensions import Never, TypeAlias

from .backport import EllipsisType

Expand Down Expand Up @@ -63,3 +63,10 @@
ParamAttrLike: TypeAlias = Union[
"ParamAttr", "Initializer", "WeightDecayRegularizer", str, bool
]


def unreached() -> Never:
    """Signal that execution arrived at a branch that should be impossible.

    The ``Never`` return annotation lets static type checkers treat everything
    after a call to this function as dead code; at runtime the call always
    fails loudly instead of silently falling through.

    See https://typing.readthedocs.io/en/latest/source/unreachable.html#marking-code-as-unreachable

    Raises:
        RuntimeError: Always, with the message ``"Unreachable code path"``.
    """
    message = "Unreachable code path"
    raise RuntimeError(message)
2 changes: 1 addition & 1 deletion python/paddle/nn/layer/norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
DTypeLike,
ShapeLike,
)
from paddle.nn import Layer


__all__ = []

Expand Down
136 changes: 103 additions & 33 deletions python/paddle/vision/transforms/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import math
import numbers
from typing import TYPE_CHECKING, Any

import numpy as np
from PIL import Image

import paddle
from paddle._typing import unreached

from ...base.framework import Variable
from . import (
Expand All @@ -27,25 +31,49 @@
functional_tensor as F_t,
)

if TYPE_CHECKING:
from typing import Literal, TypeGuard, TypeVar, Union

import numpy.typing as npt
from PIL.Image import Image as PILImage
from typing_extensions import TypeAlias

from paddle import Tensor
from paddle._typing import DataLayoutImage, Size2, Size3, Size4

_InterpolationPil: TypeAlias = Literal[
"nearest", "bilinear", "bicubic", "lanczos", "hamming"
]
_InterpolationCv2: TypeAlias = Literal[
"nearest", "bilinear", "area", "bicubic", "lanczos"
]
_PaddingMode: TypeAlias = Literal[
"constant", "edge", "reflect", "symmetric"
]
_ImageDataT = TypeVar("_ImageDataT", Tensor, PILImage, npt.NDArray[Any])
_ImageDataType = Union[Tensor, PILImage, npt.NDArray[Any]]

__all__ = []


def _is_pil_image(img: _ImageDataType) -> TypeGuard[PILImage]:
    """Return True if ``img`` is a ``PIL.Image.Image`` instance.

    The ``TypeGuard`` return annotation lets static type checkers narrow
    ``img`` to ``PILImage`` in the branch where this returns True.
    """
    return isinstance(img, Image.Image)


def _is_tensor_image(img: _ImageDataType) -> TypeGuard[Tensor]:
    """Return True if ``img`` is a paddle tensor.

    Both representations are accepted: ``paddle.Tensor`` for dynamic mode and
    ``Variable`` for static graph mode. The ``TypeGuard`` return annotation
    lets static type checkers narrow ``img`` to ``Tensor`` in the branch where
    this returns True.
    """
    return isinstance(img, (paddle.Tensor, Variable))


def _is_numpy_image(img):
def _is_numpy_image(img: _ImageDataType) -> TypeGuard[npt.NDArray[Any]]:
return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


def to_tensor(pic, data_format='CHW'):
def to_tensor(
pic: PILImage | npt.NDArray[Any], data_format: DataLayoutImage = 'CHW'
) -> Tensor:
"""Converts a ``PIL.Image`` or ``numpy.ndarray`` to paddle.Tensor.
Converts a PIL.Image or numpy.ndarray (H x W x C) to a paddle.Tensor of shape (C x H x W).
Expand Down Expand Up @@ -97,7 +125,11 @@ def to_tensor(pic, data_format='CHW'):
return pic if data_format.lower() == 'chw' else pic.transpose((1, 2, 0))


def resize(img, size, interpolation='bilinear'):
def resize(
img: _ImageDataT,
size: Size2,
interpolation: _InterpolationPil | _InterpolationCv2 = 'bilinear',
) -> _ImageDataT:
"""
Resizes the image to given size
Expand Down Expand Up @@ -153,7 +185,12 @@ def resize(img, size, interpolation='bilinear'):
return F_cv2.resize(img, size, interpolation)


def pad(img, padding, fill=0, padding_mode='constant'):
def pad(
img: _ImageDataT,
padding: Size2 | Size4,
fill: Size3 = 0,
padding_mode: _PaddingMode = 'constant',
) -> _ImageDataT:
"""
Pads the given PIL.Image or numpy.array or paddle.Tensor on all sides with specified padding mode and fill value.
Expand Down Expand Up @@ -217,7 +254,9 @@ def pad(img, padding, fill=0, padding_mode='constant'):
return F_cv2.pad(img, padding, fill, padding_mode)


def crop(img, top, left, height, width):
def crop(
img: _ImageDataT, top: int, left: int, height: int, width: int
) -> _ImageDataT:
"""Crops the given Image.
Args:
Expand Down Expand Up @@ -259,7 +298,7 @@ def crop(img, top, left, height, width):
return F_cv2.crop(img, top, left, height, width)


def center_crop(img, output_size):
def center_crop(img: _ImageDataT, output_size: Size2) -> _ImageDataT:
"""Crops the given Image and resize it to desired size.
Args:
Expand Down Expand Up @@ -297,7 +336,7 @@ def center_crop(img, output_size):
return F_cv2.center_crop(img, output_size)


def hflip(img):
def hflip(img: _ImageDataT) -> _ImageDataT:
"""Horizontally flips the given Image or np.array or paddle.Tensor.
Args:
Expand Down Expand Up @@ -334,7 +373,7 @@ def hflip(img):
return F_cv2.hflip(img)


def vflip(img):
def vflip(img: _ImageDataT) -> _ImageDataT:
"""Vertically flips the given Image or np.array or paddle.Tensor.
Args:
Expand Down Expand Up @@ -371,7 +410,9 @@ def vflip(img):
return F_cv2.vflip(img)


def adjust_brightness(img, brightness_factor):
def adjust_brightness(
img: _ImageDataT, brightness_factor: float
) -> _ImageDataT:
"""Adjusts brightness of an Image.
Args:
Expand Down Expand Up @@ -423,7 +464,7 @@ def adjust_brightness(img, brightness_factor):
return F_t.adjust_brightness(img, brightness_factor)


def adjust_contrast(img, contrast_factor):
def adjust_contrast(img: _ImageDataT, contrast_factor: float) -> _ImageDataT:
"""Adjusts contrast of an Image.
Args:
Expand Down Expand Up @@ -462,7 +503,9 @@ def adjust_contrast(img, contrast_factor):
return F_t.adjust_contrast(img, contrast_factor)


def adjust_saturation(img, saturation_factor):
def adjust_saturation(
img: _ImageDataT, saturation_factor: float
) -> _ImageDataT:
"""Adjusts color saturation of an image.
Args:
Expand Down Expand Up @@ -502,7 +545,7 @@ def adjust_saturation(img, saturation_factor):
return F_t.adjust_saturation(img, saturation_factor)


def adjust_hue(img, hue_factor):
def adjust_hue(img: _ImageDataT, hue_factor: float) -> _ImageDataT:
"""Adjusts hue of an image.
The image hue is adjusted by converting the image to HSV and
Expand Down Expand Up @@ -583,15 +626,15 @@ def _get_affine_matrix(center, angle, translate, scale, shear):


def affine(
img,
angle,
translate,
scale,
shear,
interpolation="nearest",
fill=0,
center=None,
):
img: _ImageDataT,
angle: float,
translate: list[float] | tuple[float, float],
scale: float,
shear: list[float] | tuple[float, float],
interpolation: _InterpolationPil | _InterpolationCv2 = "nearest",
fill: Size3 = 0,
center: list[float] | tuple[float, float] | None = None,
) -> _ImageDataT:
"""Apply affine transformation on the image.
Args:
Expand All @@ -613,7 +656,7 @@ def affine(
- "bicubic": cv2.INTER_CUBIC
fill (int|list|tuple, optional): Pixel fill value for the area outside the transformed
image. If given a number, the value is used for all bands respectively.
center (2-tuple, optional): Optional center of rotation, (x, y).
center (tuple|None, optional): Optional center of rotation, (x, y).
Origin is the upper left corner.
Default is the center of the image.
Expand Down Expand Up @@ -715,10 +758,17 @@ def affine(
matrix = _get_affine_matrix(center_f, angle, translate_f, scale, shear)
return F_t.affine(img, matrix, interpolation, fill)

unreached()


def rotate(
img, angle, interpolation="nearest", expand=False, center=None, fill=0
):
img: _ImageDataT,
angle: float,
interpolation: _InterpolationPil | _InterpolationCv2 = "nearest",
expand: bool = False,
center: list[float] | tuple[float, float] | None = None,
fill: Size3 = 0,
) -> _ImageDataT:
"""Rotates the image by angle.
Expand All @@ -739,10 +789,10 @@ def rotate(
If true, expands the output image to make it large enough to hold the entire rotated image.
If false or omitted, make the output image the same size as the input image.
Note that the expand flag assumes rotation around the center and no translation.
center (2-list|2-tuple, optional): Optional center of rotation.
center (list|tuple|None, optional): Optional center of rotation.
Origin is the upper left corner.
Default is the center of the image.
fill (3-list|3-tuple or int, optional): RGB pixel fill value for area outside the rotated image.
fill (list|tuple or int, optional): RGB pixel fill value for area outside the rotated image.
If int, it is used for all channels respectively. Default value is 0.
Expand Down Expand Up @@ -827,7 +877,13 @@ def _get_perspective_coeffs(startpoints, endpoints):
return output


def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0):
def perspective(
img: _ImageDataT,
startpoints: list[list[int]],
endpoints: list[list[int]],
interpolation: _InterpolationPil | _InterpolationCv2 = 'nearest',
fill: Size3 = 0,
) -> _ImageDataT:
"""Perform perspective transform of the given image.
Args:
Expand Down Expand Up @@ -885,7 +941,7 @@ def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0):
)


def to_grayscale(img, num_output_channels=1):
def to_grayscale(img: _ImageDataT, num_output_channels: int = 1) -> _ImageDataT:
"""Converts image to grayscale version of image.
Args:
Expand Down Expand Up @@ -926,14 +982,20 @@ def to_grayscale(img, num_output_channels=1):
return F_cv2.to_grayscale(img, num_output_channels)


def normalize(img, mean, std, data_format='CHW', to_rgb=False):
def normalize(
img: _ImageDataT,
mean: list[float] | tuple[float, float, float],
std: list[float] | tuple[float, float, float],
data_format: DataLayoutImage = 'CHW',
to_rgb: bool = False,
) -> _ImageDataT:
"""Normalizes a tensor or image with mean and standard deviation.
Args:
img (PIL.Image|np.array|paddle.Tensor): input data to be normalized.
mean (list|tuple): Sequence of means for each channel.
std (list|tuple): Sequence of standard deviations for each channel.
data_format (str, optional): Data format of input img, should be 'HWC' or
data_format (str|None, optional): Data format of input img, should be 'HWC' or
'CHW'. Default: 'CHW'.
to_rgb (bool, optional): Whether to convert to rgb. If input is tensor,
this option will be ignored. Default: False.
Expand Down Expand Up @@ -966,7 +1028,15 @@ def normalize(img, mean, std, data_format='CHW', to_rgb=False):
return F_cv2.normalize(img, mean, std, data_format, to_rgb)


def erase(img, i, j, h, w, v, inplace=False):
def erase(
img: _ImageDataT,
i: int,
j: int,
h: int,
w: int,
v: npt.NDArray[Any] | Tensor,
inplace: bool = False,
) -> _ImageDataT:
"""Erase the pixels of selected area in input image with given value.
Args:
Expand Down
23 changes: 10 additions & 13 deletions python/paddle/vision/transforms/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,12 @@
_TransformInputKeys: TypeAlias = Sequence[
Literal["image", "coords", "boxes", "mask"]
]
_InterpolationPil: TypeAlias = Literal[
"nearest", "bilinear", "bicubic", "lanczos", "hamming"
]
_InterpolationCv2: TypeAlias = Literal[
"nearest", "bilinear", "area", "bicubic", "lanczos"
]
_PaddingMode: TypeAlias = Literal[
"constant", "edge", "reflect", "symmetric"
]
from .functional import (
_InterpolationCv2,
_InterpolationPil,
_PaddingMode,
)


_InputT = TypeVar(
"_InputT", "Tensor", "PILImage", "npt.NDArray[Any]", contravariant=True
Expand Down Expand Up @@ -1578,10 +1575,10 @@ class RandomAffine(BaseTransform[_InputT, _RetT]):
- "bicubic": cv2.INTER_CUBIC
fill (int|list|tuple, optional): Pixel fill value for the area outside the transformed
image. If given a number, the value is used for all bands respectively.
center (2-tuple, optional): Optional center of rotation, (x, y).
center (tuple|None, optional): Optional center of rotation, (x, y).
Origin is the upper left corner.
Default is the center of the image.
keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
keys (list[str]|tuple[str]|None, optional): Same as ``BaseTransform``. Default: None.
Shape:
- img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
Expand Down Expand Up @@ -1616,7 +1613,7 @@ class RandomAffine(BaseTransform[_InputT, _RetT]):
)
interpolation: _InterpolationPil | _InterpolationCv2
fill: Size3
center: list[float] | tuple[float, float]
center: list[float] | tuple[float, float] | None

def __init__(
self,
Expand All @@ -1632,7 +1629,7 @@ def __init__(
) = None,
interpolation: _InterpolationPil | _InterpolationCv2 = 'nearest',
fill: Size3 = 0,
center: list[float] | tuple[float, float] = None,
center: list[float] | tuple[float, float] | None = None,
keys: _TransformInputKeys | None = None,
) -> None:
self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,))
Expand Down

0 comments on commit 8ff2ac1

Please sign in to comment.