[prototype] Add support of inplace on convert_format_bounding_box #6858

Merged · 12 commits · Oct 31, 2022
2 changes: 1 addition & 1 deletion torchvision/prototype/transforms/_augment.py
@@ -262,7 +262,7 @@ def _copy_paste(
# https://github.com/pytorch/vision/blob/b6feccbc4387766b76a3e22b13815dbbbfa87c0f/torchvision/models/detection/roi_heads.py#L418-L422
xyxy_boxes[:, 2:] += 1
boxes = F.convert_format_bounding_box(
- xyxy_boxes, old_format=features.BoundingBoxFormat.XYXY, new_format=bbox_format
+ xyxy_boxes, old_format=features.BoundingBoxFormat.XYXY, new_format=bbox_format, inplace=True
)
out_target["boxes"] = torch.cat([boxes, paste_boxes])
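
Note that inplace=True is safe at this call site: xyxy_boxes is a freshly materialized intermediate (itself produced by an earlier conversion and then offset), so mutating it cannot leak back to the caller's tensor. A minimal standalone sketch of the pattern, with hypothetical names rather than the torchvision code itself:

import torch

boxes = torch.tensor([[10.0, 5.0, 20.0, 30.0]])  # caller-owned, XYWH; must not be mutated

xyxy_boxes = boxes.clone()                  # an earlier conversion already cloned
xyxy_boxes[..., 2:] += xyxy_boxes[..., :2]  # XYWH -> XYXY
xyxy_boxes[:, 2:] += 1                      # extra in-place tweak on the intermediate

xyxy_boxes[..., 2:] -= xyxy_boxes[..., :2]  # XYXY -> XYWH in place: no second copy needed,
                                            # since nothing else references xyxy_boxes
assert torch.equal(boxes[..., :2], xyxy_boxes[..., :2])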

11 changes: 9 additions & 2 deletions torchvision/prototype/transforms/_geometry.py
@@ -646,7 +646,9 @@ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
continue

# check for any valid boxes with centers within the crop area
- xyxy_bboxes = F.convert_format_bounding_box(bboxes, bboxes.format, features.BoundingBoxFormat.XYXY)
+ xyxy_bboxes = F.convert_format_bounding_box(
+     bboxes.as_subclass(torch.Tensor), bboxes.format, features.BoundingBoxFormat.XYXY
+ )
cx = 0.5 * (xyxy_bboxes[..., 0] + xyxy_bboxes[..., 2])
cy = 0.5 * (xyxy_bboxes[..., 1] + xyxy_bboxes[..., 3])
is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom)
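
The bboxes.as_subclass(torch.Tensor) call unwraps the prototype BoundingBox feature into a plain tensor before calling the kernel, presumably so this intermediate computation skips the feature subclass's dispatch and re-wrapping. A minimal sketch of what as_subclass does, using a hypothetical TaggedTensor stand-in for the feature class:

import torch

class TaggedTensor(torch.Tensor):  # stand-in for a wrapper like features.BoundingBox
    pass

t = torch.zeros(3, 4).as_subclass(TaggedTensor)
plain = t.as_subclass(torch.Tensor)  # same storage, no copy; only the Python type changes

assert type(plain) is torch.Tensor
assert plain.data_ptr() == t.data_ptr()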
@@ -799,7 +801,12 @@ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
if needs_crop and bounding_boxes is not None:
format = bounding_boxes.format
bounding_boxes, spatial_size = F.crop_bounding_box(
- bounding_boxes, format=format, top=top, left=left, height=new_height, width=new_width
+ bounding_boxes.as_subclass(torch.Tensor),
+ format=format,
+ top=top,
+ left=left,
+ height=new_height,
+ width=new_width,
)
bounding_boxes = F.clamp_bounding_box(bounding_boxes, format=format, spatial_size=spatial_size)
height_and_width = F.convert_format_bounding_box(
4 changes: 3 additions & 1 deletion torchvision/prototype/transforms/_misc.py
@@ -207,7 +207,9 @@ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
# format, we need to convert first just to afterwards compute the width and height again, although they were
# there in the first place for these formats.
bounding_box = F.convert_format_bounding_box(
- bounding_box, old_format=bounding_box.format, new_format=features.BoundingBoxFormat.XYXY
+ bounding_box.as_subclass(torch.Tensor),
+ old_format=bounding_box.format,
+ new_format=features.BoundingBoxFormat.XYXY,
)
valid_indices = remove_small_boxes(bounding_box, min_size=self.min_size)
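
remove_small_boxes comes from torchvision.ops and expects XYXY input, which is why the conversion above is required; it returns the indices of the boxes whose width and height both reach min_size. A quick illustration:

import torch
from torchvision.ops import remove_small_boxes

boxes = torch.tensor(
    [[0.0, 0.0, 10.0, 10.0],  # 10 x 10: kept
     [0.0, 0.0, 2.0, 10.0]]   # 2 x 10: dropped, width < 5
)
print(remove_small_boxes(boxes, min_size=5.0))  # tensor([0])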

38 changes: 17 additions & 21 deletions torchvision/prototype/transforms/functional/_geometry.py
@@ -38,16 +38,14 @@ def horizontal_flip_bounding_box(

# TODO: Investigate if it makes sense from a performance perspective to have an implementation for every
# BoundingBoxFormat instead of converting back and forth
- bounding_box = (
-     bounding_box.clone()
-     if format == features.BoundingBoxFormat.XYXY
-     else convert_format_bounding_box(bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY)
+ bounding_box = convert_format_bounding_box(
+     bounding_box.clone(), old_format=format, new_format=features.BoundingBoxFormat.XYXY, inplace=True
).reshape(-1, 4)

bounding_box[:, [0, 2]] = spatial_size[1] - bounding_box[:, [2, 0]]

return convert_format_bounding_box(
- bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format
+ bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, inplace=True
).reshape(shape)
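
The net effect of the rewrite: one clone up front, then an XYXY round trip with inplace=True on both legs, replacing the old conditional clone. A self-contained sketch of the same clone-once round trip for XYWH boxes (illustrative only, not the torchvision kernel):

import torch

def hflip_xywh_boxes(boxes: torch.Tensor, image_width: int) -> torch.Tensor:
    out = boxes.clone()                            # single defensive copy
    out[..., 2:] += out[..., :2]                   # XYWH -> XYXY, in place
    out = out.reshape(-1, 4)
    out[:, [0, 2]] = image_width - out[:, [2, 0]]  # mirror x1/x2
    out[..., 2:] -= out[..., :2]                   # XYXY -> XYWH, in place
    return out.reshape(boxes.shape)

print(hflip_xywh_boxes(torch.tensor([[10.0, 5.0, 20.0, 30.0]]), image_width=100))
# tensor([[70.,  5., 20., 30.]])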


@@ -79,16 +77,14 @@ def vertical_flip_bounding_box(

# TODO: Investigate if it makes sense from a performance perspective to have an implementation for every
# BoundingBoxFormat instead of converting back and forth
- bounding_box = (
-     bounding_box.clone()
-     if format == features.BoundingBoxFormat.XYXY
-     else convert_format_bounding_box(bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY)
+ bounding_box = convert_format_bounding_box(
+     bounding_box.clone(), old_format=format, new_format=features.BoundingBoxFormat.XYXY, inplace=True
).reshape(-1, 4)

bounding_box[:, [1, 3]] = spatial_size[0] - bounding_box[:, [3, 1]]

return convert_format_bounding_box(
- bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format
+ bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, inplace=True
).reshape(shape)


@@ -412,7 +408,7 @@ def affine_bounding_box(
# out_bboxes should be of shape [N boxes, 4]

return convert_format_bounding_box(
- out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format
+ out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format, inplace=True
).reshape(original_shape)


@@ -594,9 +590,9 @@ def rotate_bounding_box(
)

return (
- convert_format_bounding_box(out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format).reshape(
-     original_shape
- ),
+ convert_format_bounding_box(
+     out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format, inplace=True
+ ).reshape(original_shape),
spatial_size,
)

@@ -815,18 +811,18 @@ def crop_bounding_box(
) -> Tuple[torch.Tensor, Tuple[int, int]]:
# TODO: Investigate if it makes sense from a performance perspective to have an implementation for every
[Review comment from a Contributor] Yes, we don't have every conversion. I remember that when adding them I had a discussion with Francisco that all conversions can simply go through an intermediate XYXY conversion, for the sake of fewer complications.

# BoundingBoxFormat instead of converting back and forth
- bounding_box = (
-     bounding_box.clone()
-     if format == features.BoundingBoxFormat.XYXY
-     else convert_format_bounding_box(bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY)
+ bounding_box = convert_format_bounding_box(
+     bounding_box.clone(), old_format=format, new_format=features.BoundingBoxFormat.XYXY, inplace=True
)

# Crop or implicit pad if left and/or top have negative values:
bounding_box[..., 0::2] -= left
bounding_box[..., 1::2] -= top

return (
- convert_format_bounding_box(bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format),
+ convert_format_bounding_box(
+     bounding_box, old_format=features.BoundingBoxFormat.XYXY, new_format=format, inplace=True
+ ),
(height, width),
)
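
As the review comment above explains, conversions are routed through an intermediate XYXY step rather than implemented pairwise for every format combination. A simplified sketch of that two-leg routing, mirroring the shape of convert_format_bounding_box (CXCYWH legs omitted for brevity):

import torch
from enum import Enum

class Fmt(Enum):  # stand-in for features.BoundingBoxFormat
    XYXY = "XYXY"
    XYWH = "XYWH"
    CXCYWH = "CXCYWH"

def convert(box: torch.Tensor, old: Fmt, new: Fmt) -> torch.Tensor:
    if old == new:
        return box
    if old == Fmt.XYWH:               # leg 1: source format -> XYXY
        box = box.clone()
        box[..., 2:] += box[..., :2]
    if new == Fmt.XYWH:               # leg 2: XYXY -> target format
        box = box.clone()
        box[..., 2:] -= box[..., :2]
    return box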

@@ -964,7 +960,7 @@ def perspective_bounding_box(
# out_bboxes should be of shape [N boxes, 4]

return convert_format_bounding_box(
- out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format
+ out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format, inplace=True
).reshape(original_shape)


@@ -1085,7 +1081,7 @@ def elastic_bounding_box(
out_bboxes = torch.cat([out_bbox_mins, out_bbox_maxs], dim=1).to(bounding_box.dtype)

return convert_format_bounding_box(
- out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format
+ out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format, inplace=True
).reshape(original_shape)


63 changes: 35 additions & 28 deletions torchvision/prototype/transforms/functional/_meta.py
@@ -119,51 +119,60 @@ def get_num_frames(inpt: features.VideoTypeJIT) -> int:
raise TypeError(f"The video should be a Tensor. Got {type(inpt)}")


- def _xywh_to_xyxy(xywh: torch.Tensor) -> torch.Tensor:
-     xyxy = xywh.clone()
+ def _xywh_to_xyxy(xywh: torch.Tensor, inplace: bool) -> torch.Tensor:
+     xyxy = xywh if inplace else xywh.clone()
xyxy[..., 2:] += xyxy[..., :2]
return xyxy


- def _xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor:
-     xywh = xyxy.clone()
+ def _xyxy_to_xywh(xyxy: torch.Tensor, inplace: bool) -> torch.Tensor:
+     xywh = xyxy if inplace else xyxy.clone()
xywh[..., 2:] -= xywh[..., :2]
return xywh


- def _cxcywh_to_xyxy(cxcywh: torch.Tensor) -> torch.Tensor:
-     cx, cy, w, h = torch.unbind(cxcywh, dim=-1)
-     x1 = cx - 0.5 * w
-     y1 = cy - 0.5 * h
-     x2 = cx + 0.5 * w
-     y2 = cy + 0.5 * h
-     return torch.stack((x1, y1, x2, y2), dim=-1).to(cxcywh.dtype)
+ def _cxcywh_to_xyxy(cxcywh: torch.Tensor, inplace: bool) -> torch.Tensor:
+     if not inplace:
+         cxcywh = cxcywh.clone()
+
+     # Trick to do fast division by 2 and ceil, without casting. It produces the same result as
+     # `torchvision.ops._box_convert._box_cxcywh_to_xyxy`.
+     half_wh = cxcywh[..., 2:].div(-2, rounding_mode=None if cxcywh.is_floating_point() else "floor").abs_()
[Review comment from @datumbox (Contributor, Author), Oct 28, 2022] This is a trick to do division by 2 and ceil if we deal with integers. Unfortunately, rounding_mode doesn't support ceiling, so I'm doing division by a negative and flooring plus abs, which has the same effect. This trick is slower than just doing a simple division by 2 and floor, but the faster version leads to a 1-pixel misalignment with the existing behaviour in TorchVision. Obviously, since this was never released, we could have just implemented the faster version. But because we already offer operators at torchvision.ops with the same behaviour, I opted to align with them. See the detailed tests above for examples.

For the record, the faster version is:

half_wh = cxcywh[..., 2:].div(2, rounding_mode=None if cxcywh.is_floating_point() else "floor")
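
A quick standalone check of the trick: for non-negative integer tensors, dividing by -2 with floor rounding and taking the absolute value equals ceil division by 2, since floor(-x/2) = -ceil(x/2):

import torch

w = torch.arange(0, 10, dtype=torch.int64)
ceil_half = w.div(-2, rounding_mode="floor").abs()
assert torch.equal(ceil_half, (w + 1) // 2)  # ceil(w / 2) for non-negative ints
# e.g. w=5: floor(5 / -2) = floor(-2.5) = -3, abs() -> 3 == ceil(5 / 2)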

+     # (cx - width / 2) = x1, same for y1
+     cxcywh[..., :2].sub_(half_wh)
+     # (x1 + width) = x2, same for y2
+     cxcywh[..., 2:].add_(cxcywh[..., :2])
+
+     return cxcywh

- def _xyxy_to_cxcywh(xyxy: torch.Tensor) -> torch.Tensor:
-     x1, y1, x2, y2 = torch.unbind(xyxy, dim=-1)
-     cx = (x1 + x2) / 2
-     cy = (y1 + y2) / 2
-     w = x2 - x1
-     h = y2 - y1
-     return torch.stack((cx, cy, w, h), dim=-1).to(xyxy.dtype)
+ def _xyxy_to_cxcywh(xyxy: torch.Tensor, inplace: bool) -> torch.Tensor:
+     if not inplace:
+         xyxy = xyxy.clone()
+
+     # (x2 - x1) = width, same for height
+     xyxy[..., 2:].sub_(xyxy[..., :2])
+     # (x1 * 2 + width) / 2 = x1 + width / 2 = x1 + (x2-x1)/2 = (x1 + x2)/2 = cx, same for cy
+     xyxy[..., :2].mul_(2).add_(xyxy[..., 2:]).div_(2, rounding_mode=None if xyxy.is_floating_point() else "floor")
+
+     return xyxy


def convert_format_bounding_box(
- bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat
+ bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, inplace: bool = False
) -> torch.Tensor:
if new_format == old_format:
return bounding_box

if old_format == BoundingBoxFormat.XYWH:
- bounding_box = _xywh_to_xyxy(bounding_box)
+ bounding_box = _xywh_to_xyxy(bounding_box, inplace)
elif old_format == BoundingBoxFormat.CXCYWH:
- bounding_box = _cxcywh_to_xyxy(bounding_box)
+ bounding_box = _cxcywh_to_xyxy(bounding_box, inplace)

if new_format == BoundingBoxFormat.XYWH:
- bounding_box = _xyxy_to_xywh(bounding_box)
+ bounding_box = _xyxy_to_xywh(bounding_box, inplace)
elif new_format == BoundingBoxFormat.CXCYWH:
- bounding_box = _xyxy_to_cxcywh(bounding_box)
+ bounding_box = _xyxy_to_cxcywh(bounding_box, inplace)

return bounding_box
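
Usage of the new parameter (a hedged sketch, assuming the prototype import paths at the time of this PR):

import torch
from torchvision.prototype import features
from torchvision.prototype.transforms import functional as F

xywh = torch.tensor([[10.0, 5.0, 20.0, 30.0]])

# default inplace=False: the input is left untouched, the helpers clone internally
xyxy = F.convert_format_bounding_box(
    xywh, old_format=features.BoundingBoxFormat.XYWH, new_format=features.BoundingBoxFormat.XYXY
)
assert not torch.equal(xywh, xyxy)

# inplace=True: the caller hands over a scratch tensor to skip the defensive copy
scratch = xywh.clone()
xyxy2 = F.convert_format_bounding_box(
    scratch, old_format=features.BoundingBoxFormat.XYWH, new_format=features.BoundingBoxFormat.XYXY, inplace=True
)
assert torch.equal(xyxy, xyxy2) and torch.equal(scratch, xyxy)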

@@ -173,14 +182,12 @@ def clamp_bounding_box(
) -> torch.Tensor:
# TODO: Investigate if it makes sense from a performance perspective to have an implementation for every
# BoundingBoxFormat instead of converting back and forth
- xyxy_boxes = (
-     bounding_box.clone()
-     if format == BoundingBoxFormat.XYXY
-     else convert_format_bounding_box(bounding_box, format, BoundingBoxFormat.XYXY)
+ xyxy_boxes = convert_format_bounding_box(
+     bounding_box.clone(), old_format=format, new_format=features.BoundingBoxFormat.XYXY, inplace=True
)
xyxy_boxes[..., 0::2].clamp_(min=0, max=spatial_size[1])
xyxy_boxes[..., 1::2].clamp_(min=0, max=spatial_size[0])
- return convert_format_bounding_box(xyxy_boxes, BoundingBoxFormat.XYXY, format)
+ return convert_format_bounding_box(xyxy_boxes, old_format=BoundingBoxFormat.XYXY, new_format=format, inplace=True)


def _strip_alpha(image: torch.Tensor) -> torch.Tensor: