From 9fa8000dff145a5f5500b3f40d444adddb9c2ace Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Jan 2022 14:15:48 +0000 Subject: [PATCH 1/3] Add support for flow batches in flow_to_image (#5308) --- test/test_utils.py | 37 +++++++++++++++++++++++++------------ torchvision/utils.py | 37 +++++++++++++++++++++++-------------- 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 8367deb7c4a..ebe35a8cd14 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -317,29 +317,42 @@ def test_draw_keypoints_errors(): utils.draw_keypoints(image=img, keypoints=invalid_keypoints) -def test_flow_to_image(): +@pytest.mark.parametrize("batch", (True, False)) +def test_flow_to_image(batch): h, w = 100, 100 flow = torch.meshgrid(torch.arange(h), torch.arange(w), indexing="ij") flow = torch.stack(flow[::-1], dim=0).float() flow[0] -= h / 2 flow[1] -= w / 2 + + if batch: + flow = torch.stack([flow, flow]) + img = utils.flow_to_image(flow) + assert img.shape == (2, 3, h, w) if batch else (3, h, w) + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "expected_flow.pt") expected_img = torch.load(path, map_location="cpu") - assert_equal(expected_img, img) + if batch: + expected_img = torch.stack([expected_img, expected_img]) + + assert_equal(expected_img, img) -def test_flow_to_image_errors(): - wrong_flow1 = torch.full((3, 10, 10), 0, dtype=torch.float) - wrong_flow2 = torch.full((2, 10), 0, dtype=torch.float) - wrong_flow3 = torch.full((2, 10, 30), 0, dtype=torch.int) - with pytest.raises(ValueError, match="Input flow should have shape"): - utils.flow_to_image(flow=wrong_flow1) - with pytest.raises(ValueError, match="Input flow should have shape"): - utils.flow_to_image(flow=wrong_flow2) - with pytest.raises(ValueError, match="Flow should be of dtype torch.float"): - utils.flow_to_image(flow=wrong_flow3) +@pytest.mark.parametrize( + "input_flow, match", + ( + (torch.full((3, 10, 10), 0, dtype=torch.float), "Input flow should have shape"), + (torch.full((5, 3, 10, 10), 0, dtype=torch.float), "Input flow should have shape"), + (torch.full((2, 10), 0, dtype=torch.float), "Input flow should have shape"), + (torch.full((5, 2, 10), 0, dtype=torch.float), "Input flow should have shape"), + (torch.full((2, 10, 30), 0, dtype=torch.int), "Flow should be of dtype torch.float"), + ), +) +def test_flow_to_image_errors(input_flow, match): + with pytest.raises(ValueError, match=match): + utils.flow_to_image(flow=input_flow) if __name__ == "__main__": diff --git a/torchvision/utils.py b/torchvision/utils.py index 091d0b10726..855f132d645 100644 --- a/torchvision/utils.py +++ b/torchvision/utils.py @@ -397,42 +397,51 @@ def flow_to_image(flow: torch.Tensor) -> torch.Tensor: Converts a flow to an RGB image. Args: - flow (Tensor): Flow of shape (2, H, W) and dtype torch.float. + flow (Tensor): Flow of shape (N, 2, H, W) or (2, H, W) and dtype torch.float. Returns: - img (Tensor(3, H, W)): Image Tensor of dtype uint8 where each color corresponds to a given flow direction. + img (Tensor): Image Tensor of dtype uint8 where each color corresponds + to a given flow direction. Shape is (N, 3, H, W) or (3, H, W) depending on the input. """ if flow.dtype != torch.float: raise ValueError(f"Flow should be of dtype torch.float, got {flow.dtype}.") - if flow.ndim != 3 or flow.size(0) != 2: - raise ValueError(f"Input flow should have shape (2, H, W), got {flow.shape}.") + orig_shape = flow.shape + if flow.ndim == 3: + flow = flow[None] # Add batch dim - max_norm = torch.sum(flow ** 2, dim=0).sqrt().max() + if flow.ndim != 4 or flow.shape[1] != 2: + raise ValueError(f"Input flow should have shape (2, H, W) or (N, 2, H, W), got {orig_shape}.") + + max_norm = torch.sum(flow ** 2, dim=1).sqrt().max() epsilon = torch.finfo((flow).dtype).eps normalized_flow = flow / (max_norm + epsilon) - return _normalized_flow_to_image(normalized_flow) + img = _normalized_flow_to_image(normalized_flow) + + if len(orig_shape) == 3: + img = img[0] # Remove batch dim + return img @torch.no_grad() def _normalized_flow_to_image(normalized_flow: torch.Tensor) -> torch.Tensor: """ - Converts a normalized flow to an RGB image. + Converts a batch of normalized flow to an RGB image. Args: - normalized_flow (torch.Tensor): Normalized flow tensor of shape (2, H, W) + normalized_flow (torch.Tensor): Normalized flow tensor of shape (N, 2, H, W) Returns: - img (Tensor(3, H, W)): Flow visualization image of dtype uint8. + img (Tensor(N, 3, H, W)): Flow visualization image of dtype uint8. """ - _, H, W = normalized_flow.shape - flow_image = torch.zeros((3, H, W), dtype=torch.uint8) + N, _, H, W = normalized_flow.shape + flow_image = torch.zeros((N, 3, H, W), dtype=torch.uint8) colorwheel = _make_colorwheel() # shape [55x3] num_cols = colorwheel.shape[0] - norm = torch.sum(normalized_flow ** 2, dim=0).sqrt() - a = torch.atan2(-normalized_flow[1], -normalized_flow[0]) / torch.pi + norm = torch.sum(normalized_flow ** 2, dim=1).sqrt() + a = torch.atan2(-normalized_flow[:, 1, :, :], -normalized_flow[:, 0, :, :]) / torch.pi fk = (a + 1) / 2 * (num_cols - 1) k0 = torch.floor(fk).to(torch.long) k1 = k0 + 1 @@ -445,7 +454,7 @@ def _normalized_flow_to_image(normalized_flow: torch.Tensor) -> torch.Tensor: col1 = tmp[k1] / 255.0 col = (1 - f) * col0 + f * col1 col = 1 - norm * (1 - col) - flow_image[c, :, :] = torch.floor(255 * col) + flow_image[:, c, :, :] = torch.floor(255 * col) return flow_image From 7d868aa608b94d022c42357ede40bda06af942f4 Mon Sep 17 00:00:00 2001 From: Yiwen Song <34639474+sallysyw@users.noreply.github.com> Date: Fri, 28 Jan 2022 22:00:41 -0800 Subject: [PATCH 2/3] [ViT] Adding conv_stem support (#5226) * Adding conv_stem support * fix lint * bug fix * address comments * fix after merge * adding back checking lines * fix failing tests * fix iignore * add unittest & address comments * fix memory issue * address comments --- .../ModelTester.test_vitc_b_16_expect.pkl | Bin 0 -> 939 bytes test/test_models.py | 30 ++++++++++ torchvision/models/vision_transformer.py | 56 +++++++++++++++--- 3 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 test/expect/ModelTester.test_vitc_b_16_expect.pkl diff --git a/test/expect/ModelTester.test_vitc_b_16_expect.pkl b/test/expect/ModelTester.test_vitc_b_16_expect.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1f846beb6a0bccf8b545f5a67b74482015cc878b GIT binary patch literal 939 zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW zr zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5(jATumGUYDqB%_@&wQ~AdEY-_!+F>p;eYzR1Ay-Hz#ul>i!MRpZGie3qz3t@Vp zVG!WW#-;;RB*&}^R}M Date: Mon, 31 Jan 2022 07:14:07 -0300 Subject: [PATCH 3/3] Use HTTPS for the HMDB51 links (#5312) Co-authored-by: Nicolas Hug --- torchvision/datasets/hmdb51.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/hmdb51.py b/torchvision/datasets/hmdb51.py index 19c00866191..5bfb604c916 100644 --- a/torchvision/datasets/hmdb51.py +++ b/torchvision/datasets/hmdb51.py @@ -11,7 +11,7 @@ class HMDB51(VisionDataset): """ - `HMDB51 `_ + `HMDB51 `_ dataset. HMDB51 is an action recognition video dataset. @@ -47,9 +47,9 @@ class HMDB51(VisionDataset): - label (int): class of the video clip """ - data_url = "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar" + data_url = "https://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar" splits = { - "url": "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar", + "url": "https://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar", "md5": "15e67781e70dcfbdce2d7dbb9b3344b5", } TRAIN_TAG = 1