Detection Transformer Estimator #2192

Merged
39 commits
cb610a8
ViT backbone object detector with fasterRCNN example
kieranfraser Mar 2, 2023
dc2987a
ViT backbone object detector with fasterRCNN example
kieranfraser Mar 2, 2023
36e5e6e
training pipeline for fasterrcnn with vit backbone working. requires …
kieranfraser Mar 6, 2023
cc4894e
Adding pytorch detr. Working example demonstrating object detection a…
kieranfraser Mar 15, 2023
2c5bf42
DETR with original source methods attributed
kieranfraser Mar 20, 2023
4b97069
DETR with changes to original src
kieranfraser Mar 20, 2023
fd09793
Removed unused misc files. Updated example notebook demonstrating ViT…
kieranfraser Mar 20, 2023
5fa5001
Completed tests. Added method to freeze multihead-attention module. U…
kieranfraser Apr 13, 2023
f943d67
Adding constructor for detr
kieranfraser Apr 13, 2023
61a814e
Moved notebook to correct folder for adversarial patch attack. Update…
kieranfraser Apr 13, 2023
1627d37
Updated formatting
kieranfraser Apr 19, 2023
b9d7fc7
Refactored loss classes to prevent tests for other frameworks failing
kieranfraser Apr 19, 2023
bedcc31
Refactored loss classes to prevent tests for other frameworks failing
kieranfraser Apr 19, 2023
c639bbd
Fix for static methods and styling
kieranfraser Apr 20, 2023
317aada
Framework check for detr tests
kieranfraser Apr 20, 2023
b48dff0
Updated class name, added typing and other minor fixes.
kieranfraser May 11, 2023
46f3958
Updated class name, added typing and other minor fixes.
kieranfraser May 11, 2023
8f62b12
Added test call to github workflow
kieranfraser May 11, 2023
478d9a7
fix Tensor Device Inconsistencies in pgd
May 5, 2023
64db977
Updates to DETR: cleaned up resizing; correct clipping. Updates to no…
kieranfraser Jun 13, 2023
8e4c89d
Fixing formatting
kieranfraser Jun 14, 2023
8248092
updated detection transformer notebook
kieranfraser Jun 14, 2023
a1757e0
Remove irrelevant PGD
kieranfraser Jun 14, 2023
cacc829
Merge remote-tracking branch 'upstream/dev_1.15.0' into dev_detection…
kieranfraser Jun 14, 2023
ef88ed2
Fixed pylint, mypy issues
kieranfraser Jun 14, 2023
a51b614
Remove print line
kieranfraser Jun 14, 2023
0ab98d0
Adding Apache License to original DETR functions
kieranfraser Jun 15, 2023
7a96e2c
Updated notebook with stronger adversarial patch attacks - targeted a…
kieranfraser Jun 15, 2023
0d15d2f
Removing comments to fix pylint test
kieranfraser Jun 15, 2023
40070ea
Adding missing license to functions
kieranfraser Jun 15, 2023
df3e298
Merge branch 'dev_1.15.0' into dev_detection_transformer
beat-buesser Jun 27, 2023
3e250a1
Standalone detr.py file for utility code from FB repo
kieranfraser Jun 28, 2023
496fcd3
Merge remote-tracking branch 'origin/dev_detection_transformer' into …
kieranfraser Jun 28, 2023
482b277
Removing duplicate license reference
kieranfraser Jun 28, 2023
d6ed99b
Updated reference to adapted detr functions under Apache 2.0
kieranfraser Jun 28, 2023
35f1d5a
Updated detr.py docstring with list of changes to Apache 2.0 code
kieranfraser Jun 28, 2023
3a97e66
Updated device in pytorch_detection_transformer.py and detr.py. Updat…
kieranfraser Jun 28, 2023
84c9e2b
mypy fix - .to should not be called if np.array
kieranfraser Jun 28, 2023
81408e5
Fix for black formatting
kieranfraser Jun 28, 2023

Changes from 1 commit:
Updates to DETR: cleaned up resizing; correct clipping. Updates to notebook: validated attack with PGD. Updates to test: correct loss computation and PGD.

Signed-off-by: Kieran Fraser <Kieran.Fraser@ibm.com>
kieranfraser committed Jun 14, 2023
commit 64db9774595b27e454e814d26e9667ab9ea4a87c
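
For orientation, a minimal sketch of the workflow this commit validates: building the Detection Transformer estimator and running PGD against it. The constructor arguments shown (`input_shape`, `clip_values`) and the zero-argument default model load are assumptions inferred from the diff below, not a confirmed API:

import numpy as np

from art.attacks.evasion import ProjectedGradientDescent
from art.estimators.object_detection import PyTorchDetectionTransformer

# Assumed arguments: channels-first 800x800 images with pixel values in [0, 1];
# with no model passed, the estimator is assumed to load a pretrained DETR.
detector = PyTorchDetectionTransformer(input_shape=(3, 800, 800), clip_values=(0, 1))

# Two random test images; predict() resizes inputs outside DETR's supported
# dimensions (see _apply_resizing in the diff) before running the model.
x = np.random.rand(2, 3, 800, 800).astype(np.float32)
predictions = detector.predict(x)  # list of {"boxes", "labels", "scores"} dicts

# PGD evasion attack, mirroring the updated test_pgd below.
attack = ProjectedGradientDescent(estimator=detector, max_iter=2)
x_adv = attack.generate(x=x, y=predictions)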
142 changes: 91 additions & 51 deletions art/estimators/object_detection/pytorch_detection_transformer.py
@@ -169,6 +169,7 @@ def __init__(
                         between 0 and H and 0 and W
                       - labels (Tensor[N]): the predicted labels for each image
                       - scores (Tensor[N]): the scores for each prediction
        :param input_shape: Tuple of the form `(channels, height, width)` of ints representing the input image shape
        :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
               maximum values allowed for features. If floats are provided, these will be used as the range of all
               features. If arrays are provided, each value will be considered the bound for a feature, thus
@@ -577,43 +578,10 @@ def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[s
                 - labels [N]: the labels for each image
                 - scores [N]: the scores for each prediction.
        """
        import cv2
        import torch

        # Check whether the image is within the min/max dimensions; if not, scale it to 1000.
        # If it is within the min/max dims but not square, resize to the max side of the image.
        if (
            self._input_shape[1] < self.MIN_IMAGE_SIZE
            or self._input_shape[1] > self.MAX_IMAGE_SIZE
            or self._input_shape[2] < self.MIN_IMAGE_SIZE
            or self.input_shape[2] > self.MAX_IMAGE_SIZE
        ):
            resized_imgs = []
            for i, _ in enumerate(x):
                resized_imgs.append(
                    cv2.resize(
                        (x * 255)[i].transpose(1, 2, 0).astype(np.uint8),
                        dsize=(1000, 1000),
                        interpolation=cv2.INTER_CUBIC,
                    )
                )
            x = (np.array(resized_imgs) / 255).transpose(0, 3, 1, 2).astype(np.float32)
        elif self._input_shape[1] != self._input_shape[2]:
            rescale_dim = max(self._input_shape[1], self._input_shape[2])
            resized_imgs = []
            for i, _ in enumerate(x):
                resized_imgs.append(
                    cv2.resize(
                        (x * 255)[i].transpose(1, 2, 0).astype(np.uint8),
                        dsize=(rescale_dim, rescale_dim),
                        interpolation=cv2.INTER_CUBIC,
                    )
                )
            x = (np.array(resized_imgs) / 255).transpose(0, 3, 1, 2).astype(np.float32)

        x = x.copy()

        self._model.eval()
        x, _ = self._apply_resizing(x, None)

        # Apply preprocessing
        x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)
@@ -633,7 +601,7 @@ def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[s
            predictions.append(
                {
                    "boxes": rescale_bboxes(
                        model_output["pred_boxes"][i, :, :], (self._input_shape[1], self._input_shape[2])
                        model_output["pred_boxes"][i, :, :], (self._input_shape[2], self._input_shape[1])
                    )
                    .detach()
                    .numpy(),
@@ -765,22 +733,8 @@ def loss_gradient(
                 - labels (Tensor[N]): the predicted labels for each image
        :return: Loss gradients of the same shape as `x`.
        """
        import torch

        _y = []
        for target in y:
            cxcy_norm = revert_rescale_bboxes(
                torch.from_numpy(target["boxes"]), (self.input_shape[1], self.input_shape[2])
            )
            _y.append(
                {
                    "labels": torch.from_numpy(target["labels"]).type(torch.int64).to(self.device),
                    "boxes": cxcy_norm.to(self.device),
                    "scores": torch.from_numpy(target["scores"]).type(torch.float).to(self.device),
                }
            )

        output, inputs_t, image_tensor_list_grad = self._get_losses(x=x, y=_y)
        x, y = self._apply_resizing(x, y)
        output, inputs_t, image_tensor_list_grad = self._get_losses(x=x, y=y)
        loss = sum(output[k] * self.weight_dict[k] for k in output.keys() if k in self.weight_dict)

        self._model.zero_grad()
@@ -833,6 +787,7 @@ def compute_losses(
                 - scores (Tensor[N]): the scores for each prediction.
        :return: Dictionary of loss components.
        """
        x, y = self._apply_resizing(x, y)
        output_tensor, _, _ = self._get_losses(x=x, y=y)
        output = {}
        for key, value in output_tensor.items():
@@ -859,6 +814,7 @@ def compute_loss( # type: ignore
        """
        import torch

        x, y = self._apply_resizing(x, y)
        output, _, _ = self._get_losses(x=x, y=y)

        # Compute the gradient and return
@@ -876,6 +832,90 @@

        return loss.detach().cpu().numpy()

    def _apply_resizing(
        self,
        x: Union[np.ndarray, "torch.Tensor"],
        y: List[Dict[str, Union[np.ndarray, "torch.Tensor"]]],
        height: int = 800,
        width: int = 800,
    ):
        """
        Resize the input and targets to dimensions expected by DETR.

        :param x: Array or Tensor representing images of any size
        :param y: List of targets to be transformed
        :param height: Int representing desired height, the default is compatible with DETR
        :param width: Int representing desired width, the default is compatible with DETR
        :return: Resized images and targets with boxes reverted to normalized cx-cy-w-h format
        """
        import cv2
        import torch
        import torchvision.transforms as T

        if (
            self._input_shape[1] < self.MIN_IMAGE_SIZE
            or self._input_shape[1] > self.MAX_IMAGE_SIZE
            or self._input_shape[2] < self.MIN_IMAGE_SIZE
            or self._input_shape[2] > self.MAX_IMAGE_SIZE
        ):
            if isinstance(x, torch.Tensor):
                x = T.Resize(size=(height, width))(x)
            else:
                resized_imgs = []
                for i, _ in enumerate(x):
                    # cv2.resize expects dsize as (width, height)
                    resized = cv2.resize(
                        x[i].transpose(1, 2, 0),
                        dsize=(width, height),
                        interpolation=cv2.INTER_CUBIC,
                    )
                    resized_imgs.append(resized.transpose(2, 0, 1))
                x = np.array(resized_imgs)

        elif self._input_shape[1] != self._input_shape[2]:
            rescale_dim = max(self._input_shape[1], self._input_shape[2])
            if isinstance(x, torch.Tensor):
                x = T.Resize(size=(rescale_dim, rescale_dim))(x)
            else:
                resized_imgs = []
                for i, _ in enumerate(x):
                    resized = cv2.resize(
                        x[i].transpose(1, 2, 0),
                        dsize=(rescale_dim, rescale_dim),
                        interpolation=cv2.INTER_CUBIC,
                    )
                    resized_imgs.append(resized.transpose(2, 0, 1))
                x = np.array(resized_imgs)

        targets = []
        if y is not None:
            if isinstance(y[0]["boxes"], torch.Tensor):
                for target in y:
                    cxcy_norm = revert_rescale_bboxes(target["boxes"], (self._input_shape[2], self._input_shape[1]))
                    targets.append(
                        {
                            "labels": target["labels"].type(torch.int64).to(self.device),
                            "boxes": cxcy_norm.to(self.device),
                            "scores": target["scores"].type(torch.float).to(self.device),
                        }
                    )
            else:
                for target in y:
                    cxcy_norm = revert_rescale_bboxes(
                        torch.from_numpy(target["boxes"]), (self._input_shape[2], self._input_shape[1])
                    )
                    targets.append(
                        {
                            "labels": torch.from_numpy(target["labels"]).type(torch.int64).to(self.device),
                            "boxes": cxcy_norm.to(self.device),
                            "scores": torch.from_numpy(target["scores"]).type(torch.float).to(self.device),
                        }
                    )

        return x, targets

class NestedTensor:
"""
Expand Down
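
Aside: rescale_bboxes and revert_rescale_bboxes, adapted from the upstream DETR utilities, convert between DETR's normalized (cx, cy, w, h) box format and absolute (x1, y1, x2, y2) corners. Upstream takes the size argument as (width, height), which is why this commit swaps the tuples to (self._input_shape[2], self._input_shape[1]). A minimal sketch of the conversion under those assumed upstream semantics (the ART signatures may differ):

import torch

def rescale_bboxes_sketch(out_bbox: torch.Tensor, size: tuple) -> torch.Tensor:
    # Normalized (cx, cy, w, h) -> absolute (x1, y1, x2, y2); size is (width, height).
    img_w, img_h = size
    c_x, c_y, w, h = out_bbox.unbind(-1)
    corners = torch.stack([c_x - 0.5 * w, c_y - 0.5 * h, c_x + 0.5 * w, c_y + 0.5 * h], dim=-1)
    return corners * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)

def revert_rescale_bboxes_sketch(boxes: torch.Tensor, size: tuple) -> torch.Tensor:
    # Absolute (x1, y1, x2, y2) -> normalized (cx, cy, w, h); the inverse of the above.
    img_w, img_h = size
    boxes = boxes / torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    x_1, y_1, x_2, y_2 = boxes.unbind(-1)
    return torch.stack([(x_1 + x_2) / 2, (y_1 + y_2) / 2, x_2 - x_1, y_2 - y_1], dim=-1)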
483 changes: 202 additions & 281 deletions notebooks/adversarial_patch/attack_adversarial_patch_detr.ipynb

Large diffs are not rendered by default.
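
Since the notebook diff is not rendered, here is a hedged sketch of the adversarial patch attack it demonstrates (per the commit history, targeted variants were also tried). AdversarialPatchPyTorch is ART's existing patch attack; every hyperparameter value below is an illustrative assumption, not taken from the notebook:

import numpy as np

from art.attacks.evasion import AdversarialPatchPyTorch
from art.estimators.object_detection import PyTorchDetectionTransformer

detector = PyTorchDetectionTransformer(input_shape=(3, 800, 800), clip_values=(0, 1))
x = np.random.rand(2, 3, 800, 800).astype(np.float32)
y = detector.predict(x)  # reuse current detections as labels, for illustration only

# Assumed hyperparameters; the notebook's actual settings may differ.
attack = AdversarialPatchPyTorch(
    detector,
    rotation_max=0.0,
    scale_min=0.5,
    scale_max=1.0,
    learning_rate=1.99,
    max_iter=100,
    batch_size=16,
    patch_shape=(3, 300, 300),
    targeted=True,
)
patch, patch_mask = attack.generate(x=x, y=y)
x_patched = attack.apply_patch(x, scale=0.4)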

@@ -74,28 +74,18 @@ def test_predict(get_pytorch_detr):
    assert list(result[0].keys()) == ["boxes", "labels", "scores"]

    assert result[0]["boxes"].shape == (100, 4)
    expected_detection_boxes = np.asarray([9.0386868e-03, 5.1708374e00, 7.4301929e00, 3.1964935e01])
    expected_detection_boxes = np.asarray([-5.9490204e-03, 1.1947733e+01, 3.1993944e+01, 3.1925127e+01])
    np.testing.assert_array_almost_equal(result[0]["boxes"][2, :], expected_detection_boxes, decimal=3)

    assert result[0]["scores"].shape == (100,)
    expected_detection_scores = np.asarray(
        [
            0.00383973,
            0.0167976,
            0.01714019,
            0.00073999,
            0.00467391,
            0.02399586,
            0.00093301,
            0.02143953,
            0.00202136,
            0.00266351,
        ]
        [0.00679839, 0.0250559, 0.07205943, 0.01115368, 0.03321039,
         0.10407761, 0.00113309, 0.01442852, 0.00527624, 0.01240906]
    )
    np.testing.assert_array_almost_equal(result[0]["scores"][:10], expected_detection_scores, decimal=6)

    assert result[0]["labels"].shape == (100,)
    expected_detection_classes = np.asarray([17, 17, 17, 3, 88, 17, 17, 17, 88, 17])
    expected_detection_classes = np.asarray([17, 17, 33, 17, 17, 17, 74, 17, 17, 17])
    np.testing.assert_array_almost_equal(result[0]["labels"][:10], expected_detection_classes, decimal=6)
@@ -106,84 +96,31 @@ def test_loss_gradient(get_pytorch_detr):

    grads = object_detector.loss_gradient(x=x_test, y=y_test)

    assert grads.shape == (2, 3, 32, 32)
    assert grads.shape == (2, 3, 800, 800)

    expected_gradients1 = np.asarray(
        [
            0.04711548,
            0.25275955,
            0.3609573,
            -0.02207462,
            0.02886475,
            0.05820496,
            0.04151949,
            -0.07008387,
            0.24270807,
            0.17703517,
            -0.29346713,
            -0.11548031,
            -0.15658003,
            -0.1412788,
            0.02577158,
            -0.00550455,
            0.05846804,
            -0.04419752,
            0.06333683,
            -0.15242189,
            -0.06642783,
            -0.09545745,
            -0.01154867,
            0.07477856,
            0.05444539,
            0.01678686,
            0.01427085,
            0.01382115,
            -0.15745601,
            -0.13278124,
            0.06169066,
            -0.03915803,
        ]
        [-0.00061366, 0.00322502, -0.00039866, -0.00807413, -0.00476555,
         0.00181204, 0.01007765, 0.00415828, -0.00073114, 0.00018387,
         -0.00146992, -0.00119636, -0.00098966, -0.00295517, -0.0024271,
         -0.00131314, -0.00149217, -0.00104926, -0.00154239, -0.00110989,
         0.00092887, 0.00049146, -0.00292508, -0.00124526, 0.00140347,
         0.00019833, 0.00191074, -0.00117537, -0.00080604, 0.00057427,
         -0.00061728, -0.00206535]
    )

    np.testing.assert_array_almost_equal(grads[0, 0, 10, :], expected_gradients1, decimal=2)
    np.testing.assert_array_almost_equal(grads[0, 0, 10, :32], expected_gradients1, decimal=2)

    expected_gradients2 = np.asarray(
        [
            -0.10913675,
            0.00539385,
            0.11588555,
            0.02486979,
            -0.23739402,
            -0.01673118,
            -0.09709811,
            0.00763445,
            0.10815062,
            -0.3278629,
            -0.23222731,
            0.28806347,
            -0.14222082,
            -0.24168995,
            -0.20170388,
            -0.24570045,
            -0.01220985,
            -0.18616645,
            -0.19678666,
            -0.12424485,
            -0.36253023,
            0.08978511,
            -0.02874891,
            -0.09320692,
            -0.26761073,
            -0.34595487,
            -0.34932154,
            -0.21606845,
            -0.07342689,
            -0.0573133,
            -0.04900078,
            0.03462576,
        ]
        [-1.1787530e-03, -2.8500680e-03, 5.0884970e-03, 6.4504531e-04,
         -6.8841036e-05, 2.8184296e-03, 3.0257765e-03, 2.8565727e-04,
         -1.0701057e-04, 1.2945699e-03, 7.3593057e-04, 1.0177144e-03,
         -2.4692707e-03, -1.3801848e-03, 6.3182280e-04, -4.2305476e-04,
         4.4307750e-04, 8.5821096e-04, -7.1204413e-04, -3.1404425e-03,
         -1.5964351e-03, -1.9222996e-03, -5.3157361e-04, -9.9202688e-04,
         -1.5815455e-03, 2.0060266e-04, -2.0584739e-03, 6.6960667e-04,
         9.7393827e-04, -1.6040013e-03, -6.9741381e-04, 1.4657658e-04]
    )
    np.testing.assert_array_almost_equal(grads[1, 0, 10, :], expected_gradients2, decimal=2)
    np.testing.assert_array_almost_equal(grads[1, 0, 10, :32], expected_gradients2, decimal=2)


@pytest.mark.only_with_platform("pytorch")
@@ -251,7 +188,7 @@ def test_preprocessing_defences(get_pytorch_detr):
    # Compute gradients
    grads = object_detector.loss_gradient(x=x_test, y=y)

    assert grads.shape == (2, 3, 32, 32)
    assert grads.shape == (2, 3, 800, 800)


@pytest.mark.only_with_platform("pytorch")
@@ -286,7 +223,7 @@ def test_compute_loss(get_pytorch_detr):
    # Compute loss
    loss = object_detector.compute_loss(x=x_test, y=y)

    assert pytest.approx(63.9855, abs=0.01) == float(loss)
    assert pytest.approx(3.9634, abs=0.01) == float(loss)


@pytest.mark.only_with_platform("pytorch")
@@ -295,6 +232,14 @@ def test_pgd(get_pytorch_detr):
    object_detector, x_test, y_test = get_pytorch_detr

    from art.attacks.evasion import ProjectedGradientDescent
    from PIL import Image

    # Resize the test images to the DETR-compatible 800x800 so that the PGD
    # perturbation and loss gradients match the input shape.
    imgs = []
    for i in x_test:
        img = Image.fromarray((i * 255).astype(np.uint8).transpose(1, 2, 0))
        img = img.resize(size=(800, 800))
        imgs.append(np.array(img))
    x_test = np.array(imgs).transpose(0, 3, 1, 2)

    attack = ProjectedGradientDescent(estimator=object_detector, max_iter=2)
    x_test_adv = attack.generate(x=x_test, y=y_test)