
Commit

Merge pull request #1163 from iamgroot42/torch/sparse_l1
[MRG] Sparse L1 Descent for PyTorch
nick-jia authored Mar 18, 2021
2 parents 42e8045 + 076eb95 commit 43af686
Showing 3 changed files with 396 additions and 0 deletions.
178 changes: 178 additions & 0 deletions cleverhans/torch/attacks/sparse_l1_descent.py
@@ -0,0 +1,178 @@
"""The SparseL1Descent attack."""
import numpy as np
import torch

from cleverhans.torch.utils import zero_out_clipped_grads


def sparse_l1_descent(
model_fn,
x,
eps=10.0,
eps_iter=1.0,
nb_iter=20,
y=None,
targeted=False,
clip_min=None,
clip_max=None,
rand_init=False,
clip_grad=False,
grad_sparsity=99,
sanity_checks=True,
):
"""
This function implements a variant of Projected Gradient Descent for the l1-norm
(Tramer and Boneh 2019). The l1-norm case is trickier than the l-inf and l2
cases covered by the ProjectedGradientDescent class, because the steepest
descent direction for the l1-norm is too sparse (it updates a single
coordinate of the adversarial perturbation in each step). This attack has an
additional parameter that controls the sparsity of the update step. For
moderately sparse update steps, the attack vastly outperforms Projected
Steepest Descent and is competitive with other attacks targeted at the l1-norm
such as the ElasticNetMethod attack (which is much more computationally
expensive).
Paper link (Tramer and Boneh 2019): https://arxiv.org/pdf/1904.13000.pdf
:param model_fn: a callable that takes an input tensor and returns the model logits.
:param x: input tensor.
:param eps: (optional float) maximum distortion of adversarial example
compared to original input
:param eps_iter: (optional float) step size for each attack iteration
:param nb_iter: (optional int) Number of attack iterations.
:param y: (optional) A tensor with the true labels.
:param targeted: (optional) bool. Is the attack targeted or untargeted?
Untargeted, the default, will try to make the label incorrect.
Targeted will instead try to move in the direction of being more like y.
:param clip_min: (optional float) Minimum input component value
:param clip_max: (optional float) Maximum input component value
:param rand_init: (optional bool) Whether to start the attack from a random
point inside the eps l1-ball around x instead of from x itself.
:param clip_grad: (optional bool) Ignore gradient components at positions
where the input is already at the boundary of the domain, so that the
update step would be clipped out anyway.
:param grad_sparsity: (optional) Relative sparsity of the gradient update
step, in percent. Only gradient values at or above this percentile are
retained. This parameter can be a scalar, or a tensor of the same length
as the input batch dimension.
:param sanity_checks: bool, if True, include asserts (Turn them off to use less runtime /
memory or for unit tests that intentionally pass strange input)
:return: a tensor for the adversarial examples
"""
if clip_grad and (clip_min is None or clip_max is None):
raise ValueError("Must set clip_min and clip_max if clip_grad is set")

# The grad_sparsity argument governs the sparsity of the gradient
# update. It indicates the percentile value above which gradient entries
# are retained. It can be specified as a scalar or as a 1-dimensional
# tensor of the same size as the input's batch dimension.
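# For example (illustrative values only): grad_sparsity=99 keeps roughly the
# top 1% of gradient entries by magnitude for every example, while passing
# torch.full((x.shape[0],), 95.0) applies a separate 95th-percentile
# threshold to each example in the batch.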
if isinstance(grad_sparsity, int) or isinstance(grad_sparsity, float):
if not 0 < grad_sparsity < 100:
raise ValueError("grad_sparsity should be in (0, 100)")
else:
grad_sparsity = torch.tensor(grad_sparsity)
if len(grad_sparsity.shape) > 1:
raise ValueError("grad_sparsity should either be a scalar or a tensor")
grad_sparsity = grad_sparsity.to(x.device)
if grad_sparsity.shape[0] != x.shape[0]:
raise ValueError(
"grad_sparsity should have same length as input if it is a tensor"
)

asserts = []

# eps_iter should be at most eps
asserts.append(eps_iter <= eps)

# If a data range was specified, check that the input was in that range
if clip_min is not None:
assert_ge = torch.all(
torch.ge(x, torch.tensor(clip_min, device=x.device, dtype=x.dtype))
)
asserts.append(assert_ge)

if clip_max is not None:
assert_le = torch.all(
torch.le(x, torch.tensor(clip_max, device=x.device, dtype=x.dtype))
)
asserts.append(assert_le)

if sanity_checks:
assert np.all(asserts)

# Initialize loop variables
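# With rand_init, eta is sampled from a Laplace distribution, normalized by
# its per-example l1-norm, and scaled by a random radius w * eps, so the
# starting point lies inside the eps l1-ball around x.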
if rand_init:
dist = torch.distributions.laplace.Laplace(
torch.tensor([1.0]), torch.tensor([1.0])
)
dim = torch.prod(torch.tensor(x.shape[1:]))
eta = dist.sample([x.shape[0], dim]).squeeze(-1).to(x.device)
norm = torch.sum(torch.abs(eta), axis=-1, keepdim=True)
w = torch.pow(
torch.rand(x.shape[0], 1, device=x.device), torch.tensor(1.0 / dim)
)
eta = torch.reshape(eps * (w * eta / norm), x.shape)
else:
eta = torch.zeros_like(x)

# Clip eta
adv_x = x.clone().detach().requires_grad_(True)
eta = eta.renorm(p=1, dim=0, maxnorm=eps)
adv_x = adv_x + eta

if clip_min is not None or clip_max is not None:
# Clamp adv_x (not x), so that a random initialization is preserved.
adv_x = torch.clamp(adv_x, clip_min, clip_max)

if y is None:
y = torch.argmax(model_fn(x), 1)

criterion = torch.nn.CrossEntropyLoss(reduction="none")
for i in range(nb_iter):
adv_x = adv_x.clone().detach().to(torch.float).requires_grad_(True)
logits = model_fn(adv_x)

# Compute loss
loss = criterion(logits, y)
if targeted:
loss = -loss

# Define gradient of loss wrt input
(grad,) = torch.autograd.grad(loss.mean(), [adv_x])

if clip_grad:
grad = zero_out_clipped_grads(grad, adv_x, clip_min, clip_max)

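# Sparsify the update: for each example, keep only the gradient entries whose
# magnitude is at or above the grad_sparsity percentile, then distribute a
# unit-l1-norm direction equally over the retained (tied) coordinates; the
# direction is scaled by eps_iter below.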
grad_view = grad.view(grad.shape[0], -1)
abs_grad = torch.abs(grad_view)

if isinstance(grad_sparsity, int) or isinstance(grad_sparsity, float):
k = int(grad_sparsity / 100.0 * abs_grad.shape[1])
percentile_value, _ = torch.kthvalue(abs_grad, k, keepdim=True)
else:
k = (grad_sparsity / 100.0 * abs_grad.shape[1]).long()
percentile_value, _ = torch.sort(abs_grad, dim=1)
percentile_value = percentile_value.gather(1, k.view(-1, 1))

percentile_value = percentile_value.repeat(1, grad_view.shape[1])
tied_for_max = torch.ge(abs_grad, percentile_value).int().float()
num_ties = torch.sum(tied_for_max, dim=1, keepdim=True)

optimal_perturbation = (torch.sign(grad_view) * tied_for_max) / num_ties
optimal_perturbation = optimal_perturbation.view(grad.shape)

# Add perturbation to original example to obtain adversarial example
adv_x = adv_x + optimal_perturbation * eps_iter

# If clipping is needed, reset all values outside of [clip_min, clip_max]
if clip_min is not None or clip_max is not None:
adv_x = torch.clamp(adv_x, clip_min, clip_max)

# Clipping perturbation eta to the l1-ball
eta = adv_x - x
eta = eta.renorm(p=1, dim=0, maxnorm=eps)
adv_x = x + eta

# Redo the clipping.
# Subtracting and re-adding eta can add some small numerical error.
if clip_min is not None or clip_max is not None:
adv_x = torch.clamp(adv_x, clip_min, clip_max)

return adv_x.detach()
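
For context, the sketch below shows one way the new function might be called. It is not part of this diff: the small classifier and the random batch are hypothetical stand-ins, and the hyperparameters simply mirror the defaults documented above.

import torch
from cleverhans.torch.attacks.sparse_l1_descent import sparse_l1_descent

# Hypothetical stand-in classifier; any callable returning logits works.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(28 * 28, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 10),
)
x = torch.rand(16, 1, 28, 28)  # dummy batch of inputs in [0, 1]

# Untargeted attack constrained to an l1-ball of radius eps=10.
x_adv = sparse_l1_descent(
    model,
    x,
    eps=10.0,
    eps_iter=1.0,
    nb_iter=20,
    grad_sparsity=99,
    clip_min=0.0,
    clip_max=1.0,
)

# The per-example l1 distortion should stay within eps (up to numerical error).
print(torch.norm((x_adv - x).view(x.shape[0], -1), p=1, dim=1).max())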
197 changes: 197 additions & 0 deletions cleverhans/torch/tests/test_attacks.py
@@ -16,6 +16,7 @@
from cleverhans.torch.attacks.carlini_wagner_l2 import carlini_wagner_l2
from cleverhans.torch.attacks.spsa import spsa
from cleverhans.torch.attacks.hop_skip_jump_attack import hop_skip_jump_attack
from cleverhans.torch.attacks.sparse_l1_descent import sparse_l1_descent


class TrivialModel(torch.nn.Module):
@@ -40,6 +41,20 @@ def forward(self, x):
return x


class DummyModel(torch.nn.Module):
def __init__(self, n_features):
super(DummyModel, self).__init__()
self.model = torch.nn.Sequential(
torch.nn.Linear(n_features, 60),
torch.nn.ReLU(),
torch.nn.Linear(60, 10),
)

def forward(self, x):
x = x.view(x.shape[0], -1)
return self.model(x)


class CommonAttackProperties(CleverHansTest):
def setUp(self):
super(CommonAttackProperties, self).setUp()
@@ -848,3 +863,185 @@ def test_generate_targeted_l2(self):
adv_acc = new_labs.eq(y_target).sum().to(torch.float) / y_target.size(0)

self.assertGreater(adv_acc, 0.9)


class TestSparseL1Descent(CommonAttackProperties):
def setUp(self):
super(TestSparseL1Descent, self).setUp()
self.attack = sparse_l1_descent

def generate_adversarial_examples(self, **kwargs):
x_adv = self.attack(model_fn=self.model, x=self.normalized_x, **kwargs)
_, ori_label = self.model(self.normalized_x).max(1)
_, adv_label = self.model(x_adv).max(1)
adv_acc = adv_label.eq(ori_label).sum().to(
torch.float
) / self.normalized_x.size(0)

delta = torch.sum(torch.abs(x_adv - self.normalized_x), dim=1)
return x_adv, delta, adv_acc

def generate_targeted_adversarial_examples(self, **kwargs):
y_target = torch.randint(low=0, high=2, size=(self.normalized_x.size(0),))
x_adv = self.attack(
model_fn=self.model,
x=self.normalized_x,
y=y_target,
targeted=True,
**kwargs
)

_, adv_label = self.model(x_adv).max(1)
adv_success = adv_label.eq(y_target).sum().to(
torch.float
) / self.normalized_x.size(0)

delta = torch.sum(torch.abs(x_adv - self.normalized_x), dim=1)
return x_adv, delta, adv_success

def test_invalid_input(self):
x_val = -torch.ones((2, 2))
with self.assertRaises(AssertionError):
self.attack(self.model, x_val, eps=10.0, clip_min=0.0, clip_max=1.0)

def test_gives_adversarial_example(self):
_, delta, adv_acc = self.generate_adversarial_examples(
eps=2, clip_min=-5, clip_max=5
)
self.assertLess(adv_acc, 0.5)
self.assertLess(torch.max(torch.abs(delta - 2)), 1e-3)

def test_targeted_gives_adversarial_example(self):
_, delta, adv_acc = self.generate_targeted_adversarial_examples(
eps=10, clip_min=-5, clip_max=5
)
self.assertGreater(adv_acc, 0.7)
self.assertLessEqual(torch.max(delta), 10.001)

def test_can_be_called_with_different_eps(self):
for eps in [10, 20, 30, 40]:
_, delta, _ = self.generate_adversarial_examples(
eps=eps, clip_min=-5, clip_max=5
)
self.assertLessEqual(torch.max(delta), eps + 1e-4)

def test_clip_works_as_expected(self):
x_adv, _, _ = self.generate_adversarial_examples(
eps=10,
nb_iter=20,
rand_init=True,
clip_min=-0.2,
clip_max=0.1,
sanity_checks=False,
)

self.assertClose(torch.min(x_adv), -0.2)
self.assertClose(torch.max(x_adv), 0.1)

def test_do_not_reach_lp_boundary(self):
"""
Make sure that the iterative attack does not reach the boundary of the Lp
neighbourhood when nb_iter * eps_iter is small relative to epsilon.
"""
_, delta, _ = self.generate_adversarial_examples(
eps=0.5, clip_min=-5, clip_max=5, nb_iter=10, eps_iter=0.01
)
self.assertTrue(torch.max(0.5 - delta) > 0.25)

def test_generate_np_gives_clipped_adversarial_examples(self):
x_adv, _, _ = self.generate_adversarial_examples(
eps=1.0,
eps_iter=0.1,
nb_iter=5,
clip_min=-0.2,
clip_max=0.3,
sanity_checks=False,
)

self.assertLess(-0.201, torch.min(x_adv))
self.assertLess(torch.max(x_adv), 0.301)

def test_clip_eta(self):
_, delta, _ = self.generate_adversarial_examples(
eps=1, clip_min=-5, clip_max=5, nb_iter=5, eps_iter=0.1
)

# this projection is less numerically stable so give it some slack
self.assertLessEqual(torch.max(delta), 1.0 + 1e-6)

def test_attack_strength(self):
# sanity checks turned off because this test initializes outside
# the valid range.
_, _, adv_acc = self.generate_adversarial_examples(
eps=10,
rand_init=True,
clip_min=0.5,
clip_max=0.7,
nb_iter=10,
sanity_checks=False,
)

self.assertLess(adv_acc, 0.4)

def test_grad_clip(self):
"""
With clipped gradients, the attack should still flip (nearly) all labels,
i.e. np.mean(orig_labels == new_labels) should be close to 0.0.
"""

# sanity checks turned off because this test initializes outside
# the valid range.
_, _, adv_acc = self.generate_adversarial_examples(
eps=10,
rand_init=True,
clip_grad=True,
clip_min=0.5,
clip_max=0.7,
nb_iter=10,
sanity_checks=False,
)
self.assertLess(adv_acc, 0.1)

def test_sparsity(self):
# use a model with larger input dimensionality for this test.
model_fn = DummyModel(1000)
x_val = torch.rand(100, 1000)

for q in [1, 9, 25.8, 50, 75.4, 90.2, 99, 99.9]:
x_adv = self.attack(
model_fn,
x_val,
eps=5.0,
grad_sparsity=q,
nb_iter=1,
sanity_checks=False,
)

numzero = torch.sum(x_adv - x_val == 0, dim=-1).float()
self.assertAlmostEqual(q * 1000.0 / 100.0, torch.mean(numzero), delta=1)

def test_grad_sparsity_checks(self):
# test that the attack allows `grad_sparsity` to be specified as a scalar
# in (0, 100) or as a vector.

# scalar values out of range
with self.assertRaises(ValueError):
self.generate_adversarial_examples(grad_sparsity=0)

with self.assertRaises(ValueError):
self.generate_adversarial_examples(grad_sparsity=100)

# sparsity as 2D array should fail
with self.assertRaises(ValueError):
gs = torch.empty(100, 2).uniform_(90, 99)
self.generate_adversarial_examples(sanity_checks=False, grad_sparsity=gs)

# sparsity as 1D array should succeed
gs = torch.empty(100).uniform_(90, 99)
self.generate_adversarial_examples(sanity_checks=False, grad_sparsity=gs)

# sparsity vector of wrong size should fail
with self.assertRaises(ValueError) as context:
gs = torch.empty(101).uniform_(90, 99)
self.generate_adversarial_examples(sanity_checks=False, grad_sparsity=gs)
