
Commit

Merge pull request #1163 from iamgroot42/torch/sparse_l1
[MRG] Sparse L1 Descent for PyTorch
nick-jia authored Mar 18, 2021
2 parents 42e8045 + 076eb95 commit 43af686
Showing 3 changed files with 396 additions and 0 deletions.
178 changes: 178 additions & 0 deletions cleverhans/torch/attacks/sparse_l1_descent.py
@@ -0,0 +1,178 @@
"""The SparseL1Descent attack."""
import numpy as np
import torch

from cleverhans.torch.utils import zero_out_clipped_grads


def sparse_l1_descent(
model_fn,
x,
eps=10.0,
eps_iter=1.0,
nb_iter=20,
y=None,
targeted=False,
clip_min=None,
clip_max=None,
rand_init=False,
clip_grad=False,
grad_sparsity=99,
sanity_checks=True,
):
"""
This function implements a variant of Projected Gradient Descent for the l1-norm
(Tramer and Boneh 2019). The l1-norm case is trickier than the l-inf and l2
cases covered by the ProjectedGradientDescent class, because the steepest
descent direction for the l1-norm is too sparse (it updates a single
coordinate of the adversarial perturbation in each step). This attack has an
additional parameter that controls the sparsity of the update step. For
moderately sparse update steps, the attack vastly outperforms Projected
Steepest Descent and is competitive with other attacks targeted at the l1-norm
such as the ElasticNetMethod attack (which is much more computationally
expensive).
Paper link (Tramer and Boneh 2019): https://arxiv.org/pdf/1904.13000.pdf
:param model_fn: a callable that takes an input tensor and returns the model logits.
:param x: input tensor.
:param eps: (optional float) maximum distortion of adversarial example
compared to original input
:param eps_iter: (optional float) step size for each attack iteration
:param nb_iter: (optional int) Number of attack iterations.
:param y: (optional) A tensor with the true labels.
:param targeted: (optional) bool. Is the attack targeted or untargeted?
Untargeted, the default, will try to make the label incorrect.
Targeted will instead try to move in the direction of being more like y.
:param clip_min: (optional float) Minimum input component value
:param clip_max: (optional float) Maximum input component value
:param rand_init: (optional bool) Whether to start the attack from a random
point inside the eps l1-ball around x instead of from x itself.
:param clip_grad: (optional bool) Ignore gradient components at positions
where the input is already at the boundary of the domain, so that the
update step would be clipped out anyway.
:param grad_sparsity: (optional) Relative sparsity of the gradient update
step, in percent. Only gradient values at or above this percentile are
retained. This parameter can be a scalar, or a tensor of the same length
as the input batch dimension.
:param sanity_checks: bool, if True, include asserts (Turn them off to use less runtime /
memory or for unit tests that intentionally pass strange input)
:return: a tensor for the adversarial examples
"""
if clip_grad and (clip_min is None or clip_max is None):
raise ValueError("Must set clip_min and clip_max if clip_grad is set")

# The grad_sparsity argument governs the sparsity of the gradient
# update. It indicates the percentile value above which gradient entries
# are retained. It can be specified as a scalar or as a 1-dimensional
# tensor of the same size as the input's batch dimension.
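# For example (illustrative values only): grad_sparsity=99 keeps roughly the
# top 1% of gradient entries by magnitude for every example, while passing
# torch.full((x.shape[0],), 95.0) applies a separate 95th-percentile
# threshold to each example in the batch.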
if isinstance(grad_sparsity, int) or isinstance(grad_sparsity, float):
if not 0 < grad_sparsity < 100:
raise ValueError("grad_sparsity should be in (0, 100)")
else:
grad_sparsity = torch.tensor(grad_sparsity)
if len(grad_sparsity.shape) > 1:
raise ValueError("grad_sparsity should either be a scalar or a tensor")
grad_sparsity = grad_sparsity.to(x.device)
if grad_sparsity.shape[0] != x.shape[0]:
raise ValueError(
"grad_sparsity should have same length as input if it is a tensor"
)

asserts = []

# eps_iter should be at most eps
asserts.append(eps_iter <= eps)

# If a data range was specified, check that the input was in that range
if clip_min is not None:
assert_ge = torch.all(
torch.ge(x, torch.tensor(clip_min, device=x.device, dtype=x.dtype))
)
asserts.append(assert_ge)

if clip_max is not None:
assert_le = torch.all(
torch.le(x, torch.tensor(clip_max, device=x.device, dtype=x.dtype))
)
asserts.append(assert_le)

if sanity_checks:
assert np.all(asserts)

# Initialize loop variables
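# With rand_init, eta is sampled from a Laplace distribution, normalized by
# its per-example l1-norm, and scaled by a random radius w * eps, so the
# starting point lies inside the eps l1-ball around x.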
if rand_init:
dist = torch.distributions.laplace.Laplace(
torch.tensor([1.0]), torch.tensor([1.0])
)
dim = torch.prod(torch.tensor(x.shape[1:]))
eta = dist.sample([x.shape[0], dim]).squeeze(-1).to(x.device)
norm = torch.sum(torch.abs(eta), axis=-1, keepdim=True)
w = torch.pow(
torch.rand(x.shape[0], 1, device=x.device), torch.tensor(1.0 / dim)
)
eta = torch.reshape(eps * (w * eta / norm), x.shape)
else:
eta = torch.zeros_like(x)

# Clip eta
adv_x = x.clone().detach().requires_grad_(True)
eta = eta.renorm(p=1, dim=0, maxnorm=eps)
adv_x = adv_x + eta

if clip_min is not None or clip_max is not None:
# Clamp adv_x (not x), so that a random initialization is preserved.
adv_x = torch.clamp(adv_x, clip_min, clip_max)

if y is None:
y = torch.argmax(model_fn(x), 1)

criterion = torch.nn.CrossEntropyLoss(reduction="none")
for i in range(nb_iter):
adv_x = adv_x.clone().detach().to(torch.float).requires_grad_(True)
logits = model_fn(adv_x)

# Compute loss
loss = criterion(logits, y)
if targeted:
loss = -loss

# Define gradient of loss wrt input
(grad,) = torch.autograd.grad(loss.mean(), [adv_x])

if clip_grad:
grad = zero_out_clipped_grads(grad, adv_x, clip_min, clip_max)

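# Sparsify the update: for each example, keep only the gradient entries whose
# magnitude is at or above the grad_sparsity percentile, then distribute a
# unit-l1-norm direction equally over the retained (tied) coordinates; the
# direction is scaled by eps_iter below.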
grad_view = grad.view(grad.shape[0], -1)
abs_grad = torch.abs(grad_view)

if isinstance(grad_sparsity, int) or isinstance(grad_sparsity, float):
k = int(grad_sparsity / 100.0 * abs_grad.shape[1])
percentile_value, _ = torch.kthvalue(abs_grad, k, keepdim=True)
else:
k = (grad_sparsity / 100.0 * abs_grad.shape[1]).long()
percentile_value, _ = torch.sort(abs_grad, dim=1)
percentile_value = percentile_value.gather(1, k.view(-1, 1))

percentile_value = percentile_value.repeat(1, grad_view.shape[1])
tied_for_max = torch.ge(abs_grad, percentile_value).int().float()
num_ties = torch.sum(tied_for_max, dim=1, keepdim=True)

optimal_perturbation = (torch.sign(grad_view) * tied_for_max) / num_ties
optimal_perturbation = optimal_perturbation.view(grad.shape)

# Add perturbation to original example to obtain adversarial example
adv_x = adv_x + optimal_perturbation * eps_iter

# If clipping is needed, reset all values outside of [clip_min, clip_max]
if clip_min is not None or clip_max is not None:
adv_x = torch.clamp(adv_x, clip_min, clip_max)

# Clipping perturbation eta to the l1-ball
eta = adv_x - x
eta = eta.renorm(p=1, dim=0, maxnorm=eps)
adv_x = x + eta

# Redo the clipping.
# Subtracting and re-adding eta can add some small numerical error.
if clip_min is not None or clip_max is not None:
adv_x = torch.clamp(adv_x, clip_min, clip_max)

return adv_x.detach()
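
For context, the sketch below shows one way the new function might be called. It is not part of this diff: the small classifier and the random batch are hypothetical stand-ins, and the hyperparameters simply mirror the defaults documented above.

import torch
from cleverhans.torch.attacks.sparse_l1_descent import sparse_l1_descent

# Hypothetical stand-in classifier; any callable returning logits works.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(28 * 28, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 10),
)
x = torch.rand(16, 1, 28, 28)  # dummy batch of inputs in [0, 1]

# Untargeted attack constrained to an l1-ball of radius eps=10.
x_adv = sparse_l1_descent(
    model,
    x,
    eps=10.0,
    eps_iter=1.0,
    nb_iter=20,
    grad_sparsity=99,
    clip_min=0.0,
    clip_max=1.0,
)

# The per-example l1 distortion should stay within eps (up to numerical error).
print(torch.norm((x_adv - x).view(x.shape[0], -1), p=1, dim=1).max())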
197 changes: 197 additions & 0 deletions cleverhans/torch/tests/test_attacks.py
@@ -16,6 +16,7 @@
from cleverhans.torch.attacks.carlini_wagner_l2 import carlini_wagner_l2
from cleverhans.torch.attacks.spsa import spsa
from cleverhans.torch.attacks.hop_skip_jump_attack import hop_skip_jump_attack
from cleverhans.torch.attacks.sparse_l1_descent import sparse_l1_descent


class TrivialModel(torch.nn.Module):
@@ -40,6 +41,20 @@ def forward(self, x):
return x


class DummyModel(torch.nn.Module):
def __init__(self, n_features):
super(DummyModel, self).__init__()
self.model = torch.nn.Sequential(
torch.nn.Linear(n_features, 60),
torch.nn.ReLU(),
torch.nn.Linear(60, 10),
)

def forward(self, x):
x = x.view(x.shape[0], -1)
return self.model(x)


class CommonAttackProperties(CleverHansTest):
def setUp(self):
super(CommonAttackProperties, self).setUp()
@@ -848,3 +863,185 @@ def test_generate_targeted_l2(self):
adv_acc = new_labs.eq(y_target).sum().to(torch.float) / y_target.size(0)

self.assertGreater(adv_acc, 0.9)


class TestSparseL1Descent(CommonAttackProperties):
def setUp(self):
super(TestSparseL1Descent, self).setUp()
self.attack = sparse_l1_descent

def generate_adversarial_examples(self, **kwargs):
x_adv = self.attack(model_fn=self.model, x=self.normalized_x, **kwargs)
_, ori_label = self.model(self.normalized_x).max(1)
_, adv_label = self.model(x_adv).max(1)
adv_acc = adv_label.eq(ori_label).sum().to(
torch.float
) / self.normalized_x.size(0)

delta = torch.sum(torch.abs(x_adv - self.normalized_x), dim=1)
return x_adv, delta, adv_acc

def generate_targeted_adversarial_examples(self, **kwargs):
y_target = torch.randint(low=0, high=2, size=(self.normalized_x.size(0),))
x_adv = self.attack(
model_fn=self.model,
x=self.normalized_x,
y=y_target,
targeted=True,
**kwargs
)

_, adv_label = self.model(x_adv).max(1)
adv_success = adv_label.eq(y_target).sum().to(
torch.float
) / self.normalized_x.size(0)

delta = torch.sum(torch.abs(x_adv - self.normalized_x), dim=1)
return x_adv, delta, adv_success

def test_invalid_input(self):
x_val = -torch.ones((2, 2))
with self.assertRaises(AssertionError):
self.attack(self.model, x_val, eps=10.0, clip_min=0.0, clip_max=1.0)

def test_gives_adversarial_example(self):
_, delta, adv_acc = self.generate_adversarial_examples(
eps=2, clip_min=-5, clip_max=5
)
self.assertLess(adv_acc, 0.5)
self.assertLess(torch.max(torch.abs(delta - 2)), 1e-3)

def test_targeted_gives_adversarial_example(self):
_, delta, adv_acc = self.generate_targeted_adversarial_examples(
eps=10, clip_min=-5, clip_max=5
)
self.assertGreater(adv_acc, 0.7)
self.assertLessEqual(torch.max(delta), 10.001)

def test_can_be_called_with_different_eps(self):
for eps in [10, 20, 30, 40]:
_, delta, _ = self.generate_adversarial_examples(
eps=eps, clip_min=-5, clip_max=5
)
self.assertLessEqual(torch.max(delta), eps + 1e-4)

def test_clip_works_as_expected(self):
x_adv, _, _ = self.generate_adversarial_examples(
eps=10,
nb_iter=20,
rand_init=True,
clip_min=-0.2,
clip_max=0.1,
sanity_checks=False,
)

self.assertClose(torch.min(x_adv), -0.2)
self.assertClose(torch.max(x_adv), 0.1)

def test_do_not_reach_lp_boundary(self):
"""
Make sure that the iterative attack does not reach the boundary of the Lp
neighbourhood when nb_iter * eps_iter is small relative to epsilon.
"""
_, delta, _ = self.generate_adversarial_examples(
eps=0.5, clip_min=-5, clip_max=5, nb_iter=10, eps_iter=0.01
)
self.assertTrue(torch.max(0.5 - delta) > 0.25)

def test_generate_np_gives_clipped_adversarial_examples(self):
x_adv, _, _ = self.generate_adversarial_examples(
eps=1.0,
eps_iter=0.1,
nb_iter=5,
clip_min=-0.2,
clip_max=0.3,
sanity_checks=False,
)

self.assertLess(-0.201, torch.min(x_adv))
self.assertLess(torch.max(x_adv), 0.301)

def test_clip_eta(self):
_, delta, _ = self.generate_adversarial_examples(
eps=1, clip_min=-5, clip_max=5, nb_iter=5, eps_iter=0.1
)

# this projection is less numerically stable so give it some slack
self.assertLessEqual(torch.max(delta), 1.0 + 1e-6)

def test_attack_strength(self):
# sanity checks turned off because this test initializes outside
# the valid range.
_, _, adv_acc = self.generate_adversarial_examples(
eps=10,
rand_init=True,
clip_min=0.5,
clip_max=0.7,
nb_iter=10,
sanity_checks=False,
)

self.assertLess(adv_acc, 0.4)

def test_grad_clip(self):
"""
With clipped gradients, the attack should still flip (nearly) all labels,
i.e. np.mean(orig_labels == new_labels) should be close to 0.0.
"""

# sanity checks turned off because this test initializes outside
# the valid range.
_, _, adv_acc = self.generate_adversarial_examples(
eps=10,
rand_init=True,
clip_grad=True,
clip_min=0.5,
clip_max=0.7,
nb_iter=10,
sanity_checks=False,
)
self.assertLess(adv_acc, 0.1)

def test_sparsity(self):
# use a model with larger input dimensionality for this test.
model_fn = DummyModel(1000)
x_val = torch.rand(100, 1000)

for q in [1, 9, 25.8, 50, 75.4, 90.2, 99, 99.9]:
x_adv = self.attack(
model_fn,
x_val,
eps=5.0,
grad_sparsity=q,
nb_iter=1,
sanity_checks=False,
)

numzero = torch.sum(x_adv - x_val == 0, dim=-1).float()
self.assertAlmostEqual(q * 1000.0 / 100.0, torch.mean(numzero), delta=1)

def test_grad_sparsity_checks(self):
# test that the attack allows `grad_sparsity` to be specified as a scalar
# in (0, 100) or as a vector.

# scalar values out of range
with self.assertRaises(ValueError):
self.generate_adversarial_examples(grad_sparsity=0)

with self.assertRaises(ValueError):
self.generate_adversarial_examples(grad_sparsity=100)

# sparsity as 2D array should fail
with self.assertRaises(ValueError):
gs = torch.empty(100, 2).uniform_(90, 99)
self.generate_adversarial_examples(sanity_checks=False, grad_sparsity=gs)

# sparsity as 1D array should succeed
gs = torch.empty(100).uniform_(90, 99)
self.generate_adversarial_examples(sanity_checks=False, grad_sparsity=gs)

# sparsity vector of wrong size should fail
with self.assertRaises(ValueError) as context:
gs = torch.empty(101).uniform_(90, 99)
self.generate_adversarial_examples(sanity_checks=False, grad_sparsity=gs)
