Commit dfedbe6

Merge remote-tracking branch 'origin/develop' into fix/569-missing-device-move-ekfac

schroedk committed May 3, 2024
2 parents: 36ea3ba + c41fa91

Showing 5 changed files with 127 additions and 47 deletions.
CHANGELOG.md: 6 additions & 0 deletions

@@ -7,6 +7,12 @@
- Fixed missing move of tensors to model device in `EkfacInfluence`
implementation [PR #570](https://github.com/aai-institute/pyDVL/pull/570)

+### Added
+
+- Added a device fixture for `pytest` which, depending on availability and
+  user input (`pytest --with-cuda`), resolves to a CUDA device
+  [PR #574](https://github.com/aai-institute/pyDVL/pull/574)
+
## 0.9.1 - Bug fixes, logging improvement

### Fixed
CONTRIBUTING.md: 7 additions & 0 deletions

@@ -131,6 +131,13 @@ There are a few important arguments:
- `--slow-tests` enables running slow tests. See below for a description
of slow tests.

+- `--with-cuda` sets the device fixture in [tests/influence/torch/conftest.py](
+  tests/influence/torch/conftest.py) to `cuda` if it is available.
+  Using this fixture within tests, you can run parts of your tests on a `cuda`
+  device (see the sketch below). Be aware that you still have to handle
+  device placement manually in each specific test: setting this flag does
+  not run all tests on a GPU.
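
For illustration, a test consuming the fixture might look like the following minimal sketch (a hypothetical test, not part of this commit; the fixture only supplies a `torch.device`, so all placement stays explicit):

```python
import torch


def test_forward_pass_on_device(device: torch.device):
    # `device` resolves to cuda only when `pytest --with-cuda` was passed
    # and a GPU is available; otherwise it falls back to cpu.
    model = torch.nn.Linear(4, 2).to(device)
    x = torch.randn(8, 4, device=device)
    assert model(x).device.type == device.type
```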

### Markers

We use a few different markers to differentiate between tests and runs
tests/conftest.py: 6 additions & 0 deletions

@@ -48,6 +48,12 @@ def pytest_addoption(parser):
default=False,
help="Disable reporting. Verbose mode takes precedence.",
)
+    parser.addoption(
+        "--with-cuda",
+        action="store_true",
+        default=False,
+        help="Set device fixture to 'cuda' if available",
+    )


@pytest.fixture
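
The flag registered here is read back inside fixtures via `request.config.getoption("--with-cuda")`; the new `device` fixture in tests/influence/torch/conftest.py (below) does exactly that.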
tests/influence/torch/conftest.py: 12 additions & 0 deletions
@@ -1,5 +1,6 @@
from typing import Tuple

+import pytest
import torch
from numpy.typing import NDArray
from torch.optim import LBFGS
@@ -59,3 +60,14 @@ def closure():
def torch_linear_model_to_numpy(model: torch.nn.Linear) -> Tuple[NDArray, NDArray]:
model.eval()
return model.weight.data.numpy(), model.bias.data.numpy()


+@pytest.fixture(scope="session")
+def device(request):
+    import torch
+
+    use_cuda = request.config.getoption("--with-cuda")
+    if use_cuda and torch.cuda.is_available():
+        return torch.device("cuda")
+    else:
+        return torch.device("cpu")
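
With the fixture in place, a CUDA-enabled run is requested per invocation, for example `pytest --with-cuda tests/influence/torch`. Because the fixture is session-scoped, the device is resolved once for the whole test session.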
tests/influence/torch/test_influence_model.py: 96 additions & 47 deletions

@@ -340,6 +340,7 @@ def test_influence_linear_model(
rtol,
mode: InfluenceMode,
train_set_size: int,
+    device: torch.device,
hessian_reg: float = 0.1,
test_set_size: int = 20,
problem_dimension: Tuple[int, int] = (4, 20),
@@ -373,16 +374,20 @@

train_data_set = TensorDataset(*list(map(torch.from_numpy, train_data)))
train_data_loader = DataLoader(train_data_set, batch_size=40, num_workers=0)
-    influence = influence_factory(linear_layer, loss, train_data_loader, hessian_reg)
+    influence = influence_factory(
+        linear_layer.to(device), loss, train_data_loader, hessian_reg
+    )

x_train, y_train = tuple(map(torch.from_numpy, train_data))
x_test, y_test = tuple(map(torch.from_numpy, test_data))
-    influence_values = influence.influences(
-        x_test, y_test, x_train, y_train, mode=mode
-    ).numpy()
-    sym_influence_values = influence.influences(
-        x_train, y_train, x_train, y_train, mode=mode
-    ).numpy()
+    influence_values = (
+        influence.influences(x_test, y_test, x_train, y_train, mode=mode).cpu().numpy()
+    )
+    sym_influence_values = (
+        influence.influences(x_train, y_train, x_train, y_train, mode=mode)
+        .cpu()
+        .numpy()
+    )

with pytest.raises(ValueError):
influence.influences(x_test, y_test, x=x_train, mode=mode)
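
The recurring change in this file, inserting `.cpu()` before `.numpy()`, is what makes the assertions device-agnostic: PyTorch refuses to convert a CUDA tensor directly to a NumPy array, because NumPy can only wrap host memory. A minimal sketch of the failure mode and the fix:

```python
import torch

t = torch.ones(3)
if torch.cuda.is_available():
    t = t.to("cuda")
    # t.numpy() would now raise:
    # TypeError: can't convert cuda:0 device type tensor to numpy.
arr = t.cpu().numpy()  # Tensor.cpu() is a no-op for CPU tensors, so this is always safe
```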
@@ -431,18 +436,23 @@ def test_influences_lissa(
],
direct_influences,
influence_factory,
+    device,
):
model, loss, x_train, y_train, x_test, y_test = model_and_data

train_dataloader = DataLoader(
TensorDataset(x_train, y_train), batch_size=test_case.batch_size
)
influence_model = influence_factory(
-        model, loss, train_dataloader, test_case.hessian_reg
+        model.to(device), loss, train_dataloader, test_case.hessian_reg
)
-    approx_influences = influence_model.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    approx_influences = (
+        influence_model.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )

assert not np.any(np.isnan(approx_influences))

@@ -497,9 +507,10 @@ def test_influences_low_rank(
direct_sym_influences,
direct_factors,
influence_factory,
+    device: torch.device,
):
-    atol = 1e-8
-    rtol = 1e-5
+    atol = 1e-7
+    rtol = 1e-4
model, loss, x_train, y_train, x_test, y_test = model_and_data

num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
@@ -509,7 +520,7 @@
)

influence_func_model = influence_factory(
-        model,
+        model.to(device),
loss,
test_case.hessian_reg,
num_parameters - 1,
@@ -525,33 +536,47 @@

influence_func_model = influence_func_model.fit(train_dataloader)

-    low_rank_influence = influence_func_model.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    low_rank_influence = (
+        influence_func_model.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )

-    sym_low_rank_influence = influence_func_model.influences(
-        x_train, y_train, mode=test_case.mode
-    ).numpy()
+    sym_low_rank_influence = (
+        influence_func_model.influences(x_train, y_train, mode=test_case.mode)
+        .cpu()
+        .numpy()
+    )

low_rank_factors = influence_func_model.influence_factors(x_test, y_test)
assert np.allclose(
direct_factors,
-        influence_func_model.influence_factors(x_train, y_train).numpy(),
+        influence_func_model.influence_factors(x_train, y_train).cpu().numpy(),
atol=atol,
rtol=rtol,
)

if test_case.mode is InfluenceMode.Up:
-        low_rank_influence_transpose = influence_func_model.influences(
-            x_train, y_train, x_test, y_test, mode=test_case.mode
-        ).numpy()
+        low_rank_influence_transpose = (
+            influence_func_model.influences(
+                x_train, y_train, x_test, y_test, mode=test_case.mode
+            )
+            .cpu()
+            .numpy()
+        )
assert np.allclose(
low_rank_influence_transpose, low_rank_influence.swapaxes(0, 1)
)

-    low_rank_values_from_factors = influence_func_model.influences_from_factors(
-        low_rank_factors, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    low_rank_values_from_factors = (
+        influence_func_model.influences_from_factors(
+            low_rank_factors, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )
assert np.allclose(direct_influences, low_rank_influence, atol=atol, rtol=rtol)
assert np.allclose(
direct_sym_influences, sym_low_rank_influence, atol=atol, rtol=rtol
@@ -578,6 +603,7 @@ def test_influences_ekfac(
],
direct_influences,
direct_sym_influences,
+    device: torch.device,
):
model, loss, x_train, y_train, x_test, y_test = model_and_data

@@ -589,7 +615,7 @@
model,
update_diagonal=True,
hessian_regularization=test_case.hessian_reg,
-    )
+    ).to(device)

with pytest.raises(NotFittedException):
ekfac_influence.influences(
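
Note that, unlike the other tests, which call `model.to(device)`, here the `EkfacInfluence` wrapper itself is moved with `.to(device)`; presumably this transfers the wrapped model together with any internal buffers in one call.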
@@ -604,32 +630,46 @@
ekfac_influence.fit(train_dataloader)
elif isinstance(loss, nn.CrossEntropyLoss):
ekfac_influence = ekfac_influence.fit(train_dataloader)
-    ekfac_influence_values = ekfac_influence.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    ekfac_influence_values = (
+        ekfac_influence.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )

ekfac_influences_by_layer = ekfac_influence.influences_by_layer(
x_test, y_test, x_train, y_train, mode=test_case.mode
)

accumulated_inf_by_layer = np.zeros_like(ekfac_influence_values)
for layer, infl in ekfac_influences_by_layer.items():
-        accumulated_inf_by_layer += infl.detach().numpy()
+        accumulated_inf_by_layer += infl.detach().cpu().numpy()

-    ekfac_self_influence = ekfac_influence.influences(
-        x_train, y_train, mode=test_case.mode
-    ).numpy()
+    ekfac_self_influence = (
+        ekfac_influence.influences(x_train, y_train, mode=test_case.mode)
+        .cpu()
+        .numpy()
+    )

ekfac_factors = ekfac_influence.influence_factors(x_test, y_test)

-    influence_from_factors = ekfac_influence.influences_from_factors(
-        ekfac_factors, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    influence_from_factors = (
+        ekfac_influence.influences_from_factors(
+            ekfac_factors, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )

assert np.allclose(ekfac_influence_values, influence_from_factors)
assert np.allclose(ekfac_influence_values, accumulated_inf_by_layer)
-    check_influence_correlations(direct_influences, ekfac_influence_values)
-    check_influence_correlations(direct_sym_influences, ekfac_self_influence)
+    check_influence_correlations(
+        direct_influences, ekfac_influence_values, threshold=0.94
+    )
+    check_influence_correlations(
+        direct_sym_influences, ekfac_self_influence, threshold=0.94
+    )


@pytest.mark.torch
@@ -656,14 +696,15 @@ def test_influences_cg(
direct_factors,
use_block_cg: bool,
pre_conditioner: PreConditioner,
+    device: torch.device,
):
model, loss, x_train, y_train, x_test, y_test = model_and_data

train_dataloader = DataLoader(
TensorDataset(x_train, y_train), batch_size=test_case.batch_size
)
influence_model = CgInfluence(
-        model,
+        model.to(device),
loss,
test_case.hessian_reg,
maxiter=5,
@@ -672,9 +713,13 @@
)
influence_model = influence_model.fit(train_dataloader)

-    approx_influences = influence_model.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    approx_influences = (
+        influence_model.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )

assert not np.any(np.isnan(approx_influences))

@@ -701,7 +746,11 @@ def test_influences_cg(
# check that block variant returns the correct vector, if only one right hand side
# is provided
if use_block_cg:
-        single_influence = influence_model.influence_factors(
-            x_train[0].unsqueeze(0), y_train[0].unsqueeze(0)
-        ).numpy()
+        single_influence = (
+            influence_model.influence_factors(
+                x_train[0].unsqueeze(0), y_train[0].unsqueeze(0)
+            )
+            .cpu()
+            .numpy()
+        )
assert np.allclose(single_influence, direct_factors[0], atol=1e-6, rtol=1e-4)
