Adding another way of calculating gradients in downsamplers #540

Merged · 18 commits · Jun 25, 2024
add more tests
XianzheMa committed Jun 25, 2024
commit 21324b29b4ed9620623aa66281562e73d5bef068
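
This commit parametrizes the matrix-downsampler tests over the new MatrixContent.LAST_LAYER_GRADIENTS option (alongside the existing LAST_TWO_LAYERS_GRADIENTS) and moves the closed-form gradient test into the AbstractRemoteDownsamplingStrategy test module. For context, the closed-form identity the tests rely on: for cross-entropy on logits z with integer targets y, the gradient of the summed loss with respect to z is softmax(z) - one_hot(y). A minimal, self-contained sketch of that identity (illustrative names, not Modyn's code):

```python
# Minimal sketch (not the PR's implementation): for cross-entropy over logits z
# with integer targets y, the gradient of the summed loss w.r.t. z is
# softmax(z) - one_hot(y), so no call to torch.autograd.grad is needed.
import torch
import torch.nn.functional as F


def closed_form_last_layer_grad(forward_output: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    probs = F.softmax(forward_output, dim=-1)  # (batch, num_classes)
    one_hot = F.one_hot(target, num_classes=forward_output.shape[-1]).float()
    return probs - one_hot  # one gradient row per sample


logits = torch.randn(4, 2, requires_grad=True)
target = torch.randint(0, 2, (4,))
expected = torch.autograd.grad(F.cross_entropy(logits, target, reduction="sum"), logits)[0]
assert torch.allclose(closed_form_last_layer_grad(logits, target), expected, atol=1e-6)
```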
File 1: tests for AbstractMatrixDownsamplingStrategy
@@ -1,6 +1,7 @@
 # pylint: disable=abstract-class-instantiated,unused-argument
 from unittest.mock import patch
 
+import pytest
 import numpy as np
 import torch
 from modyn.config import ModynConfig
@@ -10,7 +11,9 @@
 )
 
 
-def get_sampler_config(dummy_system_config: ModynConfig, balance=False):
+def get_sampler_config(
+    dummy_system_config: ModynConfig, balance=False, matrix_content=MatrixContent.LAST_TWO_LAYERS_GRADIENTS
+):
     downsampling_ratio = 50
     per_sample_loss_fct = torch.nn.CrossEntropyLoss(reduction="none")
 
@@ -29,7 +32,7 @@ def get_sampler_config(dummy_system_config: ModynConfig, balance=False):
         dummy_system_config.model_dump(by_alias=True),
         per_sample_loss_fct,
         "cpu",
-        MatrixContent.LAST_TWO_LAYERS_GRADIENTS,
+        matrix_content,
     )
 
 
@@ -106,9 +109,12 @@ def test_collect_embedding_balance(test_amds, dummy_system_config: ModynConfig):
     assert amds.already_selected_samples == [1, 3, 1000, 1002]
 
 
+@pytest.mark.parametrize(
+    "matrix_content", [MatrixContent.LAST_LAYER_GRADIENTS, MatrixContent.LAST_TWO_LAYERS_GRADIENTS]
+)
 @patch.multiple(AbstractMatrixDownsamplingStrategy, __abstractmethods__=set())
-def test_collect_gradients(dummy_system_config: ModynConfig):
-    amds = AbstractMatrixDownsamplingStrategy(*get_sampler_config(dummy_system_config))
+def test_collect_gradients(matrix_content, dummy_system_config: ModynConfig):
+    amds = AbstractMatrixDownsamplingStrategy(*get_sampler_config(dummy_system_config, matrix_content=matrix_content))
     with torch.inference_mode(mode=(not amds.requires_grad)):
         forward_input = torch.randn((4, 5))
         first_output = torch.randn((4, 2))
@@ -125,29 +131,14 @@ def test_collect_gradients(dummy_system_config: ModynConfig):
 
     assert len(amds.matrix_elements) == 2
 
-    # expected shape = (a,b)
+    # expected shape = (a, gradient_shape)
     # a = 7 (4 samples in the first batch and 3 samples in the second batch)
-    # b = 5 * 2 + 2 where 5 is the input dimension of the last layer and 2 is the output one
-    assert np.concatenate(amds.matrix_elements).shape == (7, 12)
+    if matrix_content == MatrixContent.LAST_LAYER_GRADIENTS:
+        # shape same as the last dimension of output
+        gradient_shape = 2
+    else:
+        # 5 is the input dimension of the last layer and 2 is the output one
+        gradient_shape = 5 * 2 + 2
+    assert np.concatenate(amds.matrix_elements).shape == (7, gradient_shape)
 
     assert amds.index_sampleid_map == [1, 2, 3, 4, 21, 31, 41]
-
-
-@patch(
-    "modyn.trainer_server.internal.trainer.remote_downsamplers"
-    ".abstract_matrix_downsampling_strategy.torch.autograd.grad",
-    wraps=torch.autograd.grad,
-)
-def test__compute_last_layer_gradient_wrt_loss_sum(mock_torch_auto_grad):
-    per_sample_loss_fct = torch.nn.CrossEntropyLoss(reduction="none")
-    forward_output = torch.randn((4, 2), requires_grad=True)
-    # random target
-    target = torch.randint(0, 2, (4,))
-    last_layer_gradients = AbstractMatrixDownsamplingStrategy._compute_last_layer_gradient_wrt_loss_sum(
-        per_sample_loss_fct, forward_output, target
-    )
-    # as we use CrossEntropyLoss, the gradient is computed in a closed form
-    assert mock_torch_auto_grad.call_count == 0
-    # verify that the gradients calculated via the closed form are equal to the ones calculated by autograd
-    expected_grad = torch.autograd.grad(per_sample_loss_fct(forward_output, target).sum(), forward_output)[0]
-    assert torch.allclose(last_layer_gradients, expected_grad)
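
A note on the shape arithmetic asserted in the parametrized test: with last-layer gradients, each per-sample row has one entry per class (2), while with last-two-layers gradients it flattens the final layer's weight and bias gradients into 5 * 2 + 2 = 12 entries, as the test's comments state. A hypothetical sketch of that arithmetic, assuming the final layer is a Linear(5, 2) (dimension names are illustrative, not Modyn's):

```python
# Hypothetical sketch of the shape arithmetic in the parametrized test above
# (names are illustrative, not Modyn's): for a final torch.nn.Linear(5, 2), the
# per-sample gradient w.r.t. the layer output has 2 entries, while the gradient
# w.r.t. the layer's weight and bias flattens to 5 * 2 + 2 = 12 entries.
import torch

batch, in_features, num_classes = 4, 5, 2
last_layer_input = torch.randn(batch, in_features)
grad_wrt_output = torch.randn(batch, num_classes)  # e.g. softmax(z) - one_hot(y)

# per-sample weight gradient = outer product of output grad and layer input;
# per-sample bias gradient = the output grad itself
weight_grads = torch.einsum("bo,bi->boi", grad_wrt_output, last_layer_input)
per_sample = torch.cat([weight_grads.flatten(start_dim=1), grad_wrt_output], dim=1)
assert grad_wrt_output.shape == (batch, 2)     # LAST_LAYER_GRADIENTS width
assert per_sample.shape == (batch, 5 * 2 + 2)  # LAST_TWO_LAYERS_GRADIENTS width
```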
File 2: tests for AbstractRemoteDownsamplingStrategy
@@ -1,6 +1,7 @@
 # pylint: disable=abstract-class-instantiated
 from unittest.mock import patch
 
+import torch
 from modyn.config import ModynConfig
 from modyn.trainer_server.internal.trainer.remote_downsamplers.abstract_remote_downsampling_strategy import (
     AbstractRemoteDownsamplingStrategy,
@@ -21,3 +22,23 @@ def test_batch_then_sample_general(dummy_system_config: ModynConfig):
     assert sampler.trigger_id == 128
     assert sampler.pipeline_id == 154
     assert sampler.batch_size == 64
+
+
+@patch(
+    "modyn.trainer_server.internal.trainer.remote_downsamplers"
+    ".abstract_remote_downsampling_strategy.torch.autograd.grad",
+    wraps=torch.autograd.grad,
+)
+def test__compute_last_layer_gradient_wrt_loss_sum(mock_torch_auto_grad):
+    per_sample_loss_fct = torch.nn.CrossEntropyLoss(reduction="none")
+    forward_output = torch.randn((4, 2), requires_grad=True)
+    # random target
+    target = torch.randint(0, 2, (4,))
+    last_layer_gradients = AbstractRemoteDownsamplingStrategy._compute_last_layer_gradient_wrt_loss_sum(
+        per_sample_loss_fct, forward_output, target
+    )
+    # as we use CrossEntropyLoss, the gradient is computed in a closed form
+    assert mock_torch_auto_grad.call_count == 0
+    # verify that the gradients calculated via the closed form are equal to the ones calculated by autograd
+    expected_grad = torch.autograd.grad(per_sample_loss_fct(forward_output, target).sum(), forward_output)[0]
+    assert torch.allclose(last_layer_gradients, expected_grad)
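
The mock above wraps the real torch.autograd.grad, so the test can count calls without changing behavior: call_count == 0 proves the closed form was used for CrossEntropyLoss, and the allclose check confirms it matches autograd. A hedged sketch of the dispatch this test implies (an assumption about the helper's structure, not Modyn's actual implementation):

```python
# Hedged sketch (an assumption, not Modyn's exact code) of the dispatch the test
# above implies: CrossEntropyLoss takes the softmax-minus-one-hot closed form,
# any other per-sample loss falls back to torch.autograd.grad.
import torch


def compute_last_layer_gradient_wrt_loss_sum(per_sample_loss_fct, forward_output, target):
    if isinstance(per_sample_loss_fct, torch.nn.CrossEntropyLoss):
        probs = torch.nn.functional.softmax(forward_output, dim=-1)
        one_hot = torch.nn.functional.one_hot(
            target, num_classes=forward_output.shape[-1]
        ).float()
        return probs - one_hot  # closed form: torch.autograd.grad is never called
    loss = per_sample_loss_fct(forward_output, target).sum()
    return torch.autograd.grad(loss, forward_output)[0]
```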