
Commit ee685a9

Cocktail hotfixes (#245)
* Fixes for the development branch and regularization cocktails
* Update implementation
* Fix unit tests temporarily
* Implementation update and bug fixes
* Removing unnecessary code
* Addressing Ravin's comments
1 parent 463c166 commit ee685a9

File tree: 4 files changed, +42 −24 lines

autoPyTorch/pipeline/components/setup/network_backbone/ResNetBackbone.py

Lines changed: 16 additions & 5 deletions
@@ -41,7 +41,7 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> None:
                     out_features=self.config["num_units_%d" % i],
                     blocks_per_group=self.config["blocks_per_group_%d" % i],
                     last_block_index=(i - 1) * self.config["blocks_per_group_%d" % i],
-                    dropout=self.config['use_dropout']
+                    dropout=self.config[f'dropout_{i}'] if self.config['use_dropout'] else None,
                 )
             )
         if self.config['use_batch_norm']:
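
The recurring change in this commit turns the `dropout` argument from a boolean flag into a per-group dropout rate (or None when dropout is disabled). A minimal sketch of how the new keys are resolved, with illustrative config values that are not taken from the repository:

    # Hypothetical config for illustration only.
    config = {
        'use_dropout': True,
        'num_groups': 3,
        'dropout_1': 0.2,
        'dropout_2': 0.4,
        'dropout_3': 0.6,
    }

    for i in range(1, config['num_groups'] + 1):
        # Mirrors the diff: each group gets its own rate; None disables dropout.
        dropout = config[f'dropout_{i}'] if config['use_dropout'] else None
        print(f"group {i}: dropout={dropout}")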
@@ -52,7 +52,7 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> None:
         return backbone

     def _add_group(self, in_features: int, out_features: int,
-                   blocks_per_group: int, last_block_index: int, dropout: bool
+                   blocks_per_group: int, last_block_index: int, dropout: Optional[float]
                    ) -> nn.Module:
         """
         Adds a group into the main backbone.
@@ -64,7 +64,8 @@ def _add_group(self, in_features: int, out_features: int,
             out_features (int): output dimensionality for the current block
             blocks_per_group (int): Number of ResNet blocks per group
             last_block_index (int): block index for shake regularization
-            dropout (bool): whether or not use dropout
+            dropout (None, float): dropout value for the group. If None,
+                no dropout is applied.
         """
         blocks = list()
         for i in range(blocks_per_group):
@@ -245,7 +246,7 @@ def __init__(
         out_features: int,
         blocks_per_group: int,
         block_index: int,
-        dropout: bool,
+        dropout: Optional[float],
         activation: nn.Module
     ):
         super(ResBlock, self).__init__()
@@ -289,13 +290,22 @@ def _build_block(self, in_features: int, out_features: int) -> nn.Module:
             if self.config['use_batch_norm']:
                 layers.append(nn.BatchNorm1d(in_features))
             layers.append(self.activation())
+        else:
+            # if start_norm is not None and the skip connection is None,
+            # we will never apply the start_norm for the first layer in the block,
+            # which is why we should account for this case.
+            if not self.config['use_skip_connection']:
+                if self.config['use_batch_norm']:
+                    layers.append(nn.BatchNorm1d(in_features))
+                layers.append(self.activation())
+
         layers.append(nn.Linear(in_features, out_features))

         if self.config['use_batch_norm']:
             layers.append(nn.BatchNorm1d(out_features))
         layers.append(self.activation())

-        if self.config["use_dropout"]:
+        if self.dropout is not None:
             layers.append(nn.Dropout(self.dropout))
         layers.append(nn.Linear(out_features, out_features))

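For orientation, here is a rough sketch of the layer order `_build_block` now produces, assuming `use_batch_norm=True` and a ReLU activation; this is a simplified stand-in, not the repository's code:

    import torch.nn as nn

    def sketch_block(in_features: int, out_features: int,
                     dropout, use_skip_connection: bool) -> nn.Sequential:
        layers = []
        if not use_skip_connection:
            # Without a skip connection the surrounding start_norm is never
            # applied, so the pre-activation is prepended here (the new
            # else-branch in the diff above).
            layers += [nn.BatchNorm1d(in_features), nn.ReLU()]
        layers.append(nn.Linear(in_features, out_features))
        layers += [nn.BatchNorm1d(out_features), nn.ReLU()]
        if dropout is not None:  # dropout is now a rate, not a bool
            layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(out_features, out_features))
        return nn.Sequential(*layers)

    print(sketch_block(64, 128, dropout=0.3, use_skip_connection=False))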
@@ -320,6 +330,7 @@ def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
         if self.config["use_skip_connection"]:
             residual = self.shortcut(x)

+        # TODO make the below code better
         if self.config["use_skip_connection"]:
             if self.config["multi_branch_choice"] == 'shake-shake':
                 x1 = self.layers(x)
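
The 'shake-shake' branch touched above mixes two residual branches with a random convex combination. A minimal sketch of the idea only (not autoPyTorch's implementation; the full method also redraws the coefficient on the backward pass):

    import torch

    def shake_shake_train(x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        # A fresh alpha is drawn on every forward pass during training.
        alpha = torch.rand(1)
        return alpha * x1 + (1 - alpha) * x2

    def shake_shake_eval(x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        # Both branches are weighted equally at inference time.
        return 0.5 * (x1 + x2)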

autoPyTorch/pipeline/components/setup/network_backbone/ShapedResNetBackbone.py

Lines changed: 19 additions & 13 deletions
@@ -30,22 +30,28 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> None:
         out_features = self.config["output_dim"]

         # use the get_shaped_neuron_counts to update the number of units
-        neuron_counts = get_shaped_neuron_counts(self.config['resnet_shape'],
-                                                 in_features,
-                                                 out_features,
-                                                 self.config['max_units'],
-                                                 self.config['num_groups'] + 2)[:-1]
+        neuron_counts = get_shaped_neuron_counts(
+            self.config['resnet_shape'],
+            in_features,
+            out_features,
+            self.config['max_units'],
+            self.config['num_groups'] + 2,
+        )[:-1]
         self.config.update(
             {"num_units_%d" % (i): num for i, num in enumerate(neuron_counts)}
         )
-        if self.config['use_dropout'] and self.config["max_dropout"] > 0.05:
+        if self.config['use_dropout']:
+            # the last dropout ("neuron") value is skipped since it will be equal
+            # to output_feat, which is 0. It is also skipped when getting the
+            # number of units for the architecture, since it is mostly implemented
+            # for the output layer, which is part of the head and not of the backbone.
             dropout_shape = get_shaped_neuron_counts(
-                self.config['dropout_shape'], 0, 0, 1000, self.config['num_groups']
-            )
-
-            dropout_shape = [
-                dropout / 1000 * self.config["max_dropout"] for dropout in dropout_shape
-            ]
+                self.config['dropout_shape'],
+                0,
+                0,
+                self.config["max_dropout"],
+                self.config['num_groups'] + 1,
+            )[:-1]

         self.config.update(
             {"dropout_%d" % (i + 1): dropout for i, dropout in enumerate(dropout_shape)}
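
A worked illustration of the new computation, using a stand-in for get_shaped_neuron_counts that linearly interpolates a 'funnel' shape (assumed behavior, not the library's exact function): num_groups + 1 values are requested so the trailing zero belonging to the output-layer slot can be dropped with [:-1], leaving one rate per group, already scaled by max_dropout instead of the old divide-by-1000 rescaling.

    def funnel_counts(max_value: float, count: int) -> list:
        # Stand-in: interpolate linearly from max_value down to 0 over count steps.
        step = max_value / (count - 1)
        return [max_value - i * step for i in range(count)]

    num_groups, max_dropout = 3, 0.6
    dropout_shape = funnel_counts(max_dropout, num_groups + 1)[:-1]
    print([round(d, 2) for d in dropout_shape])  # [0.6, 0.4, 0.2] -> dropout_1..dropout_3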
@@ -61,7 +67,7 @@ def build_backbone(self, input_shape: Tuple[int, ...]) -> None:
                     out_features=self.config["num_units_%d" % i],
                     blocks_per_group=self.config["blocks_per_group"],
                     last_block_index=(i - 1) * self.config["blocks_per_group"],
-                    dropout=self.config['use_dropout']
+                    dropout=self.config[f'dropout_{i}'] if self.config['use_dropout'] else None,
                 )
             )
         if self.config['use_batch_norm']:

autoPyTorch/pipeline/components/setup/network_head/no_head.py

Lines changed: 3 additions & 3 deletions
@@ -20,7 +20,7 @@ class NoHead(NetworkHeadComponent):
     """

     def build_head(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> nn.Module:
-        layers = [nn.Flatten()]
+        layers = []
         in_features = np.prod(input_shape).item()
         out_features = np.prod(output_shape).item()
         layers.append(_activations[self.config["activation"]]())
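
Dropping the leading nn.Flatten ties in with the property change below: without it, the head's Linear layer assumes the backbone output is already flat, which holds for tabular data but not for image or time-series tensors. A rough illustration with hypothetical shapes:

    import torch
    import torch.nn as nn

    head = nn.Linear(3 * 64 * 64, 5)
    x = torch.randn(8, 3, 64, 64)  # 4-D image batch
    # head(x) would raise a shape error: Linear acts on the last dimension.
    print(head(x.flatten(start_dim=1)).shape)  # torch.Size([8, 5]) after flattening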
@@ -34,8 +34,8 @@ def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[
             'shortname': 'NoHead',
             'name': 'NoHead',
             'handles_tabular': True,
-            'handles_image': True,
-            'handles_time_series': True,
+            'handles_image': False,
+            'handles_time_series': False,
         }

     @staticmethod

test/test_pipeline/components/setup/test_setup.py

Lines changed: 4 additions & 3 deletions
@@ -422,8 +422,7 @@ def test_add_network_backbone(self):
 class TestNetworkHead:
     def test_all_heads_available(self):
         network_head_choice = NetworkHeadChoice(dataset_properties={})
-
-        assert len(network_head_choice.get_components().keys()) == 2
+        assert len(network_head_choice.get_components().keys()) == 3

     @pytest.mark.parametrize('task_type_input_output_shape', [(constants.IMAGE_CLASSIFICATION, (3, 64, 64), (5,)),
                                                               (constants.IMAGE_REGRESSION, (3, 64, 64), (1,)),
@@ -441,7 +440,9 @@ def test_dummy_forward_backward_pass(self, task_type_input_output_shape):
         if task_type in constants.CLASSIFICATION_TASKS:
             dataset_properties["num_classes"] = output_shape[0]

-        cs = network_head_choice.get_hyperparameter_search_space(dataset_properties=dataset_properties)
+        cs = network_head_choice.get_hyperparameter_search_space(
+            dataset_properties=dataset_properties,
+        )
         # test 10 random configurations
         for i in range(10):
             config = cs.sample_configuration()
