Time series forecasting #434

Merged
merged 357 commits on Jun 28, 2022
Changes from 1 commit
Commits
f193423
new target scaler, allow NoNorm for MLP Encpder
dengdifan Dec 22, 2021
752a58f
allow sampling full sequences
dengdifan Dec 22, 2021
2ab286b
integrate SeqBuilder to SequenceCollector
dengdifan Dec 22, 2021
d90d630
restore SequenceBuilder to reduce memory usage
dengdifan Dec 22, 2021
adcf8a0
move scaler to network
dengdifan Dec 22, 2021
c3da078
lag sequence
dengdifan Dec 22, 2021
5d67973
merge encoder and decoder as a single pipeline
dengdifan Dec 30, 2021
45d2078
faster lag_seq builder
dengdifan Jan 3, 2022
e4c5358
maint
dengdifan Jan 4, 2022
a0a01b3
new init, faster DeepAR inference in trainer
dengdifan Jan 5, 2022
27e0eb0
more losses types
dengdifan Jan 6, 2022
3710cb2
maint
dengdifan Jan 6, 2022
70d82d9
new Transformer models, allow RNN to do deepAR inference
dengdifan Jan 9, 2022
aa0221a
maint
dengdifan Jan 12, 2022
0a8b5f2
maint
dengdifan Jan 12, 2022
7a7b68d
maint
dengdifan Jan 13, 2022
eeeda02
maint
dengdifan Jan 13, 2022
738f18d
reduced search space for Transformer
dengdifan Jan 13, 2022
353b5c5
reduced init design
dengdifan Jan 13, 2022
e8db57b
maint
dengdifan Jan 13, 2022
8dec08c
maint
dengdifan Jan 14, 2022
7008cec
maint
dengdifan Jan 14, 2022
c7d401e
maint
dengdifan Jan 14, 2022
df15a2b
faster forecasting
dengdifan Jan 16, 2022
ca6a47d
maint
dengdifan Jan 16, 2022
5c9b10c
allow singel fidelity
dengdifan Jan 16, 2022
64580f7
maint
dengdifan Jan 17, 2022
df69c79
fix budget num_seq
dengdifan Jan 17, 2022
91acd5b
faster sampler and lagger
dengdifan Jan 17, 2022
fa96b27
maint
dengdifan Jan 17, 2022
995684a
maint
dengdifan Jan 17, 2022
d3a0e31
maint deepAR
dengdifan Jan 18, 2022
d8b3892
maint
dengdifan Jan 18, 2022
5e9fbae
maint
dengdifan Jan 19, 2022
3eb197e
cross validation
dengdifan Jan 19, 2022
6f4fdf1
allow holdout for smaller datasets
dengdifan Jan 19, 2022
8de8e50
smac4ac to smac4hpo
dengdifan Jan 19, 2022
54fbe59
maint
dengdifan Jan 21, 2022
918776f
maint
dengdifan Jan 21, 2022
8570951
allow to change decoder search space
dengdifan Jan 21, 2022
90edcfb
more resampling strategy, more options for MLP
dengdifan Jan 24, 2022
df7f568
reduced NBEATS
dengdifan Jan 24, 2022
6a22e4d
subsampler for val loader
dengdifan Jan 25, 2022
01ee1f1
rng for dataloader sampler
dengdifan Jan 25, 2022
5dcdc4e
maint
dengdifan Jan 26, 2022
f32a12b
remove generator as it cannot be pickled
dengdifan Jan 26, 2022
344e7df
allow lower fidelity to evaluate less test instances
dengdifan Jan 28, 2022
94891f2
fix dummy forecastro isues
dengdifan Jan 30, 2022
8fc51b1
maint
dengdifan Jan 30, 2022
0b34683
add gluonts as requirement
dengdifan Jan 30, 2022
f23840b
more data for val set for larger dataset
dengdifan Feb 1, 2022
dc84904
maint
dengdifan Feb 2, 2022
6466d11
Merge branch 'refactor_development_time_series' of https://github.com…
dengdifan Feb 2, 2022
6981553
maint
dengdifan Feb 3, 2022
3fda94a
fix nbeats decoder
Feb 14, 2022
d95e230
new dataset interface
dengdifan Feb 16, 2022
d185609
Merge branch 'refactor_development_time_series' of https://github.com…
dengdifan Feb 16, 2022
d5459fa
resolve conflict
dengdifan Feb 16, 2022
510cc5a
maint
dengdifan Feb 16, 2022
3806fe2
allow encoder to receive input from different sources
dengdifan Feb 16, 2022
9251bbc
multi blocks hp design
dengdifan Feb 18, 2022
5617db6
maint
dengdifan Feb 20, 2022
d04cb04
correct hp updates
dengdifan Feb 20, 2022
7881bb5
first trial on nested conjunction
dengdifan Feb 21, 2022
d7bff6e
maint
dengdifan Feb 21, 2022
2153bc2
fit for deep AR model (needs to be reverted when the issue in ConfigS…
dengdifan Feb 21, 2022
b2063e7
adjust backbones to fit new structure
dengdifan Feb 23, 2022
59cee13
further API changes
dengdifan Feb 28, 2022
b2b5580
tft temporal fusion decoder
dengdifan Feb 28, 2022
57461b9
construct network
dengdifan Mar 2, 2022
20eb852
cells for networks
dengdifan Mar 2, 2022
f5cede7
forecasting backbones
dengdifan Mar 4, 2022
50c559e
maint
dengdifan Mar 4, 2022
2dd0b11
maint
dengdifan Mar 6, 2022
0f0dbf0
move tft layer to backbone
dengdifan Mar 7, 2022
9e68629
maint
dengdifan Mar 7, 2022
ed99ba1
quantile loss
dengdifan Mar 7, 2022
45535ba
maint
dengdifan Mar 8, 2022
9fac9fe
maint
dengdifan Mar 8, 2022
75570c2
maint
dengdifan Mar 8, 2022
2f954cd
maint
dengdifan Mar 8, 2022
31f8ddc
maint
dengdifan Mar 8, 2022
7f4911e
maint
dengdifan Mar 8, 2022
2e31fdb
forecasting init configs
dengdifan Mar 8, 2022
125921c
add forbidden
dengdifan Mar 9, 2022
8d704d1
maint
dengdifan Mar 10, 2022
e646672
maint
dengdifan Mar 10, 2022
a2ad3fe
maint
dengdifan Mar 10, 2022
200691c
remove shift data
dengdifan Mar 11, 2022
538f24e
maint
dengdifan Mar 11, 2022
12ccf4b
maint
dengdifan Mar 11, 2022
4d6853d
copy dataset_properties for each refit iteration
dengdifan Mar 11, 2022
34d556a
maint and new init
dengdifan Mar 14, 2022
37501ef
Tft forecating with features (#6)
dengdifan Mar 16, 2022
5746541
fix loss computation in QuantileLoss
dengdifan Mar 16, 2022
b1fbece
fixed scaler computation
dengdifan Mar 18, 2022
683ccf5
maint
dengdifan Mar 19, 2022
95d2ab5
fix dataset
Mar 22, 2022
baaf34f
adjust window_size to seasonality
Mar 22, 2022
897cd74
maint scaling
dengdifan Mar 23, 2022
a09ddbb
fix uncorrect Seq2Seq scaling
dengdifan Mar 23, 2022
c1dda0a
fix sampling for seq2seq
dengdifan Mar 25, 2022
49ee49c
maint
dengdifan Mar 25, 2022
dc97df2
fix scaling in NBEATS
dengdifan Mar 25, 2022
399572c
move time feature computation to dataset
dengdifan Mar 28, 2022
7154308
maint
dengdifan Mar 30, 2022
1ba08fe
fix feature computation
dengdifan Mar 31, 2022
04a69d8
maint
dengdifan Mar 31, 2022
471db34
multi-variant feature validator
dengdifan Apr 13, 2022
16cf754
resolve conflicts
dengdifan Apr 13, 2022
cc77b51
maint
dengdifan Apr 13, 2022
fe6fb1f
validator for multi-variant series
dengdifan Apr 13, 2022
9264f89
feature validator
dengdifan Apr 14, 2022
aa3f7a6
multi-variant datasets
dengdifan Apr 14, 2022
974f8ff
observed targets
dengdifan Apr 14, 2022
37dd821
stucture adjustment
dengdifan Apr 20, 2022
1a6e19d
refactory ts tasks and preprocessing
dengdifan Apr 22, 2022
075c6e6
allow nan in targets
dengdifan Apr 22, 2022
2487117
preprocessing for time series
dengdifan Apr 22, 2022
86e4e3c
maint
dengdifan Apr 25, 2022
2c9944c
forecasting pipeline
dengdifan Apr 25, 2022
7eb5139
maint
dengdifan Apr 26, 2022
22fc0bc
embedding and maint
dengdifan Apr 26, 2022
1759fdf
move targets to the tail of the features
dengdifan Apr 26, 2022
9652c80
maint
dengdifan Apr 26, 2022
1d89636
static features
dengdifan Apr 27, 2022
282d63b
adjsut scaler to static features
dengdifan Apr 27, 2022
fb8b805
remove static features from forward dict
dengdifan Apr 27, 2022
533f12d
test transform
dengdifan Apr 27, 2022
f8be97c
maint
dengdifan Apr 28, 2022
e8c9071
test sets
dengdifan Apr 28, 2022
2779015
adjust dataset to allow future known features
dengdifan Apr 29, 2022
1a1fe68
maint
dengdifan Apr 29, 2022
f4ad355
maint
dengdifan Apr 29, 2022
79ef7a7
flake8
dengdifan Apr 29, 2022
88977e0
synchronise with development
dengdifan May 2, 2022
b269ff8
recover timeseries
dengdifan May 2, 2022
31f9e43
maint
dengdifan May 2, 2022
67ea836
maint
dengdifan May 2, 2022
80b8ac2
limit memory usage tae
dengdifan May 2, 2022
d01e2a7
revert test api
dengdifan May 2, 2022
3be7be9
test for targets
dengdifan May 3, 2022
77dcb7c
not allow sparse forecasting target
dengdifan May 3, 2022
6932199
test for data validator
dengdifan May 4, 2022
ee97108
test for validations
dengdifan May 5, 2022
b7f51f2
test on TimeSeriesSequence
dengdifan May 5, 2022
08bfe18
maint
dengdifan May 5, 2022
478ad68
test for resampling
dengdifan May 6, 2022
1986593
test for dataset 1
dengdifan May 7, 2022
112c876
test for datasets
dengdifan May 8, 2022
235e310
test on tae
dengdifan May 9, 2022
9d8dd0b
maint
dengdifan May 9, 2022
dc4b602
all evaluator to evalaute test sets
dengdifan May 10, 2022
e8cf8cb
tests on losses
dengdifan May 10, 2022
e5b1c47
test for metrics
dengdifan May 10, 2022
3f47489
forecasting preprocessing
dengdifan May 10, 2022
835055d
maint
dengdifan May 11, 2022
ef9e44e
finish test for preprocessing
dengdifan May 11, 2022
21b3958
test for data loader
dengdifan May 12, 2022
101ddbc
tests for dataloader
dengdifan May 13, 2022
7318086
maint
dengdifan May 13, 2022
cf2c982
test for target scaling 1
dengdifan May 13, 2022
8b7ef61
test for target scaer
dengdifan May 15, 2022
1025b93
test for training loss
dengdifan May 15, 2022
6f68633
maint
dengdifan May 16, 2022
570408d
test for network backbone
dengdifan May 16, 2022
7d42007
test for backbone base
dengdifan May 17, 2022
2033075
test for flat encoder
dengdifan May 17, 2022
c6e2239
test for seq encoder
dengdifan May 17, 2022
727e48e
test for seqencoder
dengdifan May 18, 2022
23dde67
maint
dengdifan May 18, 2022
4d9fe30
test for recurrent decoders
dengdifan May 19, 2022
eb5a7ec
test for network
dengdifan May 19, 2022
0ea372e
maint
dengdifan May 19, 2022
1b7ebbe
test for architecture
dengdifan May 20, 2022
f055fd5
test for pipelines
dengdifan May 20, 2022
ccab50e
fixed sampler
dengdifan May 21, 2022
54acaa6
maint sampler
dengdifan May 21, 2022
da6e92d
resolve conflict between embedding and net encoder
dengdifan May 21, 2022
fba012c
fix scaling
dengdifan May 21, 2022
2ed1197
allow transform for test dataloader
dengdifan May 21, 2022
95eb783
maint dataloader
dengdifan May 21, 2022
8035221
fix updates
dengdifan May 22, 2022
f3cb2de
fix dataset
dengdifan May 23, 2022
0af1217
tests on api, initial design on multi-variant
dengdifan May 24, 2022
c717fae
maint
dengdifan May 24, 2022
78d7a51
fix dataloader
dengdifan May 24, 2022
fa5cc75
move test with for loop to unittest.subtest
dengdifan May 24, 2022
2d2e039
flake 8 and update requirement
dengdifan May 24, 2022
a1c7930
mypy
dengdifan May 24, 2022
ba96c37
validator for pd dataframe
dengdifan May 27, 2022
cdcdb5a
allow series idx for api
dengdifan May 27, 2022
43671dd
maint
dengdifan May 30, 2022
806afb3
examples for forecasting
dengdifan May 30, 2022
bc80bf1
fix mypy
dengdifan May 30, 2022
c584a58
properly memory limitation for forecasting example
dengdifan May 30, 2022
0e37178
fix pre-commit
dengdifan May 30, 2022
1cf31b2
maint dataloader
dengdifan May 31, 2022
a8fa53c
remove unused auto-regressive arguments
dengdifan May 31, 2022
a8bd54d
fix pre-commit
dengdifan May 31, 2022
609ccf1
maint
dengdifan May 31, 2022
168b7cf
maint mypy
dengdifan May 31, 2022
88c2354
mypy!!!
dengdifan May 31, 2022
374cc1d
pre-commit
dengdifan May 31, 2022
4898ca5
mypyyyyyyyyyyyyyyyyyyyyyyyy
dengdifan May 31, 2022
694eebb
maint
dengdifan Jun 13, 2022
abd3900
move forcasting requirements to extras_require
dengdifan Jun 13, 2022
776aa84
bring eval_test to tae
dengdifan Jun 14, 2022
f70e2b3
make rh2epm consistent with SMAC4HPO
dengdifan Jun 14, 2022
50f6f18
remove smac4ac from smbo
dengdifan Jun 14, 2022
2663ad9
revert changes in network
dengdifan Jun 14, 2022
58eeb0c
revert changes in trainer
dengdifan Jun 14, 2022
b86908f
revert format changes
dengdifan Jun 14, 2022
68d8a25
move constant_forecasting to constatn
dengdifan Jun 14, 2022
dac5cdd
additional annotate for base pipeline
dengdifan Jun 14, 2022
7f2d394
move forecasting check to tae
dengdifan Jun 14, 2022
e43d70a
maint time series refit dataset
dengdifan Jun 14, 2022
dc48b9d
fix test
dengdifan Jun 14, 2022
1e7253a
workflow for extra requirements
dengdifan Jun 14, 2022
83e2469
docs for time series dataset
dengdifan Jun 14, 2022
1671992
fix pre-commit
dengdifan Jun 14, 2022
97d3835
docs for dataset
dengdifan Jun 14, 2022
889c5e9
maint docstring
dengdifan Jun 14, 2022
f68dc18
merge target scaler to one file
dengdifan Jun 14, 2022
dc4f510
fix forecasting init cfgs
dengdifan Jun 14, 2022
951ef4e
remove redudant pipeline configs
dengdifan Jun 14, 2022
10f0c83
maint
dengdifan Jun 14, 2022
8574c6f
SMAC4HPO instead of SMAC4AC in smbo (will be reverted further if stud…
dengdifan Jun 15, 2022
86e39bc
fixed docstrign for RNN and Transformer Decoder
dengdifan Jun 15, 2022
21fbcb2
uniformed docstrings for smbo and base task
dengdifan Jun 15, 2022
ee66c25
correct encoder to decoder in decoder.init
dengdifan Jun 15, 2022
877a124
fix doc strings
dengdifan Jun 15, 2022
1d3a74e
add license and docstrings for NBEATS heads
dengdifan Jun 16, 2022
2516859
allow memory limit to be None
dengdifan Jun 16, 2022
fe5e587
relax test load for forecasting
dengdifan Jun 16, 2022
2c6f66f
fix docs
dengdifan Jun 16, 2022
bb7f5c5
fix pre-commit
dengdifan Jun 16, 2022
9d728b5
make test compatible with py37
dengdifan Jun 17, 2022
a331093
maint docstring
dengdifan Jun 17, 2022
8a5a91b
split forecasting_eval_train_function from eval_train_function
dengdifan Jun 17, 2022
acddd22
fix namespace for test_api from train_evaluator to tae
dengdifan Jun 17, 2022
b18ce92
maint test api for forecasting
dengdifan Jun 17, 2022
0700e61
decrease number of ensemble size of test_time_series_forecasting to r…
dengdifan Jun 17, 2022
e4328ee
flatten all the prediction for forecasting pipelines
dengdifan Jun 17, 2022
b6baef1
pre-commit fix
dengdifan Jun 17, 2022
c1de20f
Merge remote-tracking branch 'upstream/development' into time_series_…
dengdifan Jun 20, 2022
0771c8e
fix docstrings and typing
dengdifan Jun 20, 2022
d066fda
maint time series dataset docstrings
dengdifan Jun 22, 2022
f701df3
maint warning message in time_series_forecasting_train_evaluator
dengdifan Jun 22, 2022
5e970f6
fix lines that are overlength
dengdifan Jun 22, 2022
fix doc strings
dengdifan committed Jun 15, 2022
commit 877a12481b6beafb235447e9521aedded9660838
90 changes: 80 additions & 10 deletions autoPyTorch/evaluation/time_series_forecasting_train_evaluator.py
@@ -22,6 +22,86 @@


class TimeSeriesForecastingTrainEvaluator(TrainEvaluator):
"""
This class is similar to the TrainEvaluator, except that it is adapted to the specific requirements of time series forecasting tasks.

Attributes:
backend (Backend):
An object to interface with the disk storage. In particular, allows to
access the train and test datasets
queue (Queue):
Each worker available will instantiate an evaluator, and after completion,
it will return the evaluation result via a multiprocessing queue
metric (autoPyTorchMetric):
A scorer object that is able to evaluate how good a pipeline was fit. It
is a wrapper on top of the actual score method (a wrapper on top of scikit-learn
accuracy, for example) that formats the predictions accordingly.
budget (float):
The number of epochs or the amount of time a configuration is allowed to run.
budget_type (str):
The budget type, which can be epochs or time
pipeline_config (Optional[Dict[str, Any]]):
Defines the content of the pipeline being evaluated. For example, it
contains pipeline specific settings like logging name, or whether or not
to use tensorboard.
configuration (Union[int, str, Configuration]):
Determines the pipeline to be constructed. A dummy estimator is created for
integer configurations, a traditional machine learning pipeline is created
for string-based configurations, and NAS is performed when a configuration
object is passed.
seed (int):
An integer that allows for reproducibility of results
output_y_hat_optimization (bool):
Whether this worker should output the target predictions, so that they are
stored on disk. Fundamentally, the resampling strategy might shuffle the
Y_train targets, so we store the split in order to re-use them for ensemble
selection.
num_run (Optional[int]):
An identifier of the current configuration being fit. This number is unique per
configuration.
include (Optional[Dict[str, Any]]):
An optional dictionary to include components of the pipeline steps.
exclude (Optional[Dict[str, Any]]):
An optional dictionary to exclude components of the pipeline steps.
disable_file_output (Optional[List[Union[str, DisableFileOutputParameters]]]):
Used as a list to pass more fine-grained
information on what to save. Must be a member of `DisableFileOutputParameters`.
Allowed elements in the list are:

+ `y_optimization`:
do not save the predictions for the optimization set,
which would later on be used to build an ensemble. Note that SMAC
optimizes a metric evaluated on the optimization set.
+ `pipeline`:
do not save any individual pipeline files
+ `pipelines`:
In case of cross validation, disables saving the joint model of the
pipelines fit on each fold.
+ `y_test`:
do not save the predictions for the test set.
+ `all`:
do not save any of the above.
For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`.
init_params (Optional[Dict[str, Any]]):
Optional argument that is passed to each pipeline step. It is the equivalent of
kwargs for the pipeline steps.
logger_port (Optional[int]):
Logging is performed using a socket-server scheme to be robust against many
parallel entities that want to write to the same file. This integer states the
socket port for the communication channel. If None is provided, a traditional
logger is used.
all_supported_metrics (bool):
Whether all supported metrics should be calculated for every configuration.
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
An object used to fine tune the hyperparameter search space of the pipeline
max_budget (float):
Maximal budget value available to the optimizer. This is used to compute the size of the proxy
validation sets
min_num_test_instances (Optional[int]):
Minimal number of validation instances to be evaluated; this ensures that there are enough
instances in the validation set

"""
def __init__(self, backend: Backend, queue: Queue,
metric: autoPyTorchMetric,
budget: float,
@@ -41,16 +121,6 @@ def __init__(self, backend: Backend, queue: Queue,
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
max_budget: float = 1.0,
min_num_test_instances: Optional[int] = None) -> None:
"""
Attributes:
max_budget (Optional[float]):
maximal budget the optimizer could allocate
min_num_test_instances: Optional[int]
minimal number of validation instances to be evaluated, if the size of the validation set is greater
than this value, then less instances from validation sets will be evaluated. The other predictions
will be filled with dummy predictor

"""
super(TimeSeriesForecastingTrainEvaluator, self).__init__(
backend=backend,
queue=queue,
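To make the budget attributes above concrete, here is a minimal, hypothetical sketch (illustrative names, not the PR's actual code) of how a proxy validation set size can be derived from budget, max_budget and min_num_test_instances:

def proxy_validation_size(num_val_instances: int, budget: float,
                          max_budget: float, min_num_test_instances: int) -> int:
    # small validation sets are always evaluated in full
    if num_val_instances <= min_num_test_instances:
        return num_val_instances
    # lower fidelities evaluate a budget-proportional subset; the remaining
    # predictions are filled in by the dummy predictor
    num_evaluated = int(num_val_instances * budget / max_budget)
    return max(min_num_test_instances, min(num_val_instances, num_evaluated))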
@@ -54,12 +54,12 @@ def get_available_components(
include/exclude directives, as well as the dataset properties

Args:
include (Optional[Dict[str, Any]]):
what hyper-parameter configurations to honor when creating the configuration space
exclude (Optional[Dict[str, Any]]):
what hyper-parameter configurations to remove from the configuration space
dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]):
Characteristics of the dataset to guide the pipeline choices of components
include (Optional[Dict[str, Any]]):
what hyper-parameter configurations to honor when creating the configuration space
exclude (Optional[Dict[str, Any]]):
what hyper-parameter configurations to remove from the configuration space
dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]):
Characteristics of the dataset to guide the pipeline choices of components

Returns:
Dict[str, autoPyTorchComponent]: A filtered dict of learning
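As a hedged illustration of the include/exclude directives documented above (step and component names are placeholders, not an exhaustive list), the dictionaries typically look like:

include = {'network_embedding': ['NoEmbedding']}   # only these components are considered
exclude = {'network_init': ['KaimingInit']}        # these components are removed

Passing such directives narrows the configuration space before the matching components are filtered.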
@@ -154,6 +154,41 @@ def get_lagged_subsequences_inference(


class AbstractForecastingNet(nn.Module):
"""
This is a basic forecasting network. It is composed only of an embedding net, an encoder and a head (including
an MLP decoder and the final head).

This structure is active when the decoder is an MLP with auto_regressive set to False

Attributes:
network_structure (NetworkStructure):
network structure information
network_embedding (nn.Module):
network embedding
network_encoder (Dict[str, EncoderBlockInfo]):
Encoder network; it can be configured to return a sequence or a 2D matrix
network_decoder (Dict[str, DecoderBlockInfo]):
network decoder
temporal_fusion (Optional[TemporalFusionLayer]):
Temporal Fusion Layer
network_head (nn.Module):
network head, maps the output of decoder to the final output
dataset_properties (Dict):
dataset properties
auto_regressive (bool):
whether the model is an auto-regressive model
output_type (str):
the form of the network output; it can be regression, distribution or quantile
forecast_strategy (str):
only valid if output_type is distribution or quantile; determines how the network transforms
its output into predicted values, either mean or sample
num_samples (int):
only valid if output_type is not regression and forecast_strategy is sample. This indicates the
number of points to sample when doing prediction
aggregation (str):
only valid if output_type is not regression and forecast_strategy is sample. The way that the samples
are aggregated. We could take their mean or median values.
"""
future_target_required = False
dtype = torch.float
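To make output_type / forecast_strategy / num_samples / aggregation concrete, here is a minimal sketch of sample aggregation, assuming the sampled outputs are stacked along a leading sample dimension (names and shapes are illustrative, not the PR's code):

import torch

def aggregate_samples(samples: torch.Tensor, aggregation: str = 'mean') -> torch.Tensor:
    # samples: (num_samples, batch, horizon, output_dim) -> point forecast
    if aggregation == 'mean':
        return samples.mean(dim=0)
    if aggregation == 'median':
        return samples.median(dim=0)[0]
    raise ValueError(f"unknown aggregation: {aggregation}")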

@@ -178,41 +213,6 @@ def __init__(self,
num_samples: int = 50,
aggregation: str = 'mean'
):
"""
This is a basic forecasting network. It is only composed of a embedding net, an encoder and a head (including
MLP decoder and the final head).

This structure is active when the decoder is a MLP with auto_regressive set as false

Args:
network_structure (NetworkStructure):
network structure information
network_embedding (nn.Module):
network embedding
network_encoder (Dict[str, EncoderBlockInfo]):
Encoder network, could be selected to return a sequence or a 2D Matrix
network_decoder (Dict[str, DecoderBlockInfo]):
network decoder
temporal_fusion Optional[TemporalFusionLayer]:
Temporal Fusion Layer
network_head (nn.Module):
network head, maps the output of decoder to the final output
dataset_properties (Dict):
dataset properties
auto_regressive (bool):
if the model is auto-regressive model
output_type (str):
the form that the network outputs. It could be regression, distribution or quantile
forecast_strategy (str):
only valid if output_type is distribution or quantile, how the network transforms
its output to predicted values, could be mean or sample
num_samples (int):
only valid if output_type is not regression and forecast_strategy is sample. This indicates the
number of the points to sample when doing prediction
aggregation (str):
only valid if output_type is not regression and forecast_strategy is sample. The way that the samples
are aggregated. We could take their mean or median values.
"""
super().__init__()
self.network_structure = network_structure
self.embedding = network_embedding
@@ -305,6 +305,23 @@ def rescale_output(self,
loc: Optional[torch.Tensor],
scale: Optional[torch.Tensor],
device: torch.device = torch.device('cpu')) -> ALL_NET_OUTPUT:
"""
rescale the network output to its raw scale

Args:
outputs (ALL_NET_OUTPUT):
network head output
loc (Optional[torch.Tensor]):
scaling location value
scale (Optional[torch.Tensor]):
scaling scale value
device (torch.device):
the device on which the output is stored

Return:
ALL_NET_OUTPUT:
rescaled network output
"""
if isinstance(outputs, List):
return [self.rescale_output(output, loc, scale, device) for output in outputs]
if loc is not None or scale is not None:
@@ -323,17 +340,34 @@ def scale_value(self,
return outputs

def scale_value(self,
outputs: torch.Tensor,
raw_value: torch.Tensor,
loc: Optional[torch.Tensor],
scale: Optional[torch.Tensor],
device: torch.device = torch.device('cpu')) -> torch.Tensor:
"""
scale the raw input value

Args:
raw_value (torch.Tensor):
raw input value to be scaled
loc (Optional[torch.Tensor]):
scaling location value
scale (Optional[torch.Tensor]):
scaling scale value
device (torch.device):
the device on which the value is stored

Return:
torch.Tensor:
scaled input value
"""
if loc is not None or scale is not None:
if loc is None:
outputs = outputs / scale.to(device) # type: ignore[union-attr]
outputs = raw_value / scale.to(device) # type: ignore[union-attr]
elif scale is None:
outputs = outputs - loc.to(device)
outputs = raw_value - loc.to(device)
else:
outputs = (outputs - loc.to(device)) / scale.to(device)
outputs = (raw_value - loc.to(device)) / scale.to(device)
return outputs
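A tiny round-trip sketch of the two methods: scale_value normalises a raw value with loc and scale, while rescale_output applies the inverse transform to the network output (the inverse shown here is inferred from the docstrings; values are illustrative):

import torch

loc, scale = torch.tensor([10.0]), torch.tensor([2.0])
raw = torch.tensor([14.0])
scaled = (raw - loc) / scale     # scale_value: tensor([2.])
restored = scaled * scale + loc  # rescale_output inverts the transform: tensor([14.])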

@abstractmethod
@@ -349,6 +383,17 @@ def forward(self,

@abstractmethod
def pred_from_net_output(self, net_output: ALL_NET_OUTPUT) -> torch.Tensor:
"""
This function transforms the network head output into a torch tensor that contains the point prediction

Args:
net_output (ALL_NET_OUTPUT):
network head output

Return:
torch.Tensor:
point prediction
"""
raise NotImplementedError

@abstractmethod
@@ -364,6 +409,23 @@ def repeat_intermediate_values(self,
intermediate_values: List[Optional[Union[torch.Tensor, Tuple[torch.Tensor]]]],
is_hidden_states: List[bool],
repeats: int) -> List[Optional[Union[torch.Tensor, Tuple[torch.Tensor]]]]:
"""
This function is mainly applied to auto-regressive models, where we sample multiple points to form several
trajectories and need to repeat the intermediate values so that the batch sizes match

Args:
intermediate_values (List[Optional[Union[torch.Tensor, Tuple[torch.Tensor]]]]):
a list of intermediate values to be repeated
is_hidden_states (List[bool]):
whether the intermediate value is a hidden state of an RNN-style network; hidden states
need to be treated differently
repeats (int):
number of repeats

Return:
List[Optional[Union[torch.Tensor, Tuple[torch.Tensor]]]]:
repeated values
"""
for i, (is_hx, inter_value) in enumerate(zip(is_hidden_states, intermediate_values)):
if isinstance(inter_value, torch.Tensor):
repeated_value = inter_value.repeat_interleave(repeats=repeats, dim=1 if is_hx else 0)
@@ -375,6 +437,19 @@ def repeat_intermediate_values(self,
return intermediate_values
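For instance, RNN hidden states carry the batch on dim 1 while ordinary tensors carry it on dim 0, which is why the code above switches the repeat dimension (shapes are illustrative):

import torch

hx = torch.zeros(2, 4, 16)                      # hidden state: (num_layers, batch, hidden)
out = torch.zeros(4, 7, 16)                     # ordinary value: (batch, seq, hidden)
hx3 = hx.repeat_interleave(repeats=3, dim=1)    # -> (2, 12, 16)
out3 = out.repeat_interleave(repeats=3, dim=0)  # -> (12, 7, 16)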

def pad_tensor(self, tensor_to_be_padded: torch.Tensor, target_length: int) -> torch.Tensor:
"""
pad tensor to meet the required length

Args:
tensor_to_be_padded (torch.Tensor):
tensor to be padded
target_length (int):
target length

Return:
torch.Tensor:
padded tensor
"""
tensor_shape = tensor_to_be_padded.shape
padding_size = [tensor_shape[0], target_length - tensor_shape[1], tensor_shape[-1]]
tensor_to_be_padded = torch.cat([tensor_to_be_padded.new_zeros(padding_size), tensor_to_be_padded], dim=1)
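A small worked example of the padding above, which prepends zeros along the time axis until target_length is reached (shapes are illustrative):

import torch

t = torch.ones(8, 3, 5)              # (batch, length=3, features)
pad = t.new_zeros(8, 7 - 3, 5)       # zeros for the missing time steps
padded = torch.cat([pad, t], dim=1)  # -> (8, 7, 5); original values sit at the tail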
@@ -1174,6 +1249,9 @@ def forward(self, # type: ignore[override]
past_observed_targets: Optional[torch.BoolTensor] = None,
decoder_observed_values: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor,
Tuple[torch.Tensor, torch.Tensor]]:

# Unlike other networks, the NBEATS network is required to predict both past and future targets.
# Therefore, we return two tensors, one for the backcast and one for the forecast
if past_observed_targets is None:
past_observed_targets = torch.ones_like(past_targets, dtype=torch.bool)

Expand All @@ -1194,6 +1272,7 @@ def forward(self, # type: ignore[override]
forecast = torch.zeros(forcast_shape).to(self.device).flatten(1)
backcast, _ = self.encoder(past_targets, [None])
backcast = backcast[0]
# the NBEATS network only has one decoder block (a flat decoder)
for block in self.decoder.decoder['block_1']:
backcast_block, forecast_block = block([None], backcast)

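For context, the loop above follows the standard N-BEATS doubly residual scheme, sketched here with illustrative names (the PR's blocks additionally receive an encoder-input list):

for block in blocks:
    backcast_block, forecast_block = block(backcast)
    backcast = backcast - backcast_block  # remove what this block has explained
    forecast = forecast + forecast_block  # accumulate this block's contribution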