
Commit f29a7e0

committed
Addressed comments from arlind, change in T_mult and T_0 calculations

Added debug information for API
Fix flake
Fix import
Made test deterministic for feature preprocessing
Fix bug in parsing log
Convert to int
Fix bug in testing
1 parent e236df0 commit f29a7e0

File tree

8 files changed (+55 −81 lines)


autoPyTorch/evaluation/abstract_evaluator.py

Lines changed: 1 addition & 2 deletions
@@ -948,8 +948,7 @@ def file_output(
             pipeline = None
         else:
             pipeline = None
-
-        self.logger.debug("Saving directory {}, {}, {}".format(self.seed, self.num_run, self.budget))
+        self.logger.debug("Saving model {}_{}_{} to disk".format(self.seed, self.num_run, self.budget))
         self.backend.save_numrun_to_dir(
             seed=int(self.seed),
             idx=int(self.num_run),
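
For context, the new message joins seed, run id and budget with underscores, mirroring the {seed}_{num_run}_{budget} naming used for run directories on disk. A minimal sketch of the resulting line, with made-up values:

    seed, num_run, budget = 1, 5, 50.0  # example values, not from the test suite
    print("Saving model {}_{}_{} to disk".format(seed, num_run, budget))
    # -> Saving model 1_5_50.0 to disk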

autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingWarmRestarts.py

Lines changed: 5 additions & 4 deletions
@@ -53,13 +53,14 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent:
         self.check_requirements(X, y)

         # initialise required attributes for the scheduler
-        T_mult: int = 1
-        T_0: int = max(X['epochs'] // self.n_restarts, 1)
+        T_mult: int = 2
+        # using Epochs = T_0 * (T_mult ** n_restarts - 1) / (T_mult - 1) (sum of a GP)
+        T_0: int = max((X['epochs'] * (T_mult - 1)) // (T_mult ** self.n_restarts - 1), 1)

         self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
             optimizer=X['optimizer'],
-            T_0=T_0,
-            T_mult=T_mult,
+            T_0=int(T_0),
+            T_mult=int(T_mult),
         )
         return self
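
The new T_0 comes from inverting the geometric-progression sum in the comment: if the first cosine cycle lasts T_0 epochs and every restart multiplies the cycle length by T_mult, then n_restarts cycles consume T_0 * (T_mult ** n_restarts - 1) / (T_mult - 1) epochs in total. A quick sketch of the arithmetic, assuming n_restarts = 3 and a 20-epoch budget (both values chosen for illustration; n_restarts is set elsewhere on the component):

    epochs, n_restarts = 20, 3  # assumed values for illustration only
    T_mult = 2
    # invert: epochs = T_0 * (T_mult ** n_restarts - 1) / (T_mult - 1)
    T_0 = max((epochs * (T_mult - 1)) // (T_mult ** n_restarts - 1), 1)
    cycle_lengths = [T_0 * T_mult ** i for i in range(n_restarts)]
    print(T_0, cycle_lengths, sum(cycle_lengths))
    # -> 2 [2, 4, 8] 14, so all restarts fit inside the epoch budget

The old formula (T_0 = epochs // n_restarts with T_mult = 1) assumed equal-length cycles, which no longer holds once each restart doubles the cycle length.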

autoPyTorch/pipeline/components/setup/lr_scheduler/ReduceLROnPlateau.py

Lines changed: 0 additions & 1 deletion
@@ -99,7 +99,6 @@ def get_hyperparameter_search_space(
                                         default_value=0.1,
                                         )
     ) -> ConfigurationSpace:
-
         cs = ConfigurationSpace()

         add_hyperparameter(cs, mode, CategoricalHyperparameter)

test/test_api/api_utils.py

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
+import glob
+import os
+
+
+def print_debug_information(automl):
+
+    # Log file path
+    log_file = glob.glob(os.path.join(
+        automl._backend.temporary_directory, 'AutoPyTorch*.log'))[0]
+
+    include_messages = ['INFO', 'DEBUG', 'WARN',
+                        'CRITICAL', 'ERROR', 'FATAL']
+
+    # There is a lot of content in the log files. Only
+    # parse the main messages and ignore the
+    # metalearning messages
+    try:
+        with open(log_file) as logfile:
+            content = logfile.readlines()
+
+        # Keep only the messages that make debugging easier
+        content = [line for line in content if any(
+            msg in line for msg in include_messages
+        ) and 'metalearning' not in line]
+
+    except Exception as e:
+        return str(e)
+
+    # Also add the run history if any
+    if hasattr(automl, 'runhistory_') and hasattr(automl.runhistory_, 'data'):
+        for k, v in automl.runhistory_.data.items():
+            content += ["{}->{}".format(k, v)]
+    else:
+        content += ['No RunHistory']
+
+    # Also add the ensemble history if any
+    if len(automl.ensemble_performance_history) > 0:
+        content += [str(h) for h in automl.ensemble_performance_history]
+    else:
+        content += ['No Ensemble History']
+
+    return os.linesep.join(content)
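
The helper is wired into test_api.py below; as a standalone usage sketch (estimator and model_file are placeholders for the objects the real test builds):

    import os
    from test.test_api.api_utils import print_debug_information

    # On failure, pytest prints the filtered log plus run/ensemble history
    assert os.path.exists(model_file), print_debug_information(estimator)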

test/test_api/test_api.py

Lines changed: 4 additions & 1 deletion
@@ -41,6 +41,8 @@
 from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner import _traditional_learners
 from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy

+from test.test_api.api_utils import print_debug_information
+

 CV_NUM_SPLITS = 2
 HOLDOUT_NUM_SPLITS = 1
@@ -154,7 +156,8 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl
         run_key_model_run_dir,
         f"{estimator.seed}.{successful_num_run}.{run_key.budget}.cv_model"
     )
-    assert os.path.exists(model_file), model_file
+    time.sleep(5)
+    assert os.path.exists(model_file), print_debug_information(estimator)

     model = estimator._backend.load_cv_model_by_seed_and_id_and_budget(
         estimator.seed, successful_num_run, run_key.budget)

test/test_pipeline/components/preprocessing/test_feature_preprocessor.py

Lines changed: 1 addition & 1 deletion
@@ -107,7 +107,7 @@ def test_pipeline_fit_include(self, fit_dictionary_tabular, preprocessor):
             dataset_properties=fit_dictionary_tabular['dataset_properties'],
             include={'feature_preprocessor': [preprocessor]})
         cs = pipeline.get_hyperparameter_search_space()
-        config = cs.sample_configuration()
+        config = cs.get_default_configuration()
         pipeline.set_hyperparameters(config)
         try:
             pipeline.fit(fit_dictionary_tabular)
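
This is what makes the test deterministic: sample_configuration() draws from the space's random state, while get_default_configuration() always returns the declared default of every hyperparameter. A minimal ConfigSpace sketch (the 'lr' hyperparameter and its bounds are invented for illustration):

    from ConfigSpace.configuration_space import ConfigurationSpace
    from ConfigSpace.hyperparameters import UniformFloatHyperparameter

    cs = ConfigurationSpace(seed=1)
    cs.add_hyperparameter(
        UniformFloatHyperparameter('lr', lower=1e-4, upper=1e-1, default_value=1e-2))
    print(cs.sample_configuration())       # varies with the seed / RNG state
    print(cs.get_default_configuration())  # always lr=0.01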

test/test_pipeline/test_tabular_classification.py

Lines changed: 2 additions & 1 deletion
@@ -379,7 +379,8 @@ def test_set_choices_updates(self, fit_dictionary_tabular):
     @pytest.mark.parametrize('lr_scheduler', ['CosineAnnealingWarmRestarts',
                                               'ReduceLROnPlateau'])
     def test_trainer_cocktails(self, fit_dictionary_tabular, mocker, lr_scheduler, trainer):  # noqa F811
-        fit_dictionary_tabular['epochs'] = 10
+        fit_dictionary_tabular['epochs'] = 20
+        fit_dictionary_tabular['early_stopping'] = 20
         pipeline = TabularClassificationPipeline(
             dataset_properties=fit_dictionary_tabular['dataset_properties'],
             include={'lr_scheduler': [lr_scheduler], 'trainer': [trainer]})

test/utils.py

Lines changed: 0 additions & 71 deletions
This file was deleted.
