automl · franchuterivera · Mar 10, 2021 · Mar 9, 2021
diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py
@@ -105,33 +105,33 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):
     # Search for an existing run key in disc. A individual model might have
     # a timeout and hence was not written to disc
     for i, (run_key, value) in enumerate(estimator.run_history.data.items()):
-        if i == 0:
-            # Ignore dummy run
-            continue
         if 'SUCCESS' not in str(value.status):
             continue
 
         run_key_model_run_dir = estimator._backend.get_numrun_directory(
-            estimator.seed, run_key.config_id, run_key.budget)
+            estimator.seed, run_key.config_id + 1, run_key.budget)
         if os.path.exists(run_key_model_run_dir):
+            # The run key's config_id is not the same as the num_run;
+            # more specifically, num_run = config_id + 1 (for the dummy run)
+            successful_num_run = run_key.config_id + 1
             break
 
     if resampling_strategy == HoldoutValTypes.holdout_validation:
         model_file = os.path.join(run_key_model_run_dir,
-                                  f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.model")
+                                  f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model")
         assert os.path.exists(model_file), model_file
         model = estimator._backend.load_model_by_seed_and_id_and_budget(
-            estimator.seed, run_key.config_id, run_key.budget)
+            estimator.seed, successful_num_run, run_key.budget)
         assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module)
     elif resampling_strategy == CrossValTypes.k_fold_cross_validation:
         model_file = os.path.join(
             run_key_model_run_dir,
-            f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.cv_model"
+            f"{estimator.seed}.{successful_num_run}.{run_key.budget}.cv_model"
         )
         assert os.path.exists(model_file), model_file
 
         model = estimator._backend.load_cv_model_by_seed_and_id_and_budget(
-            estimator.seed, run_key.config_id, run_key.budget)
+            estimator.seed, successful_num_run, run_key.budget)
         assert isinstance(model, VotingClassifier)
         assert len(model.estimators_) == 3
         assert isinstance(model.estimators_[0].named_steps['network'].get_network(),
@@ -142,7 +142,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):
     # Make sure that predictions on the test data are printed and make sense
     test_prediction = os.path.join(run_key_model_run_dir,
                                    estimator._backend.get_prediction_filename(
-                                       'test', estimator.seed, run_key.config_id,
+                                       'test', estimator.seed, successful_num_run,
                                        run_key.budget))
     assert os.path.exists(test_prediction), test_prediction
     assert np.shape(np.load(test_prediction, allow_pickle=True))[0] == np.shape(X_test)[0]
@@ -152,7 +152,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):
     ensemble_prediction = os.path.join(run_key_model_run_dir,
                                        estimator._backend.get_prediction_filename(
                                            'ensemble',
-                                           estimator.seed, run_key.config_id,
+                                           estimator.seed, successful_num_run,
                                            run_key.budget))
     assert os.path.exists(ensemble_prediction), ensemble_prediction
     assert np.shape(np.load(ensemble_prediction, allow_pickle=True))[0] == np.shape(
@@ -213,10 +213,16 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):
     X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
         X, y, random_state=1)
 
+    include = None
+    # For Python versions below 3.7, the learned entity embedding
+    # cannot be stored on disk (this happens only on CI)
+    if sys.version_info < (3, 7):
+        include = {'network_embedding': ['NoEmbedding']}
     # Search for a good configuration
     estimator = TabularRegressionTask(
         backend=backend,
         resampling_strategy=resampling_strategy,
+        include_components=include
     )
 
     estimator.search(
@@ -267,32 +273,32 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):
     # Search for an existing run key in disc. A individual model might have
     # a timeout and hence was not written to disc
     for i, (run_key, value) in enumerate(estimator.run_history.data.items()):
-        if i == 0:
-            # Ignore dummy run
-            continue
         if 'SUCCESS' not in str(value.status):
             continue
 
         run_key_model_run_dir = estimator._backend.get_numrun_directory(
-            estimator.seed, run_key.config_id, run_key.budget)
+            estimator.seed, run_key.config_id + 1, run_key.budget)
         if os.path.exists(run_key_model_run_dir):
+            # The run key's config_id is not the same as the num_run;
+            # more specifically, num_run = config_id + 1 (for the dummy run)
+            successful_num_run = run_key.config_id + 1
             break
 
     if resampling_strategy == HoldoutValTypes.holdout_validation:
         model_file = os.path.join(run_key_model_run_dir,
-                                  f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.model")
+                                  f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model")
         assert os.path.exists(model_file), model_file
         model = estimator._backend.load_model_by_seed_and_id_and_budget(
-            estimator.seed, run_key.config_id, run_key.budget)
+            estimator.seed, successful_num_run, run_key.budget)
         assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module)
     elif resampling_strategy == CrossValTypes.k_fold_cross_validation:
         model_file = os.path.join(
             run_key_model_run_dir,
-            f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.cv_model"
+            f"{estimator.seed}.{successful_num_run}.{run_key.budget}.cv_model"
         )
         assert os.path.exists(model_file), model_file
         model = estimator._backend.load_cv_model_by_seed_and_id_and_budget(
-            estimator.seed, run_key.config_id, run_key.budget)
+            estimator.seed, successful_num_run, run_key.budget)
         assert isinstance(model, VotingRegressor)
         assert len(model.estimators_) == 3
         assert isinstance(model.estimators_[0].named_steps['network'].get_network(),
@@ -303,7 +309,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):
     # Make sure that predictions on the test data are printed and make sense
     test_prediction = os.path.join(run_key_model_run_dir,
                                    estimator._backend.get_prediction_filename(
-                                       'test', estimator.seed, run_key.config_id,
+                                       'test', estimator.seed, successful_num_run,
                                        run_key.budget))
     assert os.path.exists(test_prediction), test_prediction
     assert np.shape(np.load(test_prediction, allow_pickle=True))[0] == np.shape(X_test)[0]
@@ -313,7 +319,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):
     ensemble_prediction = os.path.join(run_key_model_run_dir,
                                        estimator._backend.get_prediction_filename(
                                            'ensemble',
-                                           estimator.seed, run_key.config_id,
+                                           estimator.seed, successful_num_run,
                                            run_key.budget))
     assert os.path.exists(ensemble_prediction), ensemble_prediction
     assert np.shape(np.load(ensemble_prediction, allow_pickle=True))[0] == np.shape(