automl
diff --git a/‎refactor_development/.buildinfo
Lines changed: 1 addition & 1 deletion b/‎refactor_development/.buildinfo
Lines changed: 1 addition & 1 deletion
diff --git a/‎refactor_development/_downloads/08a8d852a0652c3f1a4a72cc129ab783/example_tabular_classification.py
Lines changed: 11 additions & 27 deletions b/‎refactor_development/_downloads/08a8d852a0652c3f1a4a72cc129ab783/example_tabular_classification.py
Lines changed: 11 additions & 27 deletions
diff --git a/‎refactor_development/_downloads/bc82bea3a5dd7bdba60b65220891d9e5/examples_python.zip
-594 Bytes b/‎refactor_development/_downloads/bc82bea3a5dd7bdba60b65220891d9e5/examples_python.zip
-594 Bytes
diff --git a/‎refactor_development/_downloads/c62547d844fd82c936bc377a1da9a504/example_tabular_classification.ipynb
Lines changed: 1 addition & 1 deletion b/‎refactor_development/_downloads/c62547d844fd82c936bc377a1da9a504/example_tabular_classification.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎refactor_development/_downloads/fb625db3c50d423b1b7881136ffdeec8/examples_jupyter.zip
-616 Bytes b/‎refactor_development/_downloads/fb625db3c50d423b1b7881136ffdeec8/examples_jupyter.zip
-616 Bytes
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: badc6677826ff369d0cd924e63d913f3
+config: 16955af63d37db79a6e43b67a0b42045
 tags: 645f666f9bcd5a90fca523b33c5a78b7
@@ -8,7 +8,6 @@
 """
 import os
 import tempfile as tmp
-import typing
 import warnings
 
 os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
@@ -23,29 +22,9 @@
 import sklearn.model_selection
 
 from autoPyTorch.api.tabular_classification import TabularClassificationTask
-from autoPyTorch.datasets.tabular_dataset import TabularDataset
 from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
 
 
-# Get the training data for tabular classification
-def get_data_to_train() -> typing.Tuple[typing.Any, typing.Any, typing.Any, typing.Any]:
-    """
-    This function returns a fit dictionary that within itself, contains all
-    the information to fit a pipeline
-    """
-
-    # Get the training data for tabular classification
-    # Move to Australian to showcase numerical vs categorical
-    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
-    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
-        X,
-        y,
-        random_state=1,
-    )
-
-    return X_train, X_test, y_train, y_test
-
-
 def get_search_space_updates():
     """
     Search space updates to the task can be added using HyperparameterSearchSpaceUpdates
@@ -72,10 +51,12 @@ def get_search_space_updates():
     ############################################################################
     # Data Loading
     # ============
-    X_train, X_test, y_train, y_test = get_data_to_train()
-    datamanager = TabularDataset(
-        X=X_train, Y=y_train,
-        X_test=X_test, Y_test=y_test)
+    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X,
+        y,
+        random_state=1,
+    )
 
     ############################################################################
     # Build and fit a classifier
@@ -85,10 +66,13 @@ def get_search_space_updates():
         search_space_updates=get_search_space_updates()
     )
     api.search(
-        dataset=datamanager,
+        X_train=X_train,
+        y_train=y_train,
+        X_test=X_test.copy(),
+        y_test=y_test.copy(),
         optimize_metric='accuracy',
         total_walltime_limit=500,
-        func_eval_time_limit=150
+        func_eval_time_limit=50
     )
 
     ############################################################################
 
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\nimport tempfile as tmp\nimport typing\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.tabular_dataset import TabularDataset\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\n# Get the training data for tabular classification\ndef get_data_to_train() -> typing.Tuple[typing.Any, typing.Any, typing.Any, typing.Any]:\n    \"\"\"\n    This function returns a fit dictionary that within itself, contains all\n    the information to fit a pipeline\n    \"\"\"\n\n    # Get the training data for tabular classification\n    # Move to Australian to showcase numerical vs categorical\n    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=1,\n    )\n\n    return X_train, X_test, y_train, y_test\n\n\ndef get_search_space_updates():\n    \"\"\"\n    Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n    Returns:\n        HyperparameterSearchSpaceUpdates\n    \"\"\"\n    updates = HyperparameterSearchSpaceUpdates()\n    updates.append(node_name=\"data_loader\",\n                   hyperparameter=\"batch_size\",\n                   value_range=[16, 512],\n                   default_value=32)\n    updates.append(node_name=\"lr_scheduler\",\n                   hyperparameter=\"CosineAnnealingLR:T_max\",\n                   value_range=[50, 60],\n                   default_value=55)\n    updates.append(node_name='network_backbone',\n                   hyperparameter='ResNetBackbone:dropout',\n                   value_range=[0, 0.5],\n                   default_value=0.2)\n    return updates\n\n\nif __name__ == '__main__':\n    ############################################################################\n    # Data Loading\n    # ============\n    X_train, X_test, y_train, y_test = get_data_to_train()\n    datamanager = TabularDataset(\n        X=X_train, Y=y_train,\n        X_test=X_test, Y_test=y_test)\n\n    ############################################################################\n    # Build and fit a classifier\n    # ==========================\n    api = TabularClassificationTask(\n        delete_tmp_folder_after_terminate=False,\n        search_space_updates=get_search_space_updates()\n    )\n    api.search(\n        dataset=datamanager,\n        optimize_metric='accuracy',\n        total_walltime_limit=500,\n        func_eval_time_limit=150\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)"
+        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\ndef get_search_space_updates():\n    \"\"\"\n    Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n    Returns:\n        HyperparameterSearchSpaceUpdates\n    \"\"\"\n    updates = HyperparameterSearchSpaceUpdates()\n    updates.append(node_name=\"data_loader\",\n                   hyperparameter=\"batch_size\",\n                   value_range=[16, 512],\n                   default_value=32)\n    updates.append(node_name=\"lr_scheduler\",\n                   hyperparameter=\"CosineAnnealingLR:T_max\",\n                   value_range=[50, 60],\n                   default_value=55)\n    updates.append(node_name='network_backbone',\n                   hyperparameter='ResNetBackbone:dropout',\n                   value_range=[0, 0.5],\n                   default_value=0.2)\n    return updates\n\n\nif __name__ == '__main__':\n    ############################################################################\n    # Data Loading\n    # ============\n    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=1,\n    )\n\n    ############################################################################\n    # Build and fit a classifier\n    # ==========================\n    api = TabularClassificationTask(\n        delete_tmp_folder_after_terminate=False,\n        search_space_updates=get_search_space_updates()\n    )\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=500,\n        func_eval_time_limit=50\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)"
       ]
     }
   ],
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "import os\nimport tempfile as tmp\nimport typing\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.tabular_dataset import TabularDataset\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\n# Get the training data for tabular classification\ndef get_data_to_train() -> typing.Tuple[typing.Any, typing.Any, typing.Any, typing.Any]:\n \"\"\"\n This function returns a fit dictionary that within itself, contains all\n the information to fit a pipeline\n \"\"\"\n\n # Get the training data for tabular classification\n # Move to Australian to showcase numerical vs categorical\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n return X_train, X_test, y_train, y_test\n\n\ndef get_search_space_updates():\n \"\"\"\n Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n Returns:\n HyperparameterSearchSpaceUpdates\n \"\"\"\n updates = HyperparameterSearchSpaceUpdates()\n updates.append(node_name=\"data_loader\",\n hyperparameter=\"batch_size\",\n value_range=[16, 512],\n default_value=32)\n updates.append(node_name=\"lr_scheduler\",\n hyperparameter=\"CosineAnnealingLR:T_max\",\n value_range=[50, 60],\n default_value=55)\n updates.append(node_name='network_backbone',\n hyperparameter='ResNetBackbone:dropout',\n value_range=[0, 0.5],\n default_value=0.2)\n return updates\n\n\nif __name__ == '__main__':\n ############################################################################\n # Data Loading\n # ============\n X_train, X_test, y_train, y_test = get_data_to_train()\n datamanager = TabularDataset(\n X=X_train, Y=y_train,\n X_test=X_test, Y_test=y_test)\n\n ############################################################################\n # Build and fit a classifier\n # ==========================\n api = TabularClassificationTask(\n delete_tmp_folder_after_terminate=False,\n search_space_updates=get_search_space_updates()\n )\n api.search(\n dataset=datamanager,\n optimize_metric='accuracy',\n total_walltime_limit=500,\n func_eval_time_limit=150\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)"
	`29`	+ "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\ndef get_search_space_updates():\n \"\"\"\n Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n Returns:\n HyperparameterSearchSpaceUpdates\n \"\"\"\n updates = HyperparameterSearchSpaceUpdates()\n updates.append(node_name=\"data_loader\",\n hyperparameter=\"batch_size\",\n value_range=[16, 512],\n default_value=32)\n updates.append(node_name=\"lr_scheduler\",\n hyperparameter=\"CosineAnnealingLR:T_max\",\n value_range=[50, 60],\n default_value=55)\n updates.append(node_name='network_backbone',\n hyperparameter='ResNetBackbone:dropout',\n value_range=[0, 0.5],\n default_value=0.2)\n return updates\n\n\nif __name__ == '__main__':\n ############################################################################\n # Data Loading\n # ============\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n ############################################################################\n # Build and fit a classifier\n # ==========================\n api = TabularClassificationTask(\n delete_tmp_folder_after_terminate=False,\n search_space_updates=get_search_space_updates()\n )\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=500,\n func_eval_time_limit=50\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)"
`30`	`30`	`]`
`31`	`31`	`}`
`32`	`32`	`],`