Skip to content

Commit 3f61383

Browse files
author
Github Actions
committed
Ravin Kohli: Merge pull request #89 from franchuterivera/InputValidator
1 parent d501028 commit 3f61383

32 files changed

+2337
-2463
lines changed

refactor_development/.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: badc6677826ff369d0cd924e63d913f3
3+
config: 16955af63d37db79a6e43b67a0b42045
44
tags: 645f666f9bcd5a90fca523b33c5a78b7

refactor_development/_downloads/08a8d852a0652c3f1a4a72cc129ab783/example_tabular_classification.py

Lines changed: 11 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
"""
99
import os
1010
import tempfile as tmp
11-
import typing
1211
import warnings
1312

1413
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
@@ -23,29 +22,9 @@
2322
import sklearn.model_selection
2423

2524
from autoPyTorch.api.tabular_classification import TabularClassificationTask
26-
from autoPyTorch.datasets.tabular_dataset import TabularDataset
2725
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
2826

2927

30-
# Get the training data for tabular classification
31-
def get_data_to_train() -> typing.Tuple[typing.Any, typing.Any, typing.Any, typing.Any]:
32-
"""
33-
This function returns a fit dictionary that within itself, contains all
34-
the information to fit a pipeline
35-
"""
36-
37-
# Get the training data for tabular classification
38-
# Move to Australian to showcase numerical vs categorical
39-
X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
40-
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
41-
X,
42-
y,
43-
random_state=1,
44-
)
45-
46-
return X_train, X_test, y_train, y_test
47-
48-
4928
def get_search_space_updates():
5029
"""
5130
Search space updates to the task can be added using HyperparameterSearchSpaceUpdates
@@ -72,10 +51,12 @@ def get_search_space_updates():
7251
############################################################################
7352
# Data Loading
7453
# ============
75-
X_train, X_test, y_train, y_test = get_data_to_train()
76-
datamanager = TabularDataset(
77-
X=X_train, Y=y_train,
78-
X_test=X_test, Y_test=y_test)
54+
X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
55+
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
56+
X,
57+
y,
58+
random_state=1,
59+
)
7960

8061
############################################################################
8162
# Build and fit a classifier
@@ -85,10 +66,13 @@ def get_search_space_updates():
8566
search_space_updates=get_search_space_updates()
8667
)
8768
api.search(
88-
dataset=datamanager,
69+
X_train=X_train,
70+
y_train=y_train,
71+
X_test=X_test.copy(),
72+
y_test=y_test.copy(),
8973
optimize_metric='accuracy',
9074
total_walltime_limit=500,
91-
func_eval_time_limit=150
75+
func_eval_time_limit=50
9276
)
9377

9478
############################################################################
Binary file not shown.

refactor_development/_downloads/c62547d844fd82c936bc377a1da9a504/example_tabular_classification.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"import os\nimport tempfile as tmp\nimport typing\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.tabular_dataset import TabularDataset\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\n# Get the training data for tabular classification\ndef get_data_to_train() -> typing.Tuple[typing.Any, typing.Any, typing.Any, typing.Any]:\n \"\"\"\n This function returns a fit dictionary that within itself, contains all\n the information to fit a pipeline\n \"\"\"\n\n # Get the training data for tabular classification\n # Move to Australian to showcase numerical vs categorical\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n return X_train, X_test, y_train, y_test\n\n\ndef get_search_space_updates():\n \"\"\"\n Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n Returns:\n HyperparameterSearchSpaceUpdates\n \"\"\"\n updates = HyperparameterSearchSpaceUpdates()\n updates.append(node_name=\"data_loader\",\n hyperparameter=\"batch_size\",\n value_range=[16, 512],\n default_value=32)\n updates.append(node_name=\"lr_scheduler\",\n hyperparameter=\"CosineAnnealingLR:T_max\",\n value_range=[50, 60],\n default_value=55)\n updates.append(node_name='network_backbone',\n hyperparameter='ResNetBackbone:dropout',\n value_range=[0, 0.5],\n default_value=0.2)\n return updates\n\n\nif __name__ == '__main__':\n ############################################################################\n # Data Loading\n # ============\n X_train, X_test, y_train, y_test = get_data_to_train()\n datamanager = TabularDataset(\n X=X_train, Y=y_train,\n X_test=X_test, Y_test=y_test)\n\n ############################################################################\n # Build and fit a classifier\n # ==========================\n api = TabularClassificationTask(\n delete_tmp_folder_after_terminate=False,\n search_space_updates=get_search_space_updates()\n )\n api.search(\n dataset=datamanager,\n optimize_metric='accuracy',\n total_walltime_limit=500,\n func_eval_time_limit=150\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)"
29+
"import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates\n\n\ndef get_search_space_updates():\n \"\"\"\n Search space updates to the task can be added using HyperparameterSearchSpaceUpdates\n Returns:\n HyperparameterSearchSpaceUpdates\n \"\"\"\n updates = HyperparameterSearchSpaceUpdates()\n updates.append(node_name=\"data_loader\",\n hyperparameter=\"batch_size\",\n value_range=[16, 512],\n default_value=32)\n updates.append(node_name=\"lr_scheduler\",\n hyperparameter=\"CosineAnnealingLR:T_max\",\n value_range=[50, 60],\n default_value=55)\n updates.append(node_name='network_backbone',\n hyperparameter='ResNetBackbone:dropout',\n value_range=[0, 0.5],\n default_value=0.2)\n return updates\n\n\nif __name__ == '__main__':\n ############################################################################\n # Data Loading\n # ============\n X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n )\n\n ############################################################################\n # Build and fit a classifier\n # ==========================\n api = TabularClassificationTask(\n delete_tmp_folder_after_terminate=False,\n search_space_updates=get_search_space_updates()\n )\n api.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='accuracy',\n total_walltime_limit=500,\n func_eval_time_limit=50\n )\n\n ############################################################################\n # Print the final ensemble performance\n # ====================================\n print(api.run_history, api.trajectory)\n y_pred = api.predict(X_test)\n score = api.score(y_pred, y_test)\n print(score)"
3030
]
3131
}
3232
],
Binary file not shown.

0 commit comments

Comments
 (0)