diff --git a/README.md b/README.md index 389ab7902..3fb449fb1 100755 --- a/README.md +++ b/README.md @@ -56,7 +56,6 @@ git submodule update --init --recursive conda create -n auto-pytorch python=3.8 conda activate auto-pytorch conda install swig -cat requirements.txt | xargs -n 1 -L 1 pip install python setup.py install ``` diff --git a/examples/40_advanced/example_custom_configuration_space.py b/examples/40_advanced/example_custom_configuration_space.py index c64a4fca1..985d9d9ff 100644 --- a/examples/40_advanced/example_custom_configuration_space.py +++ b/examples/40_advanced/example_custom_configuration_space.py @@ -5,6 +5,7 @@ The following example shows how adjust the configuration space of the search. Currently, there are two changes that can be made to the space:- + 1. Adjust individual hyperparameters in the pipeline 2. Include or exclude components: a) include: Dictionary containing components to include. Key is the node @@ -57,80 +58,78 @@ def get_search_space_updates(): return updates -if __name__ == '__main__': - - ############################################################################ - # Data Loading - # ============ - X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, - y, - random_state=1, - ) - - ############################################################################ - # Build and fit a classifier with include components - # ================================================== - api = TabularClassificationTask( - search_space_updates=get_search_space_updates(), - include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'], - 'encoder': ['OneHotEncoder']} - ) - - ############################################################################ - # Search for an ensemble of machine learning algorithms - # ===================================================== - api.search( - X_train=X_train.copy(), - 
y_train=y_train.copy(), - X_test=X_test.copy(), - y_test=y_test.copy(), - optimize_metric='accuracy', - total_walltime_limit=150, - func_eval_time_limit_secs=30 - ) - - ############################################################################ - # Print the final ensemble performance - # ==================================== - y_pred = api.predict(X_test) - score = api.score(y_pred, y_test) - print(score) - print(api.show_models()) - - # Print statistics from search - print(api.sprint_statistics()) - - ############################################################################ - # Build and fit a classifier with exclude components - # ================================================== - api = TabularClassificationTask( - search_space_updates=get_search_space_updates(), - exclude_components={'network_backbone': ['MLPBackbone'], - 'encoder': ['OneHotEncoder']} - ) - - ############################################################################ - # Search for an ensemble of machine learning algorithms - # ===================================================== - api.search( - X_train=X_train, - y_train=y_train, - X_test=X_test.copy(), - y_test=y_test.copy(), - optimize_metric='accuracy', - total_walltime_limit=150, - func_eval_time_limit_secs=30 - ) - - ############################################################################ - # Print the final ensemble performance - # ==================================== - y_pred = api.predict(X_test) - score = api.score(y_pred, y_test) - print(score) - print(api.show_models()) - - # Print statistics from search - print(api.sprint_statistics()) +############################################################################ +# Data Loading +# ============ +X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=1, +) + +############################################################################ +# Build 
and fit a classifier with include components +# ================================================== +api = TabularClassificationTask( + search_space_updates=get_search_space_updates(), + include_components={'network_backbone': ['MLPBackbone', 'ResNetBackbone'], + 'encoder': ['OneHotEncoder']} +) + +############################################################################ +# Search for an ensemble of machine learning algorithms +# ===================================================== +api.search( + X_train=X_train.copy(), + y_train=y_train.copy(), + X_test=X_test.copy(), + y_test=y_test.copy(), + optimize_metric='accuracy', + total_walltime_limit=150, + func_eval_time_limit_secs=30 +) + +############################################################################ +# Print the final ensemble performance +# ==================================== +y_pred = api.predict(X_test) +score = api.score(y_pred, y_test) +print(score) +print(api.show_models()) + +# Print statistics from search +print(api.sprint_statistics()) + +############################################################################ +# Build and fit a classifier with exclude components +# ================================================== +api = TabularClassificationTask( + search_space_updates=get_search_space_updates(), + exclude_components={'network_backbone': ['MLPBackbone'], + 'encoder': ['OneHotEncoder']} +) + +############################################################################ +# Search for an ensemble of machine learning algorithms +# ===================================================== +api.search( + X_train=X_train, + y_train=y_train, + X_test=X_test.copy(), + y_test=y_test.copy(), + optimize_metric='accuracy', + total_walltime_limit=150, + func_eval_time_limit_secs=30 +) + +############################################################################ +# Print the final ensemble performance +# ==================================== +y_pred = api.predict(X_test) +score = api.score(y_pred, 
y_test) +print(score) +print(api.show_models()) + +# Print statistics from search +print(api.sprint_statistics()) diff --git a/examples/40_advanced/example_parallel_n_jobs.py b/examples/40_advanced/example_parallel_n_jobs.py index 698f3ad61..d345c6fca 100644 --- a/examples/40_advanced/example_parallel_n_jobs.py +++ b/examples/40_advanced/example_parallel_n_jobs.py @@ -1,10 +1,11 @@ """ -====================== -Tabular Classification -====================== +============================================ +Tabular Classification with n parallel jobs +============================================ The following example shows how to fit a sample classification model parallely on 2 cores with AutoPyTorch + """ import os import tempfile as tmp @@ -60,9 +61,9 @@ ############################################################################ # Print the final ensemble performance # ==================================== - print(api.run_history, api.trajectory) y_pred = api.predict(X_test) score = api.score(y_pred, y_test) print(score) # Print the final ensemble built by AutoPyTorch - print(api.show_models()) + print(api.sprint_statistics()) + diff --git a/examples/40_advanced/example_resampling_strategy.py b/examples/40_advanced/example_resampling_strategy.py index 6735fffee..d02859f1b 100644 --- a/examples/40_advanced/example_resampling_strategy.py +++ b/examples/40_advanced/example_resampling_strategy.py @@ -26,10 +26,13 @@ from autoPyTorch.api.tabular_classification import TabularClassificationTask from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes +############################################################################ +# Default Resampling Strategy +# ============================ ############################################################################ # Data Loading -# ============ +# ------------ X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) X_train, X_test, y_train, y_test = 
sklearn.model_selection.train_test_split( X, @@ -39,7 +42,7 @@ ############################################################################ # Build and fit a classifier with default resampling strategy -# =========================================================== +# ----------------------------------------------------------- api = TabularClassificationTask( # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33 # is the default argument setting for TabularClassificationTask. @@ -51,7 +54,7 @@ ############################################################################ # Search for an ensemble of machine learning algorithms -# ===================================================== +# ----------------------------------------------------- api.search( X_train=X_train, y_train=y_train, @@ -64,7 +67,7 @@ ############################################################################ # Print the final ensemble performance -# ==================================== +# ------------------------------------ y_pred = api.predict(X_test) score = api.score(y_pred, y_test) print(score) @@ -76,9 +79,13 @@ ############################################################################ +############################################################################ +# Cross validation Resampling Strategy +# ===================================== + ############################################################################ # Build and fit a classifier with Cross validation resampling strategy -# ==================================================================== +# -------------------------------------------------------------------- api = TabularClassificationTask( resampling_strategy=CrossValTypes.k_fold_cross_validation, resampling_strategy_args={'num_splits': 3} @@ -86,7 +93,8 @@ ############################################################################ # Search for an ensemble of machine learning algorithms -# ===================================================== +# 
----------------------------------------------------- + api.search( X_train=X_train, y_train=y_train, @@ -99,7 +107,7 @@ ############################################################################ # Print the final ensemble performance -# ==================================== +# ------------------------------------ y_pred = api.predict(X_test) score = api.score(y_pred, y_test) print(score) @@ -111,9 +119,13 @@ ############################################################################ +############################################################################ +# Stratified Resampling Strategy +# =============================== + ############################################################################ # Build and fit a classifier with Stratified resampling strategy -# ============================================================== +# -------------------------------------------------------------- api = TabularClassificationTask( # For demonstration purposes, we use # Stratified hold out validation. 
However, @@ -124,7 +136,7 @@ ############################################################################ # Search for an ensemble of machine learning algorithms -# ===================================================== +# ----------------------------------------------------- api.search( X_train=X_train, y_train=y_train, diff --git a/examples/40_advanced/example_run_with_portfolio.py b/examples/40_advanced/example_run_with_portfolio.py index 01d8bef15..fef230fc5 100644 --- a/examples/40_advanced/example_run_with_portfolio.py +++ b/examples/40_advanced/example_run_with_portfolio.py @@ -24,50 +24,48 @@ from autoPyTorch.api.tabular_classification import TabularClassificationTask -if __name__ == '__main__': +############################################################################ +# Data Loading +# ============ +X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=42, +) - ############################################################################ - # Data Loading - # ============ - X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, - y, - random_state=42, - ) +############################################################################ +# Build and fit a classifier +# ========================== +api = TabularClassificationTask( + seed=42, +) - ############################################################################ - # Build and fit a classifier - # ========================== - api = TabularClassificationTask( - seed=42, - ) +############################################################################ +# Search for an ensemble of machine learning algorithms +# ===================================================== +api.search( + X_train=X_train, + y_train=y_train, + X_test=X_test.copy(), + y_test=y_test.copy(), + 
optimize_metric='accuracy', + total_walltime_limit=300, + func_eval_time_limit_secs=50, + # Setting this option to "greedy" + # will make smac run the configurations + # present in 'autoPyTorch/configs/greedy_portfolio.json' + portfolio_selection="greedy" +) - ############################################################################ - # Search for an ensemble of machine learning algorithms - # ===================================================== - api.search( - X_train=X_train, - y_train=y_train, - X_test=X_test.copy(), - y_test=y_test.copy(), - optimize_metric='accuracy', - total_walltime_limit=300, - func_eval_time_limit_secs=50, - # Setting this option to "greedy" - # will make smac run the configurations - # present in 'autoPyTorch/configs/greedy_portfolio.json' - portfolio_selection="greedy" - ) +############################################################################ +# Print the final ensemble performance +# ==================================== +y_pred = api.predict(X_test) +score = api.score(y_pred, y_test) +print(score) +# Print the final ensemble built by AutoPyTorch +print(api.show_models()) - ############################################################################ - # Print the final ensemble performance - # ==================================== - y_pred = api.predict(X_test) - score = api.score(y_pred, y_test) - print(score) - # Print the final ensemble built by AutoPyTorch - print(api.show_models()) - - # Print statistics from search - print(api.sprint_statistics()) +# Print statistics from search +print(api.sprint_statistics())