Skip to content

Commit e141e4c

Browse files
author
Github Actions
committed
Ravin Kohli: [RELEASE] Release v0.2 (#448)
1 parent 2faea3f commit e141e4c

File tree

106 files changed

+8938
-32219
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+8938
-32219
lines changed

master/.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: 970eb458f32b434db48854a3317b2f05
3+
config: 29bd076c4adc563a4d280f931be44bec
44
tags: 645f666f9bcd5a90fca523b33c5a78b7
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
"""
2+
==============================
3+
Plot the Performance over Time
4+
==============================
5+
6+
Auto-Pytorch uses SMAC to fit individual machine learning algorithms
7+
and then ensembles them together using `Ensemble Selection
8+
<https://www.cs.cornell.edu/~caruana/ctp/ct.papers/caruana.icml04.icdm06long.pdf>`_.
9+
10+
The following example shows how to plot both the performance
11+
of the individual models and their respective ensemble.
12+
13+
Additionally, as we are compatible with matplotlib,
14+
you can input any args or kwargs that are compatible with ax.plot.
15+
If you would like to create a multipanel visualization,
16+
please pass a plt.Axes object obtained from matplotlib.pyplot.subplots.
17+
18+
"""
19+
import warnings
20+
21+
import numpy as np
22+
import pandas as pd
23+
24+
from sklearn import model_selection
25+
26+
import matplotlib.pyplot as plt
27+
28+
from autoPyTorch.api.tabular_classification import TabularClassificationTask
29+
from autoPyTorch.utils.results_visualizer import PlotSettingParams
30+
31+
32+
warnings.simplefilter(action='ignore', category=UserWarning)
33+
warnings.simplefilter(action='ignore', category=FutureWarning)
34+
35+
36+
############################################################################
37+
# Task Definition
38+
# ===============
39+
n_samples, dim = 100, 2
40+
X = np.random.random((n_samples, dim)) * 2 - 1
41+
y = ((X ** 2).sum(axis=-1) < 2 / np.pi).astype(np.int32)
42+
print(y)
43+
44+
X, y = pd.DataFrame(X), pd.DataFrame(y)
45+
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y)
46+
47+
############################################################################
48+
# API Instantiation and Searching
49+
# ===============================
50+
api = TabularClassificationTask(seed=42)
51+
52+
api.search(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
53+
optimize_metric='accuracy', total_walltime_limit=120, func_eval_time_limit_secs=10)
54+
55+
############################################################################
56+
# Create Setting Parameters Object
57+
# ================================
58+
metric_name = 'accuracy'
59+
60+
params = PlotSettingParams(
61+
xscale='log',
62+
xlabel='Runtime',
63+
ylabel='Accuracy',
64+
title='Toy Example',
65+
figname='example_plot_over_time.png',
66+
savefig_kwargs={'bbox_inches': 'tight'},
67+
show=False # If you would like to show, make it True and set figname=None
68+
)
69+
70+
############################################################################
71+
# Plot with the Specified Setting Parameters
72+
# ==========================================
73+
# _, ax = plt.subplots() <=== You can feed it to post-process the figure.
74+
75+
# You might need to run `export DISPLAY=:0.0` if you are using a non-GUI based environment.
76+
api.plot_perf_over_time(
77+
metric_name=metric_name,
78+
plot_setting_params=params,
79+
marker='*',
80+
markersize=10
81+
)

master/_downloads/307f532dbef0476f85afc6b64b65f087/example_resampling_strategy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@
9393

9494
############################################################################
9595
# Search for an ensemble of machine learning algorithms
96-
# -----------------------------------------------------------------------
96+
# -----------------------------------------------------
9797

9898
api.search(
9999
X_train=X_train,
@@ -107,7 +107,7 @@
107107

108108
############################################################################
109109
# Print the final ensemble performance
110-
# ------------
110+
# ------------------------------------
111111
y_pred = api.predict(X_test)
112112
score = api.score(y_pred, y_test)
113113
print(score)

master/_downloads/38ebc52de63d1626596d1647c695c721/example_tabular_regression.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@
118118
"name": "python",
119119
"nbconvert_exporter": "python",
120120
"pygments_lexer": "ipython3",
121-
"version": "3.8.12"
121+
"version": "3.8.13"
122122
}
123123
},
124124
"nbformat": 4,

master/_downloads/3b0b756ccfcac69e6a1673e56f2f543f/example_visualization.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@
9898
},
9999
"outputs": [],
100100
"source": [
101-
"# We will plot the search incumbent through time.\n\n# Collect the performance of individual machine learning algorithms\n# found by SMAC\nindividual_performances = []\nfor run_key, run_value in estimator.run_history.data.items():\n if run_value.status != StatusType.SUCCESS:\n # Ignore crashed runs\n continue\n individual_performances.append({\n 'Timestamp': pd.Timestamp(\n time.strftime(\n '%Y-%m-%d %H:%M:%S',\n time.localtime(run_value.endtime)\n )\n ),\n 'single_best_optimization_accuracy': accuracy._optimum - run_value.cost,\n 'single_best_test_accuracy': np.nan if run_value.additional_info is None else\n accuracy._optimum - run_value.additional_info['test_loss']['accuracy'],\n })\nindividual_performance_frame = pd.DataFrame(individual_performances)\n\n# Collect the performance of the ensemble through time\n# This ensemble is built from the machine learning algorithms\n# found by SMAC\nensemble_performance_frame = pd.DataFrame(estimator.ensemble_performance_history)\n\n# As we are tracking the incumbent, we are interested in the cummax() performance\nensemble_performance_frame['ensemble_optimization_accuracy'] = ensemble_performance_frame[\n 'train_accuracy'\n].cummax()\nensemble_performance_frame['ensemble_test_accuracy'] = ensemble_performance_frame[\n 'test_accuracy'\n].cummax()\nensemble_performance_frame.drop(columns=['test_accuracy', 'train_accuracy'], inplace=True)\nindividual_performance_frame['single_best_optimization_accuracy'] = individual_performance_frame[\n 'single_best_optimization_accuracy'\n].cummax()\nindividual_performance_frame['single_best_test_accuracy'] = individual_performance_frame[\n 'single_best_test_accuracy'\n].cummax()\n\npd.merge(\n ensemble_performance_frame,\n individual_performance_frame,\n on=\"Timestamp\", how='outer'\n).sort_values('Timestamp').fillna(method='ffill').plot(\n x='Timestamp',\n kind='line',\n legend=True,\n title='Auto-PyTorch accuracy over time',\n grid=True,\n)\nplt.show()\n\n# We then can understand the 
importance of each input feature using\n# a permutation importance analysis. This is done as a proof of concept, to\n# showcase that we can leverage of scikit-learn API.\nresult = permutation_importance(estimator, X_train, y_train, n_repeats=5,\n scoring='accuracy',\n random_state=seed)\nsorted_idx = result.importances_mean.argsort()\n\nfig, ax = plt.subplots()\nax.boxplot(result.importances[sorted_idx].T,\n vert=False, labels=X_test.columns[sorted_idx])\nax.set_title(\"Permutation Importances (Train set)\")\nfig.tight_layout()\nplt.show()"
101+
"# We will plot the search incumbent through time.\n\n# Collect the performance of individual machine learning algorithms\n# found by SMAC\nindividual_performances = []\nfor run_key, run_value in estimator.run_history.data.items():\n if run_value.status != StatusType.SUCCESS:\n # Ignore crashed runs\n continue\n individual_performances.append({\n 'Timestamp': pd.Timestamp(\n time.strftime(\n '%Y-%m-%d %H:%M:%S',\n time.localtime(run_value.endtime)\n )\n ),\n 'single_best_optimization_accuracy': accuracy._optimum - run_value.cost,\n 'single_best_test_accuracy': np.nan if run_value.additional_info is None else\n accuracy._optimum - run_value.additional_info['test_loss']['accuracy'],\n })\nindividual_performance_frame = pd.DataFrame(individual_performances)\n\n# Collect the performance of the ensemble through time\n# This ensemble is built from the machine learning algorithms\n# found by SMAC\nensemble_performance_frame = pd.DataFrame(estimator.ensemble_performance_history)\n\n# As we are tracking the incumbent, we are interested in the cummax() performance\nensemble_performance_frame['ensemble_optimization_accuracy'] = ensemble_performance_frame[\n 'train_accuracy'\n].cummax()\nensemble_performance_frame['ensemble_test_accuracy'] = ensemble_performance_frame[\n 'test_accuracy'\n].cummax()\nensemble_performance_frame.drop(columns=['test_accuracy', 'train_accuracy'], inplace=True)\nindividual_performance_frame['single_best_optimization_accuracy'] = individual_performance_frame[\n 'single_best_optimization_accuracy'\n].cummax()\nindividual_performance_frame['single_best_test_accuracy'] = individual_performance_frame[\n 'single_best_test_accuracy'\n].cummax()\n\npd.merge(\n ensemble_performance_frame,\n individual_performance_frame,\n on=\"Timestamp\", how='outer'\n).sort_values('Timestamp').fillna(method='ffill').plot(\n x='Timestamp',\n kind='line',\n legend=True,\n title='Auto-PyTorch accuracy over time',\n grid=True,\n)\nplt.show()"
102102
]
103103
}
104104
],
@@ -118,7 +118,7 @@
118118
"name": "python",
119119
"nbconvert_exporter": "python",
120120
"pygments_lexer": "ipython3",
121-
"version": "3.8.12"
121+
"version": "3.8.13"
122122
}
123123
},
124124
"nbformat": 4,

master/_downloads/3f9c66ebcc4532fdade3cdaa4d769bde/example_custom_configuration_space.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@
172172
"name": "python",
173173
"nbconvert_exporter": "python",
174174
"pygments_lexer": "ipython3",
175-
"version": "3.8.12"
175+
"version": "3.8.13"
176176
}
177177
},
178178
"nbformat": 4,

master/_downloads/4cbefcc88d68bf84110d315dc5fdb8e1/example_resampling_strategy.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@
247247
"name": "python",
248248
"nbconvert_exporter": "python",
249249
"pygments_lexer": "ipython3",
250-
"version": "3.8.12"
250+
"version": "3.8.13"
251251
}
252252
},
253253
"nbformat": 4,

master/_downloads/6ee656697d20c490e1d49bdbfb69d108/example_tabular_classification.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@
118118
"name": "python",
119119
"nbconvert_exporter": "python",
120120
"pygments_lexer": "ipython3",
121-
"version": "3.8.12"
121+
"version": "3.8.13"
122122
}
123123
},
124124
"nbformat": 4,
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"%matplotlib inline"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"\n# Fit a single configuration\n*Auto-PyTorch* searches for the best combination of machine learning algorithms\nand their hyper-parameter configuration for a given task.\nThis example shows how one can fit one of these pipelines, both with a user-defined\nconfiguration and a randomly sampled one from the configuration space.\nThe pipelines that Auto-PyTorch fits are compatible with the Scikit-Learn API. You can\nget further documentation about Scikit-Learn models here: <https://scikit-learn.org/stable/getting_started.html>\n"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {
25+
"collapsed": false
26+
},
27+
"outputs": [],
28+
"source": [
29+
"import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.metrics\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.resampling_strategy import HoldoutValTypes"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"## Data Loading\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [],
46+
"source": [
47+
"X, y = sklearn.datasets.fetch_openml(data_id=3, return_X_y=True, as_frame=True)\nX_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X, y, test_size=0.5, random_state=3\n)"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"metadata": {},
53+
"source": [
54+
"## Define an estimator\n\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {
61+
"collapsed": false
62+
},
63+
"outputs": [],
64+
"source": [
65+
"estimator = TabularClassificationTask(\n resampling_strategy=HoldoutValTypes.holdout_validation,\n resampling_strategy_args={'val_share': 0.5},\n)"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"metadata": {},
71+
"source": [
72+
"## Get a configuration of the pipeline for current dataset\n\n"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {
79+
"collapsed": false
80+
},
81+
"outputs": [],
82+
"source": [
83+
"dataset = estimator.get_dataset(X_train=X_train,\n y_train=y_train,\n X_test=X_test,\n y_test=y_test,\n dataset_name='kr-vs-kp')\nconfiguration = estimator.get_search_space(dataset).get_default_configuration()\n\nprint(\"Passed Configuration:\", configuration)"
84+
]
85+
},
86+
{
87+
"cell_type": "markdown",
88+
"metadata": {},
89+
"source": [
90+
"## Fit the configuration\n\n"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"metadata": {
97+
"collapsed": false
98+
},
99+
"outputs": [],
100+
"source": [
101+
"pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset,\n configuration=configuration,\n budget_type='epochs',\n budget=5,\n run_time_limit_secs=75\n )\n\n# The fit_pipeline command also returns a named tuple with the pipeline constraints\nprint(run_info)\n\n# The fit_pipeline command also returns a named tuple with train/test performance\nprint(run_value)\n\n# This object complies with Scikit-Learn Pipeline API.\n# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html\nprint(pipeline.named_steps)"
102+
]
103+
}
104+
],
105+
"metadata": {
106+
"kernelspec": {
107+
"display_name": "Python 3",
108+
"language": "python",
109+
"name": "python3"
110+
},
111+
"language_info": {
112+
"codemirror_mode": {
113+
"name": "ipython",
114+
"version": 3
115+
},
116+
"file_extension": ".py",
117+
"mimetype": "text/x-python",
118+
"name": "python",
119+
"nbconvert_exporter": "python",
120+
"pygments_lexer": "ipython3",
121+
"version": "3.8.13"
122+
}
123+
},
124+
"nbformat": 4,
125+
"nbformat_minor": 0
126+
}

0 commit comments

Comments
 (0)