
Commit ba3571a

Ravin Kohli: [feature] Greedy Portfolio (#200)
Author: Github Actions (committed)
1 parent 9048912 · commit ba3571a

32 files changed: +1060 −137 lines
Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
"""
============================================
Tabular Classification with Greedy Portfolio
============================================

The following example shows how to fit a sample classification model
with AutoPyTorch using the greedy portfolio
"""
import os
import tempfile as tmp
import warnings

os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask


if __name__ == '__main__':

    ############################################################################
    # Data Loading
    # ============
    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X,
        y,
        random_state=42,
    )

    ############################################################################
    # Build and fit a classifier
    # ==========================
    api = TabularClassificationTask(
        seed=42,
    )

    ############################################################################
    # Search for an ensemble of machine learning algorithms
    # =====================================================
    api.search(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test.copy(),
        y_test=y_test.copy(),
        optimize_metric='accuracy',
        total_walltime_limit=300,
        func_eval_time_limit_secs=50,
        # Setting this option to "greedy"
        # will make smac run the configurations
        # present in 'autoPyTorch/configs/greedy_portfolio.json'
        portfolio_selection="greedy"
    )

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    print(api.run_history, api.trajectory)
    y_pred = api.predict(X_test)
    score = api.score(y_pred, y_test)
    print(score)
    # Print the final ensemble built by AutoPyTorch
    print(api.show_models())
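The comment in the search() call above points at the bundled portfolio file, autoPyTorch/configs/greedy_portfolio.json. The snippet below is a small sketch, not part of this commit, for locating and inspecting that file from an installed copy of the package; it assumes the file ships next to the package code and is plain JSON, and makes no assumption about its schema beyond that.

import json
import os

import autoPyTorch

# Assumption: the portfolio ships inside the installed package under
# configs/greedy_portfolio.json, as the example's comment suggests.
portfolio_path = os.path.join(
    os.path.dirname(autoPyTorch.__file__), 'configs', 'greedy_portfolio.json'
)

with open(portfolio_path) as fh:
    portfolio = json.load(fh)

# Report how many warm-start configurations the portfolio contains.
print(type(portfolio).__name__, len(portfolio))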
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Tabular Classification with Greedy Portfolio\n\nThe following example shows how to fit a sample classification model\nwith AutoPyTorch using the greedy portfolio\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\n\n\nif __name__ == '__main__':\n\n    ############################################################################\n    # Data Loading\n    # ============\n    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X,\n        y,\n        random_state=42,\n    )\n\n    ############################################################################\n    # Build and fit a classifier\n    # ==========================\n    api = TabularClassificationTask(\n        seed=42,\n    )\n\n    ############################################################################\n    # Search for an ensemble of machine learning algorithms\n    # =====================================================\n    api.search(\n        X_train=X_train,\n        y_train=y_train,\n        X_test=X_test.copy(),\n        y_test=y_test.copy(),\n        optimize_metric='accuracy',\n        total_walltime_limit=300,\n        func_eval_time_limit_secs=50,\n        # Setting this option to \"greedy\"\n        # will make smac run the configurations\n        # present in 'autoPyTorch/configs/greedy_portfolio.json'\n        portfolio_selection=\"greedy\"\n    )\n\n    ############################################################################\n    # Print the final ensemble performance\n    # ====================================\n    print(api.run_history, api.trajectory)\n    y_pred = api.predict(X_test)\n    score = api.score(y_pred, y_test)\n    print(score)\n    # Print the final ensemble built by AutoPyTorch\n    print(api.show_models())"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.10"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}

development/_modules/autoPyTorch/api/tabular_classification.html

Lines changed: 30 additions & 17 deletions
@@ -169,6 +169,9 @@ Source code for autoPyTorch.api.tabular_classification
            If None, all possible components are used. Otherwise
            specifies set of components not to use. Incompatible
            with include components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
        """
    def __init__(
        self,
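The search_space_updates argument documented above lets a user narrow or shift the ranges of individual hyperparameters before the search starts. The following is only a hedged sketch of how such an object might be built and passed to the task; the module path autoPyTorch.utils.hyperparameter_search_space_update and the append(node_name=..., hyperparameter=..., value_range=..., default_value=...) signature are assumptions taken from other AutoPyTorch examples, not something shown in this diff.

# Hedged sketch: import path and append() signature are assumptions,
# not part of this commit.
from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch.utils.hyperparameter_search_space_update import (
    HyperparameterSearchSpaceUpdates,
)

updates = HyperparameterSearchSpaceUpdates()
# Restrict the batch sizes the data loader component may try.
updates.append(
    node_name='data_loader',
    hyperparameter='batch_size',
    value_range=[32, 128],
    default_value=64,
)

api = TabularClassificationTask(
    seed=42,
    search_space_updates=updates,
)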
@@ -232,6 +235,7 @@
        precision: int = 32,
        disable_file_output: List = [],
        load_models: bool = True,
+        portfolio_selection: Optional[str] = None,
    ) -> 'BaseTask':
        """
        Search for the best pipeline configuration for the given dataset.
@@ -244,21 +248,21 @@
                A pair of features (X_train) and targets (y_train) used to fit a
                pipeline. Additionally, a holdout of this pairs (X_test, y_test) can
                be provided to track the generalization performance of each stage.
-            optimize_metric (str): name of the metric that is used to
-                evaluate a pipeline.
+            optimize_metric (str):
+                name of the metric that is used to evaluate a pipeline.
            budget_type (Optional[str]):
                Type of budget to be used when fitting the pipeline.
                Either 'epochs' or 'runtime'. If not provided, uses
                the default in the pipeline config ('epochs')
            budget (Optional[float]):
                Budget to fit a single run of the pipeline. If not
                provided, uses the default in the pipeline config
-            total_walltime_limit (int), (default=100): Time limit
-                in seconds for the search of appropriate models.
+            total_walltime_limit (int), (default=100):
+                Time limit in seconds for the search of appropriate models.
                By increasing this value, autopytorch has a higher
                chance of finding better models.
-            func_eval_time_limit_secs (int), (default=None): Time limit
-                for a single call to the machine learning model.
+            func_eval_time_limit_secs (int), (default=None):
+                Time limit for a single call to the machine learning model.
                Model fitting will be terminated if the machine
                learning algorithm runs over the time limit. Set
                this value high enough so that typical machine
@@ -275,32 +279,40 @@
                feature by turning this flag to False. All machine learning
                algorithms that are fitted during search() are considered for
                ensemble building.
-            memory_limit (Optional[int]), (default=4096): Memory
-                limit in MB for the machine learning algorithm. autopytorch
+            memory_limit (Optional[int]), (default=4096):
+                Memory limit in MB for the machine learning algorithm. autopytorch
                will stop fitting the machine learning algorithm if it tries
                to allocate more than memory_limit MB. If None is provided,
                no memory limit is set. In case of multi-processing, memory_limit
                will be per job. This memory limit also applies to the ensemble
                creation process.
-            smac_scenario_args (Optional[Dict]): Additional arguments inserted
-                into the scenario of SMAC. See the
+            smac_scenario_args (Optional[Dict]):
+                Additional arguments inserted into the scenario of SMAC. See the
                [SMAC documentation] (https://automl.github.io/SMAC3/master/options.html?highlight=scenario#scenario)
-            get_smac_object_callback (Optional[Callable]): Callback function
-                to create an object of class
+            get_smac_object_callback (Optional[Callable]):
+                Callback function to create an object of class
                [smac.optimizer.smbo.SMBO](https://automl.github.io/SMAC3/master/apidoc/smac.optimizer.smbo.html).
                The function must accept the arguments scenario_dict,
                instances, num_params, runhistory, seed and ta. This is
                an advanced feature. Use only if you are familiar with
                [SMAC](https://automl.github.io/SMAC3/master/index.html).
-            all_supported_metrics (bool), (default=True): if True, all
-                metrics supporting current task will be calculated
+            all_supported_metrics (bool), (default=True):
+                if True, all metrics supporting current task will be calculated
                for each pipeline and results will be available via cv_results
            precision (int), (default=32): Numeric precision used when loading
                ensemble data. Can be either '16', '32' or '64'.
            disable_file_output (Union[bool, List]):
-            load_models (bool), (default=True): Whether to load the
-                models after fitting AutoPyTorch.
-
+            load_models (bool), (default=True):
+                Whether to load the models after fitting AutoPyTorch.
+            portfolio_selection (str), (default=None):
+                This argument controls the initial configurations that
+                AutoPyTorch uses to warm start SMAC for hyperparameter
+                optimization. By default, no warm-starting happens.
+                The user can provide a path to a json file containing
+                configurations, similar to (...herepathtogreedy...).
+                Additionally, the keyword 'greedy' is supported,
+                which would use the default portfolio from
+                `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
        Returns:
            self
@@ -346,6 +358,7 @@
            precision=precision,
            disable_file_output=disable_file_output,
            load_models=load_models,
+            portfolio_selection=portfolio_selection,
        )

    def predict(
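Besides the built-in 'greedy' keyword, the docstring added above states that portfolio_selection also accepts a path to a user-supplied JSON file of configurations. Below is a hedged sketch of that variant, reusing the api, X_train and y_train objects from the example at the top of this commit; my_portfolio.json is a hypothetical file whose contents would have to follow the same schema as the bundled greedy portfolio.

# Hedged sketch: 'my_portfolio.json' is a hypothetical, user-provided file.
api.search(
    X_train=X_train,
    y_train=y_train,
    optimize_metric='accuracy',
    total_walltime_limit=300,
    func_eval_time_limit_secs=50,
    # A path to a JSON file of configurations warm-starts SMAC with
    # those configurations instead of the bundled greedy portfolio.
    portfolio_selection='./my_portfolio.json',
)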

development/_sources/examples/20_basics/example_image_classification.rst.txt

Lines changed: 10 additions & 12 deletions
@@ -89,20 +89,18 @@ Image Classification
    Pipeline Random Config:
    ________________________________________
    Configuration:
-      image_augmenter:GaussianBlur:sigma_min, Value: 2.366941395804103
-      image_augmenter:GaussianBlur:sigma_offset, Value: 2.368203230872295
-      image_augmenter:GaussianBlur:use_augmenter, Value: True
-      image_augmenter:GaussianNoise:sigma_offset, Value: 0.12078542050406282
+      image_augmenter:GaussianBlur:use_augmenter, Value: False
+      image_augmenter:GaussianNoise:sigma_offset, Value: 1.2338395220421345
      image_augmenter:GaussianNoise:use_augmenter, Value: True
-      image_augmenter:RandomAffine:rotate, Value: 269
-      image_augmenter:RandomAffine:scale_offset, Value: 0.05588649414637841
-      image_augmenter:RandomAffine:shear, Value: 28
-      image_augmenter:RandomAffine:translate_percent_offset, Value: 0.35268602065512145
+      image_augmenter:RandomAffine:rotate, Value: 313
+      image_augmenter:RandomAffine:scale_offset, Value: 0.21852230155367117
+      image_augmenter:RandomAffine:shear, Value: 15
+      image_augmenter:RandomAffine:translate_percent_offset, Value: 0.03252023210559827
      image_augmenter:RandomAffine:use_augmenter, Value: True
-      image_augmenter:RandomCutout:p, Value: 0.6556358764831947
+      image_augmenter:RandomCutout:p, Value: 0.530501187923635
      image_augmenter:RandomCutout:use_augmenter, Value: True
-      image_augmenter:Resize:use_augmenter, Value: False
-      image_augmenter:ZeroPadAndCrop:percent, Value: 0.4856157166545874
+      image_augmenter:Resize:use_augmenter, Value: True
+      image_augmenter:ZeroPadAndCrop:percent, Value: 0.049910811329537064
      normalizer:__choice__, Value: 'ImageNormalizer'

    Fitting the pipeline...

@@ -182,7 +180,7 @@ Image Classification

.. rst-class:: sphx-glr-timing

-   **Total running time of the script:** ( 0 minutes 7.488 seconds)
+   **Total running time of the script:** ( 0 minutes 7.665 seconds)


.. _sphx_glr_download_examples_20_basics_example_image_classification.py:
