Commit 1e08fc9
[feature] Greedy Portfolio (#200)
* initial configurations added
* In progress, adding flag in search function
* Adds documentation, example and fixes setup.py
* Address comments from Shuhei, change run_greedy to portfolio_selection
* Address comments from Francisco, move portfolio to configs
* Address comments from Francisco, add tests for greedy portfolio
* fix flake tests
* Simplify portfolio selection
* Update autoPyTorch/optimizer/smbo.py
* Address comments from Francisco, path exception handling and test
* fix flake
* Address comments from Shuhei
* fix bug in setup.py
* fix tests in base trainer evaluate, increase n samples and add seed
* fix tests in base trainer evaluate, increase n samples (fix)

Co-authored-by: Francisco Rivera Valverde <44504424+franchuterivera@users.noreply.github.com>
1 parent 097cb99 commit 1e08fc9

16 files changed: +853 -35 lines

autoPyTorch/api/base_task.py

Lines changed: 15 additions & 2 deletions
@@ -121,6 +121,9 @@ class BaseTask:
         exclude_components (Optional[Dict]): If None, all possible components are used.
             Otherwise specifies set of components not to use. Incompatible with include
             components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
 
     def __init__(
@@ -697,6 +700,7 @@ def _search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
+        portfolio_selection: Optional[str] = None
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -767,7 +771,15 @@ def _search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-
+            portfolio_selection (str), (default=None):
+                This argument controls the initial configurations that
+                AutoPyTorch uses to warm start SMAC for hyperparameter
+                optimization. By default, no warm-starting happens.
+                The user can provide a path to a json file containing
+                configurations, similar to (...herepathtogreedy...).
+                Additionally, the keyword 'greedy' is supported,
+                which would use the default portfolio from
+                `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
         Returns:
             self
 
@@ -955,7 +967,8 @@ def _search(
             # We do not increase the num_run here, this is something
             # smac does internally
             start_num_run=self._backend.get_next_num_run(peek=True),
-            search_space_updates=self.search_space_updates
+            search_space_updates=self.search_space_updates,
+            portfolio_selection=portfolio_selection,
         )
         try:
             run_history, self.trajectory, budget_type = \
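The base class docstring above also gains the search_space_updates argument. Below is a hedged, illustrative sketch of how such an update object might be built and handed to a concrete task; the import path, the append() signature, and the 'lr_scheduler' node name are assumptions about the surrounding AutoPyTorch API rather than part of this commit.

# Hedged sketch: building a HyperparameterSearchSpaceUpdates object as documented above.
# The append() signature and the "lr_scheduler" node name are assumptions, not taken from this diff.
from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates

updates = HyperparameterSearchSpaceUpdates()
updates.append(
    node_name="lr_scheduler",          # pipeline node whose search space is modified (assumed name)
    hyperparameter="__choice__",       # restrict which scheduler components may be chosen
    value_range=["CosineAnnealingLR"],
    default_value="CosineAnnealingLR",
)

# The updates object is then passed to the task constructor, which forwards it
# to BaseTask via the search_space_updates argument documented above.
api = TabularClassificationTask(search_space_updates=updates)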

autoPyTorch/api/tabular_classification.py

Lines changed: 30 additions & 17 deletions
@@ -56,6 +56,9 @@ class TabularClassificationTask(BaseTask):
             If None, all possible components are used. Otherwise
             specifies set of components not to use. Incompatible
             with include components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
     def __init__(
         self,
@@ -119,6 +122,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
+        portfolio_selection: Optional[str] = None,
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -131,21 +135,21 @@ def search(
                 A pair of features (X_train) and targets (y_train) used to fit a
                 pipeline. Additionally, a holdout of this pairs (X_test, y_test) can
                 be provided to track the generalization performance of each stage.
-            optimize_metric (str): name of the metric that is used to
-                evaluate a pipeline.
+            optimize_metric (str):
+                name of the metric that is used to evaluate a pipeline.
             budget_type (Optional[str]):
                 Type of budget to be used when fitting the pipeline.
                 Either 'epochs' or 'runtime'. If not provided, uses
                 the default in the pipeline config ('epochs')
             budget (Optional[float]):
                 Budget to fit a single run of the pipeline. If not
                 provided, uses the default in the pipeline config
-            total_walltime_limit (int), (default=100): Time limit
-                in seconds for the search of appropriate models.
+            total_walltime_limit (int), (default=100):
+                Time limit in seconds for the search of appropriate models.
                 By increasing this value, autopytorch has a higher
                 chance of finding better models.
-            func_eval_time_limit_secs (int), (default=None): Time limit
-                for a single call to the machine learning model.
+            func_eval_time_limit_secs (int), (default=None):
+                Time limit for a single call to the machine learning model.
                 Model fitting will be terminated if the machine
                 learning algorithm runs over the time limit. Set
                 this value high enough so that typical machine
@@ -162,32 +166,40 @@ def search(
                 feature by turning this flag to False. All machine learning
                 algorithms that are fitted during search() are considered for
                 ensemble building.
-            memory_limit (Optional[int]), (default=4096): Memory
-                limit in MB for the machine learning algorithm. autopytorch
+            memory_limit (Optional[int]), (default=4096):
+                Memory limit in MB for the machine learning algorithm. autopytorch
                 will stop fitting the machine learning algorithm if it tries
                 to allocate more than memory_limit MB. If None is provided,
                 no memory limit is set. In case of multi-processing, memory_limit
                 will be per job. This memory limit also applies to the ensemble
                 creation process.
-            smac_scenario_args (Optional[Dict]): Additional arguments inserted
-                into the scenario of SMAC. See the
+            smac_scenario_args (Optional[Dict]):
+                Additional arguments inserted into the scenario of SMAC. See the
                 [SMAC documentation] (https://automl.github.io/SMAC3/master/options.html?highlight=scenario#scenario)
-            get_smac_object_callback (Optional[Callable]): Callback function
-                to create an object of class
+            get_smac_object_callback (Optional[Callable]):
+                Callback function to create an object of class
                 [smac.optimizer.smbo.SMBO](https://automl.github.io/SMAC3/master/apidoc/smac.optimizer.smbo.html).
                 The function must accept the arguments scenario_dict,
                 instances, num_params, runhistory, seed and ta. This is
                 an advanced feature. Use only if you are familiar with
                 [SMAC](https://automl.github.io/SMAC3/master/index.html).
-            all_supported_metrics (bool), (default=True): if True, all
-                metrics supporting current task will be calculated
+            all_supported_metrics (bool), (default=True):
+                if True, all metrics supporting current task will be calculated
                 for each pipeline and results will be available via cv_results
             precision (int), (default=32): Numeric precision used when loading
                 ensemble data. Can be either '16', '32' or '64'.
             disable_file_output (Union[bool, List]):
-            load_models (bool), (default=True): Whether to load the
-                models after fitting AutoPyTorch.
-
+            load_models (bool), (default=True):
+                Whether to load the models after fitting AutoPyTorch.
+            portfolio_selection (str), (default=None):
+                This argument controls the initial configurations that
+                AutoPyTorch uses to warm start SMAC for hyperparameter
+                optimization. By default, no warm-starting happens.
+                The user can provide a path to a json file containing
+                configurations, similar to (...herepathtogreedy...).
+                Additionally, the keyword 'greedy' is supported,
+                which would use the default portfolio from
+                `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
         Returns:
             self
 
@@ -233,6 +245,7 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
+            portfolio_selection=portfolio_selection,
         )
 
     def predict(
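For orientation, here is a hedged usage sketch of the new portfolio_selection argument on the public classification API. Only the keyword arguments documented in the diff above come from this commit; the dataset, metric name, and time limits are illustrative.

# Hedged usage sketch for portfolio_selection='greedy'. The OpenML dataset id and
# the time limits are illustrative values, not part of this commit.
import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask

X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, random_state=1)

api = TabularClassificationTask()
api.search(
    X_train=X_train, y_train=y_train,
    X_test=X_test, y_test=y_test,
    optimize_metric='accuracy',
    total_walltime_limit=300,
    func_eval_time_limit_secs=50,
    portfolio_selection="greedy",  # warm start SMAC with the default greedy portfolio
)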

autoPyTorch/api/tabular_regression.py

Lines changed: 14 additions & 0 deletions
@@ -47,6 +47,9 @@ class TabularRegressionTask(BaseTask):
         exclude_components (Optional[Dict]): If None, all possible components are used.
             Otherwise specifies set of components not to use. Incompatible with include
             components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
 
     def __init__(
@@ -111,6 +114,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
+        portfolio_selection: Optional[str] = None,
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -175,6 +179,15 @@ def search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
+            portfolio_selection (str), (default=None):
+                This argument controls the initial configurations that
+                AutoPyTorch uses to warm start SMAC for hyperparameter
+                optimization. By default, no warm-starting happens.
+                The user can provide a path to a json file containing
+                configurations, similar to (...herepathtogreedy...).
+                Additionally, the keyword 'greedy' is supported,
+                which would use the default portfolio from
+                `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
 
         Returns:
             self
@@ -221,6 +234,7 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
+            portfolio_selection=portfolio_selection,
         )
 
     def predict(
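The regression API takes the same argument. The sketch below shows warm starting from a user-supplied portfolio file; the path and dataset are hypothetical, and the expected JSON schema follows the (elided) reference file mentioned in the docstring.

# Hedged sketch: warm starting the regression search from a custom portfolio.
# "./my_portfolio.json" is a hypothetical path to a user-provided JSON file of
# configurations; its exact schema is not shown in this diff.
import sklearn.datasets

from autoPyTorch.api.tabular_regression import TabularRegressionTask

X, y = sklearn.datasets.fetch_california_housing(return_X_y=True)

api = TabularRegressionTask()
api.search(
    X_train=X, y_train=y,
    optimize_metric='r2',
    total_walltime_limit=300,
    func_eval_time_limit_secs=50,
    portfolio_selection="./my_portfolio.json",  # path to a JSON file of configurations
)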

0 commit comments
