Skip to content

Commit 9b350aa

Browse files
committed
[FIX] Tests after rebase of reg_cocktails (#359)
* update requirements
* update requirements
* resolve remaining conflicts and fix flake and mypy
* Fix remaining tests and examples
* fix failing checks
* fix flake
1 parent 3e50b27 commit 9b350aa

38 files changed

+291
-1016
lines changed

autoPyTorch/api/base_task.py

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -941,18 +941,15 @@ def run_traditional_ml(
941941
learning algorithm runs over the time limit.
942942
"""
943943
assert self._logger is not None # for mypy compliancy
944-
if STRING_TO_TASK_TYPES[self.task_type] in REGRESSION_TASKS:
945-
self._logger.warning("Traditional Pipeline is not enabled for regression. Skipping...")
946-
else:
947-
traditional_task_name = 'runTraditional'
948-
self._stopwatch.start_task(traditional_task_name)
949-
elapsed_time = self._stopwatch.wall_elapsed(current_task_name)
950-
time_for_traditional = int(runtime_limit - elapsed_time)
951-
self._do_traditional_prediction(
952-
func_eval_time_limit_secs=func_eval_time_limit_secs,
953-
time_left=time_for_traditional,
954-
)
955-
self._stopwatch.stop_task(traditional_task_name)
944+
traditional_task_name = 'runTraditional'
945+
self._stopwatch.start_task(traditional_task_name)
946+
elapsed_time = self._stopwatch.wall_elapsed(current_task_name)
947+
time_for_traditional = int(runtime_limit - elapsed_time)
948+
self._do_traditional_prediction(
949+
func_eval_time_limit_secs=func_eval_time_limit_secs,
950+
time_left=time_for_traditional,
951+
)
952+
self._stopwatch.stop_task(traditional_task_name)
956953

957954
def _search(
958955
self,
@@ -1322,22 +1319,7 @@ def _search(
13221319
self._logger.info("Starting Shutdown")
13231320

13241321
if proc_ensemble is not None:
1325-
self._results_manager.ensemble_performance_history = list(proc_ensemble.history)
1326-
1327-
if len(proc_ensemble.futures) > 0:
1328-
# Also add ensemble runs that did not finish within smac time
1329-
# and add them into the ensemble history
1330-
self._logger.info("Ensemble script still running, waiting for it to finish.")
1331-
result = proc_ensemble.futures.pop().result()
1332-
if result:
1333-
ensemble_history, _, _, _ = result
1334-
self._results_manager.ensemble_performance_history.extend(ensemble_history)
1335-
self._logger.info("Ensemble script finished, continue shutdown.")
1336-
1337-
# save the ensemble performance history file
1338-
if len(self.ensemble_performance_history) > 0:
1339-
pd.DataFrame(self.ensemble_performance_history).to_json(
1340-
os.path.join(self._backend.internals_directory, 'ensemble_history.json'))
1322+
self._collect_results_ensemble(proc_ensemble)
13411323

13421324
if load_models:
13431325
self._logger.info("Loading models...")
@@ -1605,7 +1587,7 @@ def fit_pipeline(
16051587
exclude=self.exclude_components,
16061588
search_space_updates=self.search_space_updates)
16071589
dataset_properties = dataset.get_dataset_properties(dataset_requirements)
1608-
self._backend.replace_datamanager(dataset)
1590+
self._backend.save_datamanager(dataset)
16091591

16101592
if self._logger is None:
16111593
self._logger = self._get_logger(dataset.dataset_name)
@@ -1796,7 +1778,7 @@ def fit_ensemble(
17961778
ensemble_fit_task_name = 'EnsembleFit'
17971779
self._stopwatch.start_task(ensemble_fit_task_name)
17981780
if enable_traditional_pipeline:
1799-
if func_eval_time_limit_secs is None or func_eval_time_limit_secs > time_for_task:
1781+
if func_eval_time_limit_secs > time_for_task:
18001782
self._logger.warning(
18011783
'Time limit for a single run is higher than total time '
18021784
'limit. Capping the limit for a single run to the total '
@@ -1837,12 +1819,8 @@ def fit_ensemble(
18371819
)
18381820

18391821
manager.build_ensemble(self._dask_client)
1840-
future = manager.futures.pop()
1841-
result = future.result()
1842-
if result is None:
1843-
raise ValueError("Errors occurred while building the ensemble - please"
1844-
" check the log file and command line output for error messages.")
1845-
self.ensemble_performance_history, _, _, _ = result
1822+
if manager is not None:
1823+
self._collect_results_ensemble(manager)
18461824

18471825
if load_models:
18481826
self._load_models()
@@ -1920,6 +1898,31 @@ def _init_ensemble_builder(
19201898

19211899
return proc_ensemble
19221900

1901+
def _collect_results_ensemble(
1902+
self,
1903+
manager: EnsembleBuilderManager
1904+
) -> None:
1905+
1906+
if self._logger is None:
1907+
raise ValueError("logger should be initialized to fit ensemble")
1908+
1909+
self._results_manager.ensemble_performance_history = list(manager.history)
1910+
1911+
if len(manager.futures) > 0:
1912+
# Also add ensemble runs that did not finish within smac time
1913+
# and add them into the ensemble history
1914+
self._logger.info("Ensemble script still running, waiting for it to finish.")
1915+
result = manager.futures.pop().result()
1916+
if result:
1917+
ensemble_history, _, _, _ = result
1918+
self._results_manager.ensemble_performance_history.extend(ensemble_history)
1919+
self._logger.info("Ensemble script finished, continue shutdown.")
1920+
1921+
# save the ensemble performance history file
1922+
if len(self.ensemble_performance_history) > 0:
1923+
pd.DataFrame(self.ensemble_performance_history).to_json(
1924+
os.path.join(self._backend.internals_directory, 'ensemble_history.json'))
1925+
19231926
def predict(
19241927
self,
19251928
X_test: np.ndarray,

autoPyTorch/api/tabular_classification.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
1919
from autoPyTorch.datasets.resampling_strategy import (
2020
HoldoutValTypes,
21+
CrossValTypes,
2122
ResamplingStrategies,
2223
)
2324
from autoPyTorch.datasets.tabular_dataset import TabularDataset
@@ -437,6 +438,7 @@ def search(
437438

438439
if self.dataset is None:
439440
raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__))
441+
440442
return self._search(
441443
dataset=self.dataset,
442444
optimize_metric=optimize_metric,
@@ -476,23 +478,23 @@ def predict(
476478
raise ValueError("predict() is only supported after calling search. Kindly call first "
477479
"the estimator search() method.")
478480

479-
X_test = self.input_validator.feature_validator.transform(X_test)
481+
X_test = self.InputValidator.feature_validator.transform(X_test)
480482
predicted_probabilities = super().predict(X_test, batch_size=batch_size,
481483
n_jobs=n_jobs)
482484

483-
if self.input_validator.target_validator.is_single_column_target():
485+
if self.InputValidator.target_validator.is_single_column_target():
484486
predicted_indexes = np.argmax(predicted_probabilities, axis=1)
485487
else:
486488
predicted_indexes = (predicted_probabilities > 0.5).astype(int)
487489

488490
# Allow to predict in the original domain -- that is, the user is not interested
489491
# in our encoded values
490-
return self.input_validator.target_validator.inverse_transform(predicted_indexes)
492+
return self.InputValidator.target_validator.inverse_transform(predicted_indexes)
491493

492494
def predict_proba(self,
493495
X_test: Union[np.ndarray, pd.DataFrame, List],
494496
batch_size: Optional[int] = None, n_jobs: int = 1) -> np.ndarray:
495-
if self.input_validator is None or not self.input_validator._is_fitted:
497+
if self.InputValidator is None or not self.InputValidator._is_fitted:
496498
raise ValueError("predict() is only supported after calling search. Kindly call first "
497499
"the estimator search() method.")
498500
X_test = self.input_validator.feature_validator.transform(X_test)

autoPyTorch/api/tabular_regression.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
1919
from autoPyTorch.datasets.resampling_strategy import (
2020
HoldoutValTypes,
21+
CrossValTypes,
2122
ResamplingStrategies,
2223
)
2324
from autoPyTorch.datasets.tabular_dataset import TabularDataset
@@ -437,6 +438,7 @@ def search(
437438

438439
if self.dataset is None:
439440
raise ValueError("`dataset` in {} must be initialized, but got None".format(self.__class__.__name__))
441+
440442
return self._search(
441443
dataset=self.dataset,
442444
optimize_metric=optimize_metric,
@@ -462,14 +464,14 @@ def predict(
462464
batch_size: Optional[int] = None,
463465
n_jobs: int = 1
464466
) -> np.ndarray:
465-
if self.input_validator is None or not self.input_validator._is_fitted:
467+
if self.InputValidator is None or not self.InputValidator._is_fitted:
466468
raise ValueError("predict() is only supported after calling search. Kindly call first "
467469
"the estimator search() method.")
468470

469-
X_test = self.input_validator.feature_validator.transform(X_test)
471+
X_test = self.InputValidator.feature_validator.transform(X_test)
470472
predicted_values = super().predict(X_test, batch_size=batch_size,
471473
n_jobs=n_jobs)
472474

473475
# Allow to predict in the original domain -- that is, the user is not interested
474476
# in our encoded values
475-
return self.input_validator.target_validator.inverse_transform(predicted_values)
477+
return self.InputValidator.target_validator.inverse_transform(predicted_values)

autoPyTorch/data/base_target_validator.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ def fit(
8686
np.shape(y_test)
8787
))
8888
if isinstance(y_train, pd.DataFrame):
89-
y_train = cast(pd.DataFrame, y_train)
9089
y_test = cast(pd.DataFrame, y_test)
9190
if y_train.columns.tolist() != y_test.columns.tolist():
9291
raise ValueError(

autoPyTorch/data/tabular_feature_validator.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from logging import Logger
33
from typing import Dict, List, Optional, Tuple, Union, cast
44

5+
56
import numpy as np
67

78
import pandas as pd
@@ -270,7 +271,7 @@ def transform(
270271
if isinstance(X, np.ndarray):
271272
X = self.numpy_to_pandas(X)
272273

273-
if hasattr(X, "iloc") and not issparse(X):
274+
if ispandas(X) and not issparse(X):
274275
X = cast(pd.DataFrame, X)
275276

276277
# Check the data here so we catch problems on new test data
@@ -400,9 +401,6 @@ def _get_columns_info(
400401
Type of each column numerical/categorical
401402
"""
402403

403-
if len(self.transformed_columns) > 0 and self.feat_type is not None:
404-
return self.transformed_columns, self.feat_type
405-
406404
# Register if a column needs encoding
407405
numerical_columns = []
408406
categorical_columns = []

autoPyTorch/data/tabular_target_validator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import List, Optional, Union, cast
1+
from typing import List, Optional, cast
22

33
import numpy as np
44

autoPyTorch/evaluation/fit_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@
1010

1111
from smac.tae import StatusType
1212

13+
from autoPyTorch.automl_common.common.utils.backend import Backend
1314
from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes
1415
from autoPyTorch.evaluation.abstract_evaluator import (
1516
AbstractEvaluator,
1617
fit_and_suppress_warnings
1718
)
1819
from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
19-
from autoPyTorch.utils.backend import Backend
2020
from autoPyTorch.utils.common import subsampler
2121
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
2222

autoPyTorch/optimizer/smbo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def __init__(self,
104104
resampling_strategy_args: Optional[Dict[str, Any]] = None,
105105
include: Optional[Dict[str, Any]] = None,
106106
exclude: Optional[Dict[str, Any]] = None,
107-
disable_file_output: List = [],
107+
disable_file_output: Union[bool, List[str]] = False,
108108
smac_scenario_args: Optional[Dict[str, Any]] = None,
109109
get_smac_object_callback: Optional[Callable] = None,
110110
all_supported_metrics: bool = True,

autoPyTorch/pipeline/components/setup/network_backbone/utils.py

Lines changed: 1 addition & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,7 @@ class ShakeDropFunction(Function):
8383
Github URL: https://github.com/owruby/shake-drop_pytorch/blob/master/models/shakedrop.py
8484
"""
8585
@staticmethod
86-
<<<<<<< HEAD
8786
def forward(ctx: Any,
88-
=======
89-
def forward(ctx: typing.Any,
90-
>>>>>>> Bug fixes (#249)
9187
x: torch.Tensor,
9288
alpha: torch.Tensor,
9389
beta: torch.Tensor,
@@ -114,31 +110,20 @@ def backward(ctx: Any,
114110
shake_drop = ShakeDropFunction.apply
115111

116112

117-
<<<<<<< HEAD
118-
def shake_get_alpha_beta(is_training: bool, is_cuda: bool
119-
) -> Tuple[torch.Tensor, torch.Tensor]:
120-
"""
121-
The methods used in this function have been introduced in 'ShakeShake Regularisation'
122-
Currently, this function supports `shake-shake`.
123-
=======
124113
def shake_get_alpha_beta(
125114
is_training: bool,
126115
is_cuda: bool,
127116
method: str
128-
) -> typing.Tuple[torch.Tensor, torch.Tensor]:
117+
) -> Tuple[torch.Tensor, torch.Tensor]:
129118
"""
130119
The methods used in this function have been introduced in 'ShakeShake Regularisation'
131120
Each method name is available in the referred paper.
132121
Currently, this function supports `even-even`, `shake-even`, `shake-shake` and `M3`.
133-
>>>>>>> Bug fixes (#249)
134122
135123
Args:
136124
is_training (bool): Whether the computation for the training
137125
is_cuda (bool): Whether the tensor is on CUDA
138-
<<<<<<< HEAD
139-
=======
140126
method (str): The shake method either `even-even`, `shake-even`, `shake-shake` or `M3`
141-
>>>>>>> Bug fixes (#249)
142127
143128
Returns:
144129
alpha, beta (Tuple[float, float]):
@@ -150,14 +135,8 @@ def shake_get_alpha_beta(
150135
Author: Xavier Gastaldi
151136
URL: https://arxiv.org/abs/1705.07485
152137
153-
<<<<<<< HEAD
154-
Note:
155-
The names have been taken from the paper as well.
156-
Currently, this function supports `shake-shake`.
157-
=======
158138
The names have been taken from the paper as well.
159139
Currently, this function supports `even-even`, `shake-even`, `shake-shake` and `M3`.
160-
>>>>>>> Bug fixes (#249)
161140
"""
162141
if not is_training:
163142
result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5]))
@@ -187,27 +166,15 @@ def shake_get_alpha_beta(
187166

188167

189168
def shake_drop_get_bl(
190-
<<<<<<< HEAD
191-
block_index: int,
192-
min_prob_no_shake: float,
193-
num_blocks: int,
194-
is_training: bool,
195-
is_cuda: bool
196-
=======
197169
block_index: int,
198170
min_prob_no_shake: float,
199171
num_blocks: int,
200172
is_training: bool,
201173
is_cuda: bool
202-
>>>>>>> Bug fixes (#249)
203174
) -> torch.Tensor:
204175
"""
205176
The sampling of Bernoulli random variable
206177
based on Eq. (4) in the paper
207-
<<<<<<< HEAD
208-
209-
=======
210-
>>>>>>> Bug fixes (#249)
211178
Args:
212179
block_index (int): The index of the block from the input layer
213180
min_prob_no_shake (float): The initial shake probability
@@ -217,28 +184,16 @@ def shake_drop_get_bl(
217184
218185
Returns:
219186
bl (torch.Tensor): a Bernoulli random variable in {0, 1}
220-
<<<<<<< HEAD
221-
222-
=======
223-
>>>>>>> Bug fixes (#249)
224187
Reference:
225188
ShakeDrop Regularization for Deep Residual Learning
226189
Yoshihiro Yamada et al. (2020)
227190
paper: https://arxiv.org/pdf/1802.02375.pdf
228191
implementation: https://github.com/imenurok/ShakeDrop
229192
"""
230-
<<<<<<< HEAD
231-
232-
pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake)
233-
234-
if is_training:
235-
# Move to torch.rand(1) for reproducibility
236-
=======
237193
pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake)
238194

239195
if is_training:
240196
# Move to torch.rand(1) for reproducibility
241-
>>>>>>> Bug fixes (#249)
242197
bl = torch.as_tensor(1.0) if torch.rand(1) <= pl else torch.as_tensor(0.0)
243198
else:
244199
bl = torch.as_tensor(pl)

0 commit comments

Comments
 (0)