Skip to content

Commit e69ff3b

Browse files
committed
fix tests after rebase
1 parent 637a68b commit e69ff3b

File tree

22 files changed

+145
-98
lines changed

22 files changed

+145
-98
lines changed

autoPyTorch/api/base_task.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1908,6 +1908,7 @@ def _init_ensemble_builder(
19081908
# builder in the provided dask client
19091909
required_dataset_properties = {'task_type': self.task_type,
19101910
'output_type': self.dataset.output_type}
1911+
19111912
proc_ensemble = EnsembleBuilderManager(
19121913
start_time=time.time(),
19131914
time_left_for_ensembles=time_left_for_ensembles,
@@ -1928,6 +1929,7 @@ def _init_ensemble_builder(
19281929
random_state=self.seed,
19291930
precision=precision,
19301931
logger_port=self._logger_port,
1932+
metrics_kwargs=self._metrics_kwargs
19311933
)
19321934
self._stopwatch.stop_task(ensemble_task_name)
19331935

autoPyTorch/data/base_feature_validator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def __init__(
4949
self.categories: List[List[int]] = []
5050
self.categorical_columns: List[int] = []
5151
self.numerical_columns: List[int] = []
52+
self.encode_columns: List[int] = []
5253

5354
self.all_nan_columns: Optional[Set[Union[int, str]]] = None
5455

autoPyTorch/data/tabular_feature_validator.py

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -168,27 +168,23 @@ def _fit(
168168

169169
# Handle objects if possible
170170
exist_object_columns = has_object_columns(X.dtypes.values)
171+
171172
if exist_object_columns:
172173
X = self.infer_objects(X)
173174
self.dtypes = [dt.name for dt in X.dtypes] # Also note this change in self.dtypes
175+
174176
self.all_nan_columns = set(all_nan_columns)
175177

176-
self.transformed_columns, self.feat_types = self.get_columns_to_encode(X)
178+
self.encode_columns, self.feat_types = self.get_columns_to_encode(X)
177179

178180
assert self.feat_types is not None
179181

180-
preprocessors = get_tabular_preprocessors()
181-
self.column_transformer = _create_column_transformer(
182-
preprocessors=preprocessors,
183-
categorical_columns=self.transformed_columns,
184-
)
185-
186-
if len(self.enc_columns) > 0:
182+
if len(self.encode_columns) > 0:
187183

188184
preprocessors = get_tabular_preprocessors()
189185
self.column_transformer = _create_column_transformer(
190186
preprocessors=preprocessors,
191-
categorical_columns=self.enc_columns,
187+
categorical_columns=self.encode_columns,
192188
)
193189

194190
# Mypy redefinition
@@ -302,8 +298,8 @@ def transform(
302298
# we change those columns to `object` dtype
303299
# to ensure that these columns are changed to appropriate dtype
304300
# in self.infer_objects
305-
all_nan_cat_cols = set(X[self.enc_columns].columns[X[self.enc_columns].isna().all()])
306-
dtype_dict = {col: 'object' for col in self.enc_columns if col in all_nan_cat_cols}
301+
all_nan_cat_cols = set(X[self.encode_columns].columns[X[self.encode_columns].isna().all()])
302+
dtype_dict = {col: 'object' for col in self.encode_columns if col in all_nan_cat_cols}
307303
X = X.astype(dtype_dict)
308304

309305
# Check the data here so we catch problems on new test data
@@ -388,10 +384,6 @@ def _check_data(
388384
if exist_object_columns:
389385
X = self.infer_objects(X)
390386

391-
# Define the column to be encoded here as the feature validator is fitted once
392-
# per estimator
393-
self.transformed_columns, self.feat_types = self.get_columns_to_encode(X)
394-
395387
column_order = [column for column in X.columns]
396388
if len(self.column_order) > 0:
397389
if self.column_order != column_order:
@@ -491,8 +483,8 @@ def _get_columns_to_encode(
491483
Type of each column numerical/categorical
492484
"""
493485

494-
if len(self.transformed_columns) > 0 and self.feat_types is not None:
495-
return self.transformed_columns, self.feat_types
486+
if len(self.encode_columns) > 0 and self.feat_types is not None:
487+
return self.encode_columns, self.feat_types
496488

497489
# Register if a column needs encoding
498490
categorical_columns = []
@@ -503,7 +495,7 @@ def _get_columns_to_encode(
503495
for i, column in enumerate(X.columns):
504496
if self.all_nan_columns is not None and column in self.all_nan_columns:
505497
continue
506-
column_dtype = self.dtypes[i]
498+
column_dtype = self.dtypes[i] if len(self.dtypes) > 0 else X[column].dtype.name
507499
err_msg = "Valid types are `numerical`, `categorical` or `boolean`, " \
508500
"but input column {} has an invalid type `{}`.".format(column, column_dtype)
509501
if column_dtype in ['category', 'bool']:

autoPyTorch/data/time_series_feature_validator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ def __init__(
3737
self.series_idx: Optional[List[Union[str, int]]] = None
3838

3939
def get_reordered_columns(self) -> List[str]:
40-
return self.transformed_columns + [
41-
col for col in self.column_order if col not in set(self.transformed_columns)
40+
return self.encode_columns + [
41+
col for col in self.column_order if col not in set(self.encode_columns)
4242
]
4343

4444
def fit(

autoPyTorch/datasets/resampling_strategy.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,7 @@ def is_stratified(self) -> bool:
109109

110110
# TODO: replace it with another way
111111
ResamplingStrategies = Union[CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes]
112-
<<<<<<< HEAD
113112

114-
=======
115-
>>>>>>> Additional metrics during train (#194)
116113

117114
DEFAULT_RESAMPLING_PARAMETERS: Dict[
118115
ResamplingStrategies,

autoPyTorch/evaluation/abstract_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,7 @@ def _loss(self, y_true: np.ndarray, y_hat: np.ndarray, **metric_kwargs: Any) ->
729729
def finish_up(self, loss: Dict[str, float], train_loss: Dict[str, float],
730730
valid_pred: Optional[np.ndarray], test_pred: Optional[np.ndarray],
731731
additional_run_info: Optional[Dict], file_output: bool, status: StatusType,
732-
opt_pred: Optional[np.ndarray],
732+
opt_pred: Optional[np.ndarray], **metric_kwargs: Any
733733
) -> Optional[Tuple[float, float, int, Dict]]:
734734
"""This function does everything necessary after the fitting is done:
735735

autoPyTorch/optimizer/smbo.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,8 @@ def __init__(self,
276276
initial_configurations = []
277277

278278
if STRING_TO_TASK_TYPES.get(self.task_type, -1) == TIMESERIES_FORECASTING:
279-
initial_configurations = self.get_init_configs_for_forecasting(config_space, kwargs)
279+
# TODO: update search space (to remove reg cocktails) for forecasting tasks so that we can use the portfolio (or build the portfolio again)
280+
# initial_configurations = self.get_init_configs_for_forecasting(config_space, kwargs)
280281
# proxy-validation sets
281282
self.min_num_test_instances: Optional[int] = kwargs.get('min_num_test_instances', # type:ignore[assignment]
282283
None)

autoPyTorch/pipeline/base_pipeline.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,6 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]],
520520
# needs to be updated is in components of the
521521
# choice module
522522
elif split_hyperparameter[0] not in components.keys():
523-
<<<<<<< HEAD
524523
hp_in_component = False
525524
if hasattr(node, 'additional_components') and node.additional_components:
526525
# This is designed for forecasting network encoder:
@@ -538,12 +537,6 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]],
538537
"Expected update hyperparameter "
539538
"to be in {} got {}".format(node.__class__.__name__,
540539
components.keys(), split_hyperparameter[0]))
541-
=======
542-
raise ValueError("Unknown component choice for node {}. "
543-
"Expected update component "
544-
"to be in {}, but got {}".format(node_name,
545-
components.keys(), split_hyperparameter[0]))
546-
>>>>>>> Bug fixes (#249)
547540
else:
548541
# check if hyperparameter is in the search space of the component
549542
component = components[split_hyperparameter[0]]

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/utils.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,18 @@ def percentage_value_range_to_integer_range(
8181
log = False
8282
else:
8383
log = hyperparameter_search_space.log
84+
85+
min_hyperparameter_value = hyperparameter_search_space.value_range[0]
86+
if len(hyperparameter_search_space.value_range) > 1:
87+
max_hyperparameter_value = hyperparameter_search_space.value_range[1]
88+
else:
89+
max_hyperparameter_value = hyperparameter_search_space.value_range[0]
90+
8491
hyperparameter_search_space = HyperparameterSearchSpace(
8592
hyperparameter=hyperparameter_name,
8693
value_range=(
87-
floor(float(hyperparameter_search_space.value_range[0]) * n_features),
88-
floor(float(hyperparameter_search_space.value_range[1]) * n_features)),
94+
floor(float(min_hyperparameter_value) * n_features),
95+
floor(float(max_hyperparameter_value) * n_features)),
8996
default_value=ceil(float(hyperparameter_search_space.default_value) * n_features),
9097
log=log)
9198
else:

autoPyTorch/pipeline/components/setup/network/forecasting_architecture.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,10 @@ def forward(self,
574574
past_observed_targets: Optional[torch.BoolTensor] = None,
575575
decoder_observed_values: Optional[torch.Tensor] = None,
576576
) -> ALL_NET_OUTPUT:
577+
578+
if isinstance(past_targets, dict):
579+
past_targets, past_features, future_features, past_observed_targets = self._unwrap_past_targets(past_targets)
580+
577581
x_past, x_future, x_static, loc, scale, static_context_initial_hidden, _ = self.pre_processing(
578582
past_targets=past_targets,
579583
past_observed_targets=past_observed_targets,
@@ -603,6 +607,38 @@ def forward(self,
603607

604608
return self.rescale_output(output, loc, scale, self.device)
605609

610+
def _unwrap_past_targets(
611+
self,
612+
past_targets: dict
613+
) -> Tuple[
614+
torch.Tensor,
615+
Optional[torch.Tensor],
616+
Optional[torch.Tensor],
617+
Optional[torch.Tensor],
618+
Optional[torch.BoolTensor],
619+
Optional[torch.Tensor]]:
620+
"""
621+
Time series forecasting network requires multiple inputs for the forward pass which is different to how pytorch
622+
networks usually work. SWA's update_bn in line #452 of trainer choice, does not unwrap the dictionary of the
623+
input when running the forward pass. So we need to check for that here.
624+
625+
Args:
626+
past_targets (dict):
627+
Input mistakenly passed to past_targets variable
628+
629+
Returns:
630+
_type_: _description_
631+
"""
632+
633+
past_targets_copy = past_targets.copy()
634+
past_targets = past_targets_copy.pop('past_targets')
635+
future_targets = past_targets_copy.pop('future_targets', None)
636+
past_features = past_targets_copy.pop('past_features', None)
637+
future_features = past_targets_copy.pop('future_features', None)
638+
past_observed_targets = past_targets_copy.pop('past_observed_targets', None)
639+
decoder_observed_values = past_targets_copy.pop('decoder_observed_values', None)
640+
return past_targets,past_features,future_features,past_observed_targets
641+
606642
def pred_from_net_output(self, net_output: ALL_NET_OUTPUT) -> torch.Tensor:
607643
if self.output_type == 'regression':
608644
return net_output
@@ -694,6 +730,10 @@ def forward(self,
694730
future_features: Optional[torch.Tensor] = None,
695731
past_observed_targets: Optional[torch.BoolTensor] = None,
696732
decoder_observed_values: Optional[torch.Tensor] = None, ) -> ALL_NET_OUTPUT:
733+
734+
if isinstance(past_targets, dict):
735+
past_targets, past_features, future_features, past_observed_targets = self._unwrap_past_targets(past_targets)
736+
697737
x_past, _, x_static, loc, scale, static_context_initial_hidden, past_targets = self.pre_processing(
698738
past_targets=past_targets,
699739
past_observed_targets=past_observed_targets,
@@ -983,6 +1023,10 @@ def forward(self,
9831023
future_features: Optional[torch.Tensor] = None,
9841024
past_observed_targets: Optional[torch.BoolTensor] = None,
9851025
decoder_observed_values: Optional[torch.Tensor] = None, ) -> ALL_NET_OUTPUT:
1026+
1027+
if isinstance(past_targets, dict):
1028+
past_targets, past_features, future_features, past_observed_targets = self._unwrap_past_targets(past_targets)
1029+
9861030
encode_length = min(self.window_size, past_targets.shape[1])
9871031

9881032
if past_observed_targets is None:
@@ -1250,6 +1294,9 @@ def forward(self, # type: ignore[override]
12501294
decoder_observed_values: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor,
12511295
Tuple[torch.Tensor, torch.Tensor]]:
12521296

1297+
if isinstance(past_targets, dict):
1298+
past_targets, past_features, future_features, past_observed_targets = self._unwrap_past_targets(past_targets)
1299+
12531300
# Unlike other networks, NBEATS network is required to predict both past and future targets.
12541301
# Thereby, we return two tensors for backcast and forecast
12551302
if past_observed_targets is None:

autoPyTorch/pipeline/components/setup/network_backbone/utils.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,10 @@ def get_output_shape(network: torch.nn.Module, input_shape: Tuple[int, ...], has
2929
"""
3030
placeholder = torch.randn((2, *input_shape), dtype=torch.float)
3131
with torch.no_grad():
32-
<<<<<<< HEAD
3332
if has_hidden_states:
3433
output = network(placeholder)[0]
3534
else:
3635
output = network(placeholder)
37-
=======
38-
output = network(placeholder)
39-
40-
>>>>>>> Bug fixes (#249)
4136
return tuple(output.shape[1:])
4237

4338

autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py

Lines changed: 24 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@
1111

1212

1313
class NetworkEmbeddingComponent(autoPyTorchSetupComponent):
14-
def __init__(self, random_state: Optional[np.random.RandomState] = None):
15-
super().__init__(random_state=random_state)
14+
def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None):
15+
super().__init__()
1616
self.embedding: Optional[nn.Module] = None
17+
self.random_state = random_state
1718
self.feature_shapes: Dict[str, int] = {}
1819

1920
def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
2021

21-
num_numerical_columns, num_input_features = self._get_required_info_from_data(X)
22+
num_numerical_columns, num_input_features = self._get_args(X)
2223

2324
self.embedding, num_output_features = self.build_embedding(
2425
num_input_features=num_input_features,
@@ -35,7 +36,6 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
3536
self.feature_shapes = feature_shapes
3637
else:
3738
self.feature_shapes = X['dataset_properties']['feature_shapes']
38-
3939
return self
4040

4141
def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
@@ -49,39 +49,31 @@ def build_embedding(self,
4949
num_numerical_features: int) -> Tuple[nn.Module, Optional[List[int]]]:
5050
raise NotImplementedError
5151

52-
def _get_required_info_from_data(self, X: Dict[str, Any]) -> Tuple[int, np.ndarray]:
53-
"""
54-
Returns the number of numerical columns after preprocessing and
55-
an array of size equal to the number of input features
56-
containing zeros for numerical data and number of categories
57-
for categorical data. This is required to build the embedding.
58-
59-
Args:
60-
X (Dict[str, Any]):
61-
Fit dictionary
62-
63-
Returns:
64-
Tuple[int, np.ndarray]:
65-
number of numerical columns and array indicating
66-
number of categories for categorical columns and
67-
0 for numerical columns
68-
"""
52+
def _get_args(self, X: Dict[str, Any]) -> Tuple[int, np.ndarray]:
6953
# Feature preprocessors can alter numerical columns
7054
if len(X['dataset_properties']['numerical_columns']) == 0:
7155
num_numerical_columns = 0
7256
else:
7357
X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2])
7458

75-
numerical_column_transformer = X['tabular_transformer'].preprocessor. \
76-
named_transformers_['numerical_pipeline']
77-
num_numerical_columns = numerical_column_transformer.transform(
78-
X_train[:, X['dataset_properties']['numerical_columns']]).shape[1]
79-
80-
num_cols = num_numerical_columns + len(X['dataset_properties']['categorical_columns'])
81-
num_input_feats = np.zeros(num_cols, dtype=np.int32)
82-
59+
if 'tabular_transformer' in X:
60+
numerical_column_transformer = X['tabular_transformer'].preprocessor. \
61+
named_transformers_['numerical_pipeline']
62+
elif 'time_series_feature_transformer' in X:
63+
numerical_column_transformer = X['time_series_feature_transformer'].preprocessor. \
64+
named_transformers_['numerical_pipeline']
65+
else:
66+
raise ValueError("Either a tabular or time_series transformer must be contained!")
67+
if hasattr(X_train, 'iloc'):
68+
num_numerical_columns = numerical_column_transformer.transform(
69+
X_train.iloc[:, X['dataset_properties']['numerical_columns']]).shape[1]
70+
else:
71+
num_numerical_columns = numerical_column_transformer.transform(
72+
X_train[:, X['dataset_properties']['numerical_columns']]).shape[1]
73+
num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])),
74+
dtype=np.int32)
8375
categories = X['dataset_properties']['categories']
84-
for idx, cats in enumerate(categories, start=num_numerical_columns):
85-
num_input_feats[idx] = len(cats)
8676

87-
return num_numerical_columns, num_input_feats
77+
for i, category in enumerate(categories):
78+
num_input_features[num_numerical_columns + i, ] = len(category)
79+
return num_numerical_columns, num_input_features

autoPyTorch/pipeline/components/training/data_loader/time_series_forecasting_data_loader.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader:
254254
self.val_transform,
255255
train=False,
256256
)
257-
258-
if X['dataset_properties']["is_small_preprocess"]:
257+
if X['dataset_properties'].get("is_small_preprocess", True):
259258
# This parameter indicates that the data has been pre-processed for speed
260259
# Overwrite the datamanager with the pre-processed data
261260
datamanager.replace_data(X['X_train'],
@@ -616,3 +615,16 @@ def __str__(self) -> str:
616615
""" Allow a nice understanding of which components were used """
617616
string = self.train_data_loader.__class__.__name__
618617
return string
618+
619+
def _check_transform_requirements(self, X: Dict[str, Any], y: Any = None) -> None:
620+
"""
621+
622+
Makes sure that the fit dictionary contains the required transformations
623+
that the dataset should go through
624+
625+
Args:
626+
X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing
627+
mechanism, in which during a transform, a components adds relevant information
628+
so that further stages can be properly fitted
629+
"""
630+
pass

0 commit comments

Comments
 (0)