
Commit b01f1cb: Add unit tests
Parent: 7488955

File tree: 6 files changed, +101 -13 lines

autosklearn/automl.py (3 additions, 3 deletions)

@@ -1779,12 +1779,12 @@ def _get_runhistory_models_performance(self):
                 time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(run_value.endtime))
             )
             cost = run_value.cost
+            train_loss = run_value.additional_info["train_loss"]
             if len(self._metrics) > 1:
                 cost = cost[0]
+                train_loss = train_loss[0]
             val_score = metric._optimum - (metric._sign * cost)
-            train_score = metric._optimum - (
-                metric._sign * run_value.additional_info["train_loss"]
-            )
+            train_score = metric._optimum - (metric._sign * train_loss)
             scores = {
                 "Timestamp": endtime,
                 "single_best_optimization_score": val_score,

autosklearn/pipeline/util.py (7 additions, 0 deletions)

@@ -50,10 +50,17 @@ def get_dataset(
     train_size_maximum=150,
     make_multilabel=False,
     make_binary=False,
+    return_target_as_string=False,
 ):
     iris = getattr(sklearn.datasets, "load_%s" % dataset)()
     X = iris.data.astype(np.float32)
     Y = iris.target
+
+    if return_target_as_string:
+        if make_binary or make_multilabel or (len(Y.shape) > 1 and Y.shape[1] > 1):
+            raise NotImplementedError()
+        Y = np.array([iris.target_names[y] for y in Y])
+
     rs = np.random.RandomState(42)
     indices = np.arange(X.shape[0])
     train_size = min(int(len(indices) / 3.0 * 2.0), train_size_maximum)
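
In isolation, the new flag relies on the sklearn toy loaders (iris, wine, breast_cancer) exposing integer targets alongside target_names; a short sketch of the mapping it performs (not part of the commit):

    import numpy as np
    import sklearn.datasets

    bunch = sklearn.datasets.load_iris()
    y_int = bunch.target                                     # array of 0/1/2 class indices
    y_str = np.array([bunch.target_names[y] for y in y_int])
    print(y_str[:3])                                         # ['setosa' 'setosa' 'setosa']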

test/fixtures/datasets.py (2 additions, 0 deletions)

@@ -84,6 +84,7 @@ def _make(
        task: Optional[int] = None,
        feat_type: Optional[Dict | str] = None,
        as_datamanager: bool = False,
+       return_target_as_string: bool = False,
    ) -> Any:
        X, y, Xt, yt = get_dataset(
            dataset=name,
@@ -92,6 +93,7 @@ def _make(
            train_size_maximum=train_size_maximum,
            make_multilabel=make_multilabel,
            make_binary=make_binary,
+           return_target_as_string=return_target_as_string,
        )

        if not as_datamanager:
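
A hypothetical use of the widened fixture in a test (the test name and assertion below are illustrative only, not part of the commit):

    def test_iris_targets_as_strings(make_sklearn_dataset) -> None:
        X, y, Xt, yt = make_sklearn_dataset(name="iris", return_target_as_string=True)
        assert y.dtype.kind == "U"  # string labels such as "setosa"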

test/test_automl/cases.py (44 additions, 0 deletions)

@@ -25,6 +25,7 @@

 import numpy as np

+import autosklearn.metrics
 from autosklearn.automl import AutoMLClassifier, AutoMLRegressor
 from autosklearn.automl_common.common.utils.backend import Backend

@@ -133,6 +134,49 @@ def case_classifier_fitted_cv(
     return model


+@case(tags=["classifier", "fitted", "holdout", "cached", "multiobjective"])
+@parametrize("dataset", ["iris"])
+def case_classifier_fitted_holdout_multiobjective(
+    dataset: str,
+    make_cache: Callable[[str], Cache],
+    make_backend: Callable[..., Backend],
+    make_automl_classifier: Callable[..., AutoMLClassifier],
+    make_sklearn_dataset: Callable[..., Tuple[np.ndarray, ...]],
+) -> AutoMLClassifier:
+    """Case of a holdout fitted classifier"""
+    resampling_strategy = "holdout"
+
+    key = f"case_classifier_{resampling_strategy}_{dataset}"
+
+    # This locks the cache for this item while we check, required for pytest-xdist
+    with make_cache(key) as cache:
+        if "model" not in cache:
+            # Make the model in the cache
+            model = make_automl_classifier(
+                temporary_directory=cache.path("backend"),
+                delete_tmp_folder_after_terminate=False,
+                resampling_strategy=resampling_strategy,
+                metrics=[
+                    autosklearn.metrics.balanced_accuracy,
+                    autosklearn.metrics.log_loss,
+                ],
+            )
+
+            X, y, Xt, yt = make_sklearn_dataset(
+                name=dataset, return_target_as_string=True
+            )
+            model.fit(X, y, dataset_name=dataset)
+
+            # Save the model
+            cache.save(model, "model")
+
+    # Try the model from the cache
+    model = cache.load("model")
+    model._backend = copy_backend(old=model._backend, new=make_backend())
+
+    return model
+
+
 @case(tags=["regressor", "fitted", "holdout", "cached"])
 @parametrize("dataset", ["boston"])
 def case_regressor_fitted_holdout(
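
Outside the test harness, the fixture above corresponds roughly to the following user-facing call. This is a hedged sketch: it assumes the public estimator's metric argument accepts a sequence of Scorers, as the multi-objective support exercised by this case suggests.

    import sklearn.datasets
    import autosklearn.classification
    import autosklearn.metrics

    X, y = sklearn.datasets.load_iris(return_X_y=True)

    # Two objectives, mirroring the metrics used in the test case above.
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=60,
        metric=[
            autosklearn.metrics.balanced_accuracy,
            autosklearn.metrics.log_loss,
        ],
    )
    automl.fit(X, y, dataset_name="iris")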

test/test_automl/test_dummy_predictions.py (13 additions, 10 deletions)

@@ -1,4 +1,6 @@
-from typing import Callable, Tuple
+from __future__ import annotations
+
+from typing import Callable, Sequence, Tuple

 from pathlib import Path

@@ -12,7 +14,7 @@
     REGRESSION,
 )
 from autosklearn.data.xy_data_manager import XYDataManager
-from autosklearn.metrics import Scorer, accuracy, precision, r2
+from autosklearn.metrics import Scorer, accuracy, log_loss, precision, r2
 from autosklearn.util.logging_ import PicklableClientLogger

 import pytest
@@ -21,17 +23,18 @@


 @parametrize(
-    "dataset, metric, task",
+    "dataset, metrics, task",
     [
-        ("breast_cancer", accuracy, BINARY_CLASSIFICATION),
-        ("wine", accuracy, MULTICLASS_CLASSIFICATION),
-        ("diabetes", r2, REGRESSION),
+        ("breast_cancer", [accuracy], BINARY_CLASSIFICATION),
+        ("breast_cancer", [accuracy, log_loss], BINARY_CLASSIFICATION),
+        ("wine", [accuracy], MULTICLASS_CLASSIFICATION),
+        ("diabetes", [r2], REGRESSION),
     ],
 )
 def test_produces_correct_output(
     dataset: str,
     task: int,
-    metric: Scorer,
+    metrics: Sequence[Scorer],
     mock_logger: PicklableClientLogger,
     make_automl: Callable[..., AutoML],
     make_sklearn_dataset: Callable[..., XYDataManager],
@@ -45,8 +48,8 @@ def test_produces_correct_output(
     task : int
         The task type of the dataset

-    metric: Scorer
-        Metric to use, required as fit usually determines the metric to use
+    metrics: Sequence[Scorer]
+        Metric(s) to use, required as fit usually determines the metric to use

     Fixtures
     --------
@@ -66,7 +69,7 @@ def test_produces_correct_output(
     * It should produce predictions "predictions_ensemble_1337_1_0.0.npy"
     """
     seed = 1337
-    automl = make_automl(metrics=[metric], seed=seed)
+    automl = make_automl(metrics=metrics, seed=seed)
     automl._logger = mock_logger

     datamanager = make_sklearn_dataset(

test/test_automl/test_post_fit.py (32 additions, 0 deletions)

@@ -1,3 +1,6 @@
+import numpy as np
+from sklearn.ensemble import VotingClassifier, VotingRegressor
+
 from autosklearn.automl import AutoML

 from pytest_cases import parametrize_with_cases
@@ -66,3 +69,32 @@ def test_no_ensemble(automl: AutoML) -> None:
     assert automl.ensemble_ is None
     assert len(automl.models_) == 0
     assert len(automl.cv_models_) == 0
+
+
+@parametrize_with_cases("automl", cases, has_tag=["multiobjective"])
+def test__load_pareto_front(automl: AutoML) -> None:
+    """
+    Parameters
+    ----------
+    automl : AutoML
+        An AutoML object fitted with multiple objective metrics
+
+    Expects
+    -------
+    * Auto-sklearn can predict and has a model
+    * _load_pareto_front returns one scikit-learn ensemble
+    """
+    # Check that the predict function works
+    X = np.array([[1.0, 1.0, 1.0, 1.0]])
+    print(automl.predict(X))
+    assert automl.predict_proba(X).shape == (1, 3)
+    assert automl.predict(X).shape == (1,)
+
+    pareto_front = automl._load_pareto_front()
+    assert len(pareto_front) == 1
+    for ensemble in pareto_front:
+        assert isinstance(ensemble, (VotingClassifier, VotingRegressor))
+        y_pred = ensemble.predict_proba(X)
+        assert y_pred.shape == (1, 3)
+        y_pred = ensemble.predict(X)
+        assert y_pred in ["setosa", "versicolor", "virginica"]
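
A hedged sketch of how the Pareto front could be inspected outside the test harness; only _load_pareto_front is taken from this commit, the helper itself and its name are illustrative, and it expects an already-fitted multi-objective AutoML object:

    import numpy as np
    from sklearn.ensemble import VotingClassifier, VotingRegressor

    from autosklearn.automl import AutoML

    def summarize_pareto_front(automl: AutoML, X: np.ndarray) -> None:
        # One fitted scikit-learn voting ensemble per Pareto-optimal trade-off
        # between the configured metrics (illustrative helper, not in the commit).
        for ensemble in automl._load_pareto_front():
            assert isinstance(ensemble, (VotingClassifier, VotingRegressor))
            print(type(ensemble).__name__, ensemble.predict(X[:1]))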
