Skip to content

Commit e478777

Browse files
committed
Fix regression algorithms to give correct output dimensions (#1335)
* Added ignored_warnings file
* Use ignored_warnings file
* Test regressors with 1d, 1d as 2d and 2d targets
* Flake'd
* Fix broken relative imports to ignore_warnings
* Removed print and updated parameter type for tests
* Type import fix
1 parent 8a09659 commit e478777

16 files changed

+394
-133
lines changed

autosklearn/pipeline/components/regression/adaboost.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def __init__(self, n_estimators, learning_rate, loss, max_depth, random_state=No
1515
self.max_depth = max_depth
1616
self.estimator = None
1717

18-
def fit(self, X, Y):
18+
def fit(self, X, y):
1919
import sklearn.ensemble
2020
import sklearn.tree
2121

@@ -32,7 +32,11 @@ def fit(self, X, Y):
3232
loss=self.loss,
3333
random_state=self.random_state
3434
)
35-
self.estimator.fit(X, Y)
35+
36+
if y.ndim == 2 and y.shape[1] == 1:
37+
y = y.flatten()
38+
39+
self.estimator.fit(X, y)
3640
return self
3741

3842
def predict(self, X):

autosklearn/pipeline/components/regression/ard_regression.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ def __init__(self, n_iter, tol, alpha_1, alpha_2, lambda_1, lambda_2,
2222
self.threshold_lambda = threshold_lambda
2323
self.fit_intercept = fit_intercept
2424

25-
def fit(self, X, Y):
26-
import sklearn.linear_model
25+
def fit(self, X, y):
26+
from sklearn.linear_model import ARDRegression
2727

2828
self.n_iter = int(self.n_iter)
2929
self.tol = float(self.tol)
@@ -34,20 +34,25 @@ def fit(self, X, Y):
3434
self.threshold_lambda = float(self.threshold_lambda)
3535
self.fit_intercept = check_for_bool(self.fit_intercept)
3636

37-
self.estimator = sklearn.linear_model.\
38-
ARDRegression(n_iter=self.n_iter,
39-
tol=self.tol,
40-
alpha_1=self.alpha_1,
41-
alpha_2=self.alpha_2,
42-
lambda_1=self.lambda_1,
43-
lambda_2=self.lambda_2,
44-
compute_score=False,
45-
threshold_lambda=self.threshold_lambda,
46-
fit_intercept=True,
47-
normalize=False,
48-
copy_X=False,
49-
verbose=False)
50-
self.estimator.fit(X, Y)
37+
self.estimator = ARDRegression(
38+
n_iter=self.n_iter,
39+
tol=self.tol,
40+
alpha_1=self.alpha_1,
41+
alpha_2=self.alpha_2,
42+
lambda_1=self.lambda_1,
43+
lambda_2=self.lambda_2,
44+
compute_score=False,
45+
threshold_lambda=self.threshold_lambda,
46+
fit_intercept=True,
47+
normalize=False,
48+
copy_X=False,
49+
verbose=False
50+
)
51+
52+
if y.ndim == 2 and y.shape[1] == 1:
53+
y = y.flatten()
54+
55+
self.estimator.fit(X, y)
5156
return self
5257

5358
def predict(self, X):

autosklearn/pipeline/components/regression/decision_tree.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ def fit(self, X, y, sample_weight=None):
5656
min_weight_fraction_leaf=self.min_weight_fraction_leaf,
5757
min_impurity_decrease=self.min_impurity_decrease,
5858
random_state=self.random_state)
59+
60+
if y.ndim == 2 and y.shape[1] == 1:
61+
y = y.flatten()
62+
5963
self.estimator.fit(X, y, sample_weight=sample_weight)
6064
return self
6165

autosklearn/pipeline/components/regression/extra_trees.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,10 @@ def iterative_fit(self, X, y, n_iter=1, refit=False):
9595
self.estimator.n_estimators = min(self.estimator.n_estimators,
9696
self.n_estimators)
9797

98-
self.estimator.fit(X, y,)
98+
if y.ndim == 2 and y.shape[1] == 1:
99+
y = y.flatten()
100+
101+
self.estimator.fit(X, y)
99102

100103
return self
101104

autosklearn/pipeline/components/regression/gaussian_process.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ def __init__(self, alpha, thetaL, thetaU, random_state=None):
1212
self.thetaU = thetaU
1313
self.random_state = random_state
1414
self.estimator = None
15-
self.scaler = None
1615

1716
def fit(self, X, y):
1817
import sklearn.gaussian_process
@@ -38,6 +37,9 @@ def fit(self, X, y):
3837
normalize_y=True
3938
)
4039

40+
if y.ndim == 2 and y.shape[1] == 1:
41+
y = y.flatten()
42+
4143
self.estimator.fit(X, y)
4244

4345
return self

autosklearn/pipeline/components/regression/gradient_boosting.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,7 @@ def get_current_iter(self):
4848
return self.estimator.n_iter_
4949

5050
def iterative_fit(self, X, y, n_iter=2, refit=False):
51-
52-
"""
53-
Set n_iter=2 for the same reason as for SGD
54-
"""
51+
""" Set n_iter=2 for the same reason as for SGD """
5552
import sklearn.ensemble
5653
from sklearn.experimental import enable_hist_gradient_boosting # noqa
5754

@@ -112,6 +109,9 @@ def iterative_fit(self, X, y, n_iter=2, refit=False):
112109
self.estimator.max_iter = min(self.estimator.max_iter,
113110
self.max_iter)
114111

112+
if y.ndim == 2 and y.shape[1] == 1:
113+
y = y.flatten()
114+
115115
self.estimator.fit(X, y)
116116

117117
if (

autosklearn/pipeline/components/regression/k_nearest_neighbors.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def __init__(self, n_neighbors, weights, p, random_state=None):
1313
self.p = p
1414
self.random_state = random_state
1515

16-
def fit(self, X, Y):
16+
def fit(self, X, y):
1717
import sklearn.neighbors
1818

1919
self.n_neighbors = int(self.n_neighbors)
@@ -24,7 +24,11 @@ def fit(self, X, Y):
2424
n_neighbors=self.n_neighbors,
2525
weights=self.weights,
2626
p=self.p)
27-
self.estimator.fit(X, Y)
27+
28+
if y.ndim == 2 and y.shape[1] == 1:
29+
y = y.flatten()
30+
31+
self.estimator.fit(X, y)
2832
return self
2933

3034
def predict(self, X):

autosklearn/pipeline/components/regression/liblinear_svr.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def __init__(self, loss, epsilon, dual, tol, C, fit_intercept,
2323
self.random_state = random_state
2424
self.estimator = None
2525

26-
def fit(self, X, Y):
26+
def fit(self, X, y):
2727
import sklearn.svm
2828

2929
self.C = float(self.C)
@@ -42,7 +42,11 @@ def fit(self, X, Y):
4242
fit_intercept=self.fit_intercept,
4343
intercept_scaling=self.intercept_scaling,
4444
random_state=self.random_state)
45-
self.estimator.fit(X, Y)
45+
46+
if y.ndim == 2 and y.shape[1] == 1:
47+
y = y.flatten()
48+
49+
self.estimator.fit(X, y)
4650
return self
4751

4852
def predict(self, X):

autosklearn/pipeline/components/regression/libsvm_svr.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
77
UniformIntegerHyperparameter, CategoricalHyperparameter, \
88
UnParametrizedHyperparameter
9-
109
from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
1110
from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE
1211
from autosklearn.util.common import check_for_bool, check_none
@@ -29,7 +28,7 @@ def __init__(self, kernel, C, epsilon, tol, shrinking, gamma=0.1,
2928
self.random_state = random_state
3029
self.estimator = None
3130

32-
def fit(self, X, Y):
31+
def fit(self, X, y):
3332
import sklearn.svm
3433

3534
# Calculate the size of the kernel cache (in MB) for sklearn's LibSVM. The cache size is
@@ -88,18 +87,35 @@ def fit(self, X, Y):
8887
)
8988
self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
9089

91-
self.scaler.fit(Y.reshape((-1, 1)))
92-
Y_scaled = self.scaler.transform(Y.reshape((-1, 1))).ravel()
93-
self.estimator.fit(X, Y_scaled)
90+
# Convert y to be at least 2d for the scaler
91+
# [1,1,1] -> [[1], [1], [1]]
92+
if y.ndim == 1:
93+
y = y.reshape((-1, 1))
94+
95+
y_scaled = self.scaler.fit_transform(y)
96+
97+
# Flatten: [[0], [0], [0]] -> [0, 0, 0]
98+
if y_scaled.ndim == 2 and y_scaled.shape[1] == 1:
99+
y_scaled = y_scaled.flatten()
100+
101+
self.estimator.fit(X, y_scaled)
102+
94103
return self
95104

96105
def predict(self, X):
97106
if self.estimator is None:
98107
raise NotImplementedError
99108
if self.scaler is None:
100109
raise NotImplementedError
101-
Y_pred = self.estimator.predict(X)
102-
return self.scaler.inverse_transform(Y_pred)
110+
y_pred = self.estimator.predict(X)
111+
112+
inverse = self.scaler.inverse_transform(y_pred)
113+
114+
# Flatten: [[0], [0], [0]] -> [0, 0, 0]
115+
if inverse.ndim == 2 and inverse.shape[1] == 1:
116+
inverse = inverse.flatten()
117+
118+
return inverse
103119

104120
@staticmethod
105121
def get_properties(dataset_properties=None):

autosklearn/pipeline/components/regression/mlp.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,16 +137,36 @@ def iterative_fit(self, X, y, n_iter=2, refit=False):
137137
# max_fun=self.max_fun
138138
)
139139
self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
140-
self.scaler.fit(y.reshape((-1, 1)))
140+
141+
# Convert y to be at least 2d for the StandardScaler
142+
# [1,1,1] -> [[1], [1], [1]]
143+
if y.ndim == 1:
144+
y = y.reshape((-1, 1))
145+
146+
self.scaler.fit(y)
141147
else:
142148
new_max_iter = min(self.max_iter - self.estimator.n_iter_, n_iter)
143149
self.estimator.max_iter = new_max_iter
144150

145-
Y_scaled = self.scaler.transform(y.reshape((-1, 1))).ravel()
146-
self.estimator.fit(X, Y_scaled)
147-
if self.estimator.n_iter_ >= self.max_iter or \
148-
self.estimator._no_improvement_count > self.n_iter_no_change:
151+
# Convert y to be at least 2d for the scaler
152+
# [1,1,1] -> [[1], [1], [1]]
153+
if y.ndim == 1:
154+
y = y.reshape((-1, 1))
155+
156+
y_scaled = self.scaler.transform(y)
157+
158+
# Flatten: [[0], [0], [0]] -> [0, 0, 0]
159+
if y_scaled.ndim == 2 and y_scaled.shape[1] == 1:
160+
y_scaled = y_scaled.flatten()
161+
162+
self.estimator.fit(X, y_scaled)
163+
164+
if (
165+
self.estimator.n_iter_ >= self.max_iter
166+
or self.estimator._no_improvement_count > self.n_iter_no_change
167+
):
149168
self._fully_fit = True
169+
150170
return self
151171

152172
def configuration_fully_fitted(self):
@@ -160,8 +180,16 @@ def configuration_fully_fitted(self):
160180
def predict(self, X):
161181
if self.estimator is None:
162182
raise NotImplementedError
163-
Y_pred = self.estimator.predict(X)
164-
return self.scaler.inverse_transform(Y_pred)
183+
184+
y_pred = self.estimator.predict(X)
185+
186+
inverse = self.scaler.inverse_transform(y_pred)
187+
188+
# Flatten: [[0], [0], [0]] -> [0, 0, 0]
189+
if inverse.ndim == 2 and inverse.shape[1] == 1:
190+
inverse = inverse.flatten()
191+
192+
return inverse
165193

166194
@staticmethod
167195
def get_properties(dataset_properties=None):

autosklearn/pipeline/components/regression/random_forest.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ def iterative_fit(self, X, y, n_iter=1, refit=False):
8585
self.estimator.n_estimators = min(self.estimator.n_estimators,
8686
self.n_estimators)
8787

88+
if y.ndim == 2 and y.shape[1] == 1:
89+
y = y.flatten()
90+
8891
self.estimator.fit(X, y)
8992
return self
9093

autosklearn/pipeline/components/regression/sgd.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,36 @@ def iterative_fit(self, X, y, n_iter=2, refit=False):
9090
warm_start=True)
9191

9292
self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
93-
self.scaler.fit(y.reshape((-1, 1)))
94-
Y_scaled = self.scaler.transform(y.reshape((-1, 1))).ravel()
95-
self.estimator.fit(X, Y_scaled)
93+
94+
if y.ndim == 1:
95+
y = y.reshape((-1, 1))
96+
97+
y_scaled = self.scaler.fit_transform(y)
98+
99+
# Flatten: [[0], [0], [0]] -> [0, 0, 0]
100+
if y_scaled.ndim == 2 and y_scaled.shape[1] == 1:
101+
y_scaled = y_scaled.flatten()
102+
103+
self.estimator.fit(X, y_scaled)
96104
self.n_iter_ = self.estimator.n_iter_
97105
else:
98106
self.estimator.max_iter += n_iter
99107
self.estimator.max_iter = min(self.estimator.max_iter, self.max_iter)
100-
Y_scaled = self.scaler.transform(y.reshape((-1, 1))).ravel()
108+
109+
# Convert y to be at least 2d for the scaler
110+
# [1,1,1] -> [[1], [1], [1]]
111+
if y.ndim == 1:
112+
y = y.reshape((-1, 1))
113+
114+
y_scaled = self.scaler.transform(y)
115+
116+
# Flatten: [[0], [0], [0]] -> [0, 0, 0]
117+
if y_scaled.ndim == 2 and y_scaled.shape[1] == 1:
118+
y_scaled = y_scaled.flatten()
119+
101120
self.estimator._validate_params()
102121
self.estimator._partial_fit(
103-
X, Y_scaled,
122+
X, y_scaled,
104123
alpha=self.estimator.alpha,
105124
C=1.0,
106125
loss=self.estimator.loss,

0 commit comments

Comments (0)