@@ -37,27 +37,68 @@ def _predict_proba_estimator(clf, X):
37
37
raise NotImplementedError ("predict_proba not supported" )
38
38
39
39
40
def _check_Xy(stacking, X, y=None):
    """Validate the dimensions of the stacked input ``X`` (and ``y``).

    Parameters
    ----------
    stacking : object
        Object exposing an ``estimators`` sequence. Its length is the
        expected size of the first axis of ``X``.
    X : array-like
        Input data. Must be 3 dimensional, and its first axis must have
        one entry per estimator.
    y : array-like | None
        Optional targets. When given, ``len(y)`` must equal the size of
        the second axis of ``X``.

    Raises
    ------
    ValueError
        If ``X`` is not 3 dimensional, if its first axis does not match
        the number of estimators, or if its second axis does not match
        the length of ``y``.
    """
    n_dim = np.ndim(X)
    if n_dim != 3:
        raise ValueError(
            'X must be 3 dimensional, your X has %d dimensions' % n_dim)

    # Read the shape from an array view of X so that nested lists (which
    # have no ``.shape`` attribute) are reported with a ValueError too.
    n_sources, n_samples = np.asarray(X).shape[:2]

    expected_n_sources = len(stacking.estimators)
    if expected_n_sources != n_sources:
        raise ValueError(
            'The first axis of X (%d) should match the '
            'number of estimators (%d)' % (n_sources, expected_n_sources))

    if y is not None:
        n_targets = len(y)
        if n_targets != n_samples:
            # Report the length of y, not the number of estimators.
            raise ValueError(
                'The second axis of X (%d) should match the '
                'number of samples (%d)' % (n_samples, n_targets))
59
+
60
+
40
61
class StackingClassifier (BaseEstimator , ClassifierMixin , TransformerMixin ):
41
- """Meta-classifier of 3D X matrix with labels
62
+ """Stacking Meta-classifier of 3D X matrix with labels
63
+
64
+ Parameters
65
+ ----------
66
+ estimators : list of Estimator objects compatible with scikit-learn
67
+ The estimators to be used with each source of inputs. Length must match
68
+ the first dimension of X.
69
+ stacking_estimator : Estimator objects compatible with scikit-learn
70
+ The estimator used to integrate the predictions of the estimators.
71
+ memory : joblib memory object | None
72
+ The caching configuration. Defaults to `Memory(cachedir=None)`.
73
+ memory_level : int (defaults to 0)
74
+ The memory level used for caching.
42
75
"""
43
76
44
77
def __init__(self, estimators=None,
             stacking_estimator=None,
             memory=Memory(cachedir=None), memory_level=0,
             n_jobs=1):
    """Store the constructor arguments on the instance, unmodified.

    Parameters
    ----------
    estimators : list of estimator objects | None
        One estimator per input source.
    stacking_estimator : estimator object | None
        Estimator that integrates the predictions of ``estimators``.
    memory : joblib memory object
        The caching configuration.
    memory_level : int
        The memory level used for caching.
    n_jobs : int
        Passed as ``n_jobs`` to ``Parallel`` by the methods of this class.
    """
    # NOTE(review): the ``memory`` default is evaluated once, when the
    # function is defined, so every instance created without an explicit
    # ``memory`` shares the same ``Memory`` object.
    self.estimators = estimators
    self.stacking_estimator = stacking_estimator
    self.memory = memory
    self.memory_level = memory_level
    self.n_jobs = n_jobs
55
87
56
88
def fit (self , X , y ):
57
- """ stacking model fitting
58
- X is 3D matrix
59
- """
89
+ """Fit all estimators according to the given training data.
60
90
91
+ Parameters
92
+ ----------
93
+ X : {array-like, sparse-matrix}, shape (n_estimators, n_samples,
94
+ n_features)
95
+ Training vector, where n_samples is the number of samples and
96
+ n_features is the number of features.
97
+
98
+ y : array-like, shape (n_samples,)
99
+ Target vector relative to X.
100
+ """
101
+ _check_Xy (self , X , y )
61
102
self .estimators = Parallel (n_jobs = self .n_jobs )(
62
103
delayed (_fit_estimator )(clf , x , y )
63
104
for x , clf in zip (X , self .estimators ))
@@ -71,10 +112,20 @@ def fit(self, X, y):
71
112
return self
72
113
73
114
def predict (self , X ):
74
- """ stacking model prediction
75
- X is 3D matrix
115
+ """Predict class labels for samples in X.
116
+
117
+ Parameters
118
+ ----------
119
+ X : {array-like, sparse matrix}, shape = (n_estimators,
120
+ n_samples, n_features)
121
+ The multi-input samples.
122
+
123
+ Returns
124
+ -------
125
+ C : array, shape = (n_samples)
126
+ Predicted class label per sample.
76
127
"""
77
-
128
+ _check_Xy ( self , X )
78
129
predictions_ = Parallel (n_jobs = self .n_jobs )(
79
130
delayed (_predict_proba_estimator )(clf , x )
80
131
for x , clf in zip (X , self .estimators ))
@@ -83,20 +134,69 @@ def predict(self, X):
83
134
return self .stacking_estimator .predict (predictions_ )
84
135
85
136
def score(self, X, y):
    """Return the accuracy of the stacked prediction on ``(X, y)``.

    Parameters
    ----------
    X : array-like, shape = (n_estimators, n_samples, n_features)
        The multi-input samples.

    y : array-like, shape = (n_samples)
        True labels for X.

    Returns
    -------
    score : float
        Result of ``accuracy_score`` between ``y`` and
        ``self.predict(X)``.

    Raises
    ------
    ValueError
        If the dimensions of ``X`` and ``y`` are rejected by
        ``_check_Xy``.
    """
    _check_Xy(self, X, y)
    y_pred = self.predict(X)
    return accuracy_score(y, y_pred)
89
159
90
160
def predict_estimators(self, X):
    """Predict class labels with every individual estimator.

    Parameters
    ----------
    X : array-like, shape = (n_estimators, n_samples, n_features)
        The multi-input samples. The i-th entry along the first axis
        is passed to the i-th estimator.

    Returns
    -------
    C : array
        Per-estimator predictions stacked with ``np.array`` and then
        transposed, so that estimators run along the last axis.

    Raises
    ------
    ValueError
        If the dimensions of ``X`` are rejected by ``_check_Xy``.
    """
    _check_Xy(self, X)
    run_parallel = Parallel(n_jobs=self.n_jobs)
    # One prediction job per (input source, estimator) pair.
    jobs = (delayed(_predict_estimator)(clf, x)
            for x, clf in zip(X, self.estimators))
    per_source = run_parallel(jobs)
    return np.array(per_source).T
97
179
98
180
def score_estimators(self, X, y):
    """Return the accuracy of each individual estimator on ``(X, y)``.

    Parameters
    ----------
    X : array-like, shape = (n_estimators, n_samples, n_features)
        The multi-input samples.

    y : array-like, shape = (n_samples)
        True labels for X.

    Returns
    -------
    score : ndarray
        One ``accuracy_score`` value per row of the transposed output
        of ``self.predict_estimators(X)``.

    Raises
    ------
    ValueError
        If the dimensions of ``X`` and ``y`` are rejected by
        ``_check_Xy``.
    """
    _check_Xy(self, X, y)
    per_sample = self.predict_estimators(X)
    # Transpose back so that each row holds one estimator's predictions.
    per_estimator = per_sample.T
    accuracies = [accuracy_score(y, labels) for labels in per_estimator]
    return np.array(accuracies)
0 commit comments