12
12
from sklearn .externals .joblib import Memory , Parallel , delayed
13
13
14
14
15
def stack_features(X):
    """Stack the feature matrices of several sources side by side.

    Parameters
    ----------
    X : list of 2d arrays
        One matrix per source. The matrices are concatenated column-wise,
        so they must all have the same number of rows.

    Returns
    -------
    X_stacked : 2d array
        Horizontal concatenation (``np.hstack``) of the matrices in X.

    feature_indices : list of slice
        One slice per source, in the order of X, such that
        ``X_stacked[:, feature_indices[i]]`` selects the columns that came
        from ``X[i]``.
    """
    X_stacked = np.hstack(X)

    # Column boundaries: 0 followed by the running total of source widths.
    features_markers = np.r_[0, np.cumsum([x.shape[1] for x in X])]

    # Pair each boundary with the next one to get one slice per source.
    feature_indices = [slice(start, stop)
                       for start, stop in zip(features_markers[:-1],
                                              features_markers[1:])]

    return X_stacked, feature_indices
36
+
37
+
38
def _split_features(X, feature_indices):
    """Split a stacked matrix into one column block per source.

    Parameters
    ----------
    X : 2d array-like supporting ``X[:, idx]`` indexing
        Stacked feature matrix.

    feature_indices : iterable of column indexers
        One indexer per source, e.g. the slices returned by
        ``stack_features``.

    Returns
    -------
    blocks : list
        ``X[:, idx]`` for every ``idx`` in ``feature_indices``, in order.

    Raises
    ------
    TypeError
        If ``feature_indices`` is None. Iterating None already raised
        TypeError; the explicit check only makes the message actionable.
    """
    if feature_indices is None:
        raise TypeError(
            "feature_indices must be an iterable of column indexers "
            "(e.g. the slices returned by stack_features), got None")
    return [X[:, fi] for fi in feature_indices]
41
+
42
+
15
43
def _fit_estimator(clf, X, y):
    """Fit ``clf`` on ``(X, y)`` and return the fitted estimator.

    Kept as a module-level function so it can be wrapped with ``delayed``
    and dispatched through ``Parallel``.

    The estimator itself is returned rather than the value of
    ``clf.fit(X, y)``, so an estimator whose ``fit`` does not return
    ``self`` still ends up in the caller's list of fitted estimators.
    """
    clf.fit(X, y)
    return clf
@@ -76,11 +104,13 @@ class StackingClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
76
104
77
105
def __init__(self, estimators=None,
             stacking_estimator=None,
             feature_indices=None,
             memory=Memory(cachedir=None), memory_level=0,
             n_jobs=1):
    # The constructor only records its arguments; no validation or
    # fitting happens here.

    # Base estimators, paired one-to-one with the column blocks of X.
    self.estimators = estimators
    # Estimator fitted on the base estimators' predictions.
    self.stacking_estimator = stacking_estimator
    # Column indexers used to split a stacked X into one block per
    # base estimator.
    self.feature_indices = feature_indices

    # NOTE(review): memory / memory_level are only stored here; how they
    # are consumed is not visible in this part of the file.
    self.memory = memory
    self.memory_level = memory_level

    # Forwarded to Parallel(n_jobs=...) when fitting and predicting.
    self.n_jobs = n_jobs
@@ -90,22 +120,23 @@ def fit(self, X, y):
90
120
91
121
Parameters
92
122
----------
93
- X : {array-like, sparse-matrix}, shape (n_estimators, n_samples,
94
- n_features)
123
+ X : {array-like, sparse-matrix}, shape (n_samples, n_features)
95
124
Training vector, where n_samples is the number of samples and
96
125
n_features is the number of features.
97
126
98
127
y : array-like, shape (n_samples,)
99
128
Target vector relative to X.
100
129
"""
101
- _check_Xy (self , X , y )
130
+
131
+ X_list = _split_features (X , self .feature_indices )
132
+ _check_Xy (self , X_list , y )
102
133
self .estimators = Parallel (n_jobs = self .n_jobs )(
103
134
delayed (_fit_estimator )(clf , x , y )
104
- for x , clf in zip (X , self .estimators ))
135
+ for x , clf in zip (X_list , self .estimators ))
105
136
106
137
predictions_ = Parallel (n_jobs = self .n_jobs )(
107
138
delayed (_predict_proba_estimator )(clf , x )
108
- for x , clf in zip (X , self .estimators ))
139
+ for x , clf in zip (X_list , self .estimators ))
109
140
predictions_ = np .array (predictions_ ).T
110
141
111
142
self .stacking_estimator .fit (predictions_ , y )
@@ -116,19 +147,19 @@ def predict(self, X):
116
147
117
148
Parameters
118
149
----------
119
- X : {array-like, sparse matrix}, shape = (n_estimators,
120
- n_samples, n_features)
150
+ X : {array-like, sparse matrix}, shape = (n_samples, n_features)
121
151
The multi-input samples.
122
152
123
153
Returns
124
154
-------
125
155
C : array, shape = (n_samples)
126
156
Predicted class label per sample.
127
157
"""
128
- _check_Xy (self , X )
158
+ X_list = _split_features (X , self .feature_indices )
159
+ _check_Xy (self , X_list )
129
160
predictions_ = Parallel (n_jobs = self .n_jobs )(
130
161
delayed (_predict_proba_estimator )(clf , x )
131
- for x , clf in zip (X , self .estimators ))
162
+ for x , clf in zip (X_list , self .estimators ))
132
163
predictions_ = np .array (predictions_ ).T
133
164
134
165
return self .stacking_estimator .predict (predictions_ )
@@ -142,7 +173,7 @@ def score(self, X, y):
142
173
143
174
Parameters
144
175
----------
145
- X : array-like, shape = (n_estimators, n_samples, n_features)
176
+ X : array-like, shape = (n_samples, n_features)
146
177
The multi-input samples.
147
178
148
179
y : array-like, shape = (n_samples) or (n_samples, n_outputs)
@@ -154,27 +185,26 @@ def score(self, X, y):
154
185
score : float
155
186
Mean accuracy of self.predict(X) wrt. y.
156
187
"""
157
- _check_Xy (self , X , y )
158
188
return accuracy_score (y , self .predict (X ))
159
189
160
190
def predict_estimators(self, X):
    """Return the class labels predicted by every base estimator.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = (n_samples, n_features)
        The multi-input samples. Columns are split into one block per
        estimator using ``self.feature_indices``.

    Returns
    -------
    C : array, shape = (n_samples, n_estimators)
        Predicted class label per sample and estimators.
    """
    per_source = _split_features(X, self.feature_indices)
    _check_Xy(self, per_source)

    # One prediction job per (feature block, estimator) pair.
    jobs = (delayed(_predict_estimator)(clf, block)
            for block, clf in zip(per_source, self.estimators))
    per_estimator = Parallel(n_jobs=self.n_jobs)(jobs)

    # Transpose so that estimators run along the last axis.
    return np.array(per_estimator).T
179
209
180
210
def score_estimators (self , X , y ):
@@ -186,7 +216,7 @@ def score_estimators(self, X, y):
186
216
187
217
Parameters
188
218
----------
189
- X : array-like, shape = (n_estimators, n_samples, n_features)
219
+ X : array-like, shape = (n_samples, n_features)
190
220
The multi-input samples.
191
221
192
222
y : array-like, shape = (n_samples) or (n_samples, n_outputs)
@@ -197,6 +227,5 @@ def score_estimators(self, X, y):
197
227
score : list of float, shape (n_estimators,)
198
228
Mean accuracy of self.predict_estimators(X) wrt. y.
199
229
"""
200
- _check_Xy (self , X , y )
201
230
predictions_ = self .predict_estimators (X )
202
231
return np .array ([accuracy_score (y , p ) for p in predictions_ .T ])
0 commit comments