quick fix for target class logic in predict_proba

dengemann · dengemann · commit 535353b45978 · 2017-05-15T15:53:25.000+02:00
diff --git a/stlearn/stacking.py b/stlearn/stacking.py
@@ -59,10 +59,12 @@ def _predict_estimator(clf, X):
 
 def _predict_proba_estimator(clf, X):
     """Helper to get prediction method"""
+
+    # XXX this is not safe. Maybe add explicit 1st level scoring param.
     # try predict_proba
     predict_proba = getattr(clf, "predict_proba", None)
     if callable(predict_proba):
-        return clf.predict_proba(X)[:, 0]
+        return clf.predict_proba(X)
 
     # or decision_function
     decision_function = getattr(clf, "decision_function", None)
@@ -131,6 +133,9 @@ def __init__(self, estimators,
         self.feature_indices = feature_indices
         self.n_jobs = n_jobs
 
+    def _disambiguate_probability(self, x):
+        return x[:, -1] if np.ndim(x) > 1 else x  # 2D: keep last column
+
     def fit(self, X, y):
         """Fit all estimators according to the given training data.
 
@@ -154,6 +159,8 @@ def fit(self, X, y):
         predictions_ = Parallel(n_jobs=self.n_jobs)(
             delayed(_predict_proba_estimator)(clf, x)
             for x, clf in zip(X_list, self.estimators))
+        predictions_ = [self._disambiguate_probability(x)
+                        for x in predictions_]
         predictions_ = np.array(predictions_).T
 
         self.stacking_estimator.fit(predictions_, y)
@@ -177,6 +184,8 @@ def predict(self, X):
         predictions_ = Parallel(n_jobs=self.n_jobs)(
             delayed(_predict_proba_estimator)(clf, x)
             for x, clf in zip(X_list, self.estimators))
+        predictions_ = [self._disambiguate_probability(x)
+                        for x in predictions_]
         predictions_ = np.array(predictions_).T
 
         return self.stacking_estimator.predict(predictions_)
@@ -199,12 +208,14 @@ def predict_proba(self, X):
         predictions_ = Parallel(n_jobs=self.n_jobs)(
             delayed(_predict_proba_estimator)(clf, x)
             for x, clf in zip(X_list, self.estimators))
+        predictions_ = [self._disambiguate_probability(x)
+                        for x in predictions_]
         predictions_ = np.array(predictions_).T
 
         return _predict_proba_estimator(self.stacking_estimator, predictions_)
 
     def decision_function(self, X):
-        return self.predict_proba(X)
+        return self._disambiguate_probability(self.predict_proba(X))  # 1D-safe
 
     def score(self, X, y):
         """Returns the mean accuracy on the given test data and labels.
diff --git a/stlearn/tests/test_stacking.py b/stlearn/tests/test_stacking.py
@@ -67,6 +67,9 @@ def test_stacking_essentials():
     predictions = stacking.predict(X_stacked)
     assert_array_equal(np.unique(predictions), np.array([0, 1]))
 
+    proba = stacking.predict_proba(X_stacked)
+    assert_array_equal(proba.sum(1), np.ones_like(proba[:, 1]))
+
     score = stacking.score(X_stacked, y)
     assert_true(np.isscalar(score))