explain_prediction for sklearn linear classifiers: in the binary case, explain the predicted class (chosen by the sign of the score) instead of always class 1

kmike · kmike · commit 499c2e464d50 · 2017-06-23T05:39:16.000+05:00
diff --git a/eli5/sklearn/explain_prediction.py b/eli5/sklearn/explain_prediction.py
@@ -198,11 +198,14 @@ def explain_prediction_linear_classifier(clf, doc,
             add_weighted_spans(doc, vec, vectorized, target_expl)
             res.targets.append(target_expl)
     else:
+        label_id = 1 if score > 0 else 0
+        scale = -1 if label_id == 0 else 1
+
         target_expl = TargetExplanation(
-            target=display_names[1][1],
-            feature_weights=_weights(0),
+            target=display_names[label_id][1],
+            feature_weights=_weights(0, scale=scale),
             score=score,
-            proba=proba[1] if proba is not None else None,
+            proba=proba[label_id] if proba is not None else None,
         )
         add_weighted_spans(doc, vec, vectorized, target_expl)
         res.targets.append(target_expl)
@@ -606,8 +609,8 @@ def _multiply(X, coef):
 def _linear_weights(clf, x, top, feature_names, flt_indices):
     """ Return top weights getter for label_id.
     """
-    def _weights(label_id):
-        coef = get_coef(clf, label_id)
+    def _weights(label_id, scale=1.0):
+        coef = get_coef(clf, label_id) * scale
         _x = x
         scores = _multiply(_x, coef)
         if flt_indices is not None:
diff --git a/tests/test_ipython.py b/tests/test_ipython.py
@@ -43,7 +43,7 @@ def test_show_prediction():
     html = eli5.show_prediction(clf, doc)
     write_html(clf, html.data, '')
     assert isinstance(html, HTML)
-    assert 'y=b' in html.data
+    assert 'y=a' in html.data
     assert 'BIAS' in html.data
     assert 'x1' in html.data
 
@@ -56,6 +56,14 @@ def test_show_prediction():
     # format_as_html arguments are supported
     html = eli5.show_prediction(clf, doc, show=['method'])
     write_html(clf, html.data, '')
-    assert 'y=b' not in html.data
+    assert 'y=a' not in html.data
     assert 'BIAS' not in html.data
     assert 'Explained as' in html.data
+
+    # top target is used
+    html = eli5.show_prediction(clf, np.array([1, 1]))
+    write_html(clf, html.data, '')
+    assert 'y=b' in html.data
+    assert 'BIAS' in html.data
+    assert 'x1' in html.data
+
diff --git a/tests/test_sklearn_explain_prediction.py b/tests/test_sklearn_explain_prediction.py
@@ -128,13 +128,23 @@ def assert_binary_linear_classifier_explained(newsgroups_train_binary, clf,
     X = vec.fit_transform(docs)
     clf.fit(X, y)
 
-    get_res = lambda **kwargs: explain_prediction(
-        clf, docs[2], vec=vec, target_names=target_names, top=20, **kwargs)
-    res = get_res()
-    pprint(res)
+    assert y[2] == 1
+    cg_document = docs[2]
+    res = explain_prediction(clf, cg_document, vec=vec,
+                             target_names=target_names, top=20)
     expl_text, expl_html = format_as_all(res, clf)
     for expl in [expl_text, expl_html]:
         assert 'software' in expl
+        assert target_names[1] in expl
+
+    assert y[15] == 0
+    atheism_document = docs[15]
+    res = explain_prediction(clf, atheism_document, vec=vec,
+                             target_names=target_names, top=20)
+    expl_text, expl_html = format_as_all(res, clf)
+    for expl in [expl_text, expl_html]:
+        assert 'god' in expl
+        assert target_names[0] in expl
 
 
 def assert_linear_regression_explained(boston_train, reg, explain_prediction,
@@ -288,6 +298,8 @@ def test_explain_linear(newsgroups_train, clf):
 
 @pytest.mark.parametrize(['clf'], [
     [LogisticRegression(random_state=42)],
+    [LogisticRegressionCV(random_state=42)],
+    [OneVsRestClassifier(LogisticRegression(random_state=42))],
     [SGDClassifier(random_state=42)],
     [SVC(kernel='linear', random_state=42)],
     [SVC(kernel='linear', random_state=42, decision_function_shape='ovr')],
diff --git a/tests/test_sklearn_vectorizers.py b/tests/test_sklearn_vectorizers.py
@@ -18,15 +18,15 @@
 from .utils import format_as_all, get_all_features, get_names_coefs, write_html
 
 
-def check_explain_linear_binary(res, clf):
+def check_explain_linear_binary(res, clf, target='alt.atheism'):
     expl_text, expl_html = format_as_all(res, clf)
     assert len(res.targets) == 1
     e = res.targets[0]
-    assert e.target == 'comp.graphics'
-    neg = get_all_features(e.feature_weights.neg)
-    assert 'objective' in neg
+    assert e.target == target
+    pos = get_all_features(e.feature_weights.pos)
+    assert 'objective' in pos
     for expl in [expl_text, expl_html]:
-        assert 'comp.graphics' in expl
+        assert target in expl
         assert 'objective' in expl
 
 
@@ -50,9 +50,9 @@ def test_explain_linear_binary(vec, newsgroups_train_binary):
         top=20, vectorized=True)
     if isinstance(vec, HashingVectorizer):
         # InvertableHashingVectorizer must be passed with vectorized=True
-        neg_weights = res_vectorized.targets[0].feature_weights.neg
-        neg_vectorized = get_all_features(neg_weights)
-        assert all(name.startswith('x') for name in neg_vectorized)
+        pos_weights = res_vectorized.targets[0].feature_weights.pos
+        pos_vectorized = get_all_features(pos_weights)
+        assert all(name.startswith('x') for name in pos_vectorized)
     else:
         assert res_vectorized == _without_weighted_spans(res)