better IPython support: eli5.show_weights and eli5.show_prediction

kmike · kmike · commit 8bac562442af · 2016-11-23T18:50:18.000+05:00
diff --git a/docs/source/autodocs/eli5.rst b/docs/source/autodocs/eli5.rst
@@ -10,3 +10,6 @@ The following functions are exposed to a top level, e.g.
 
 .. autofunction:: eli5.explain_prediction
 
+.. autofunction:: eli5.show_weights
+
+.. autofunction:: eli5.show_prediction
diff --git a/eli5/__init__.py b/eli5/__init__.py
@@ -6,6 +6,14 @@
 from .formatters import format_as_html, format_html_styles, format_as_text
 from .explain import explain_weights, explain_prediction
 from .sklearn import explain_weights_sklearn, explain_prediction_sklearn
+
+
+try:
+    from .ipython import show_weights, show_prediction
+except ImportError:
+    pass  # IPython is not installed
+
+
 try:
     from .lightning import (
         explain_prediction_lightning,
@@ -15,6 +23,7 @@
     # lightning is not available
     pass
 
+
 try:
     from .sklearn_crfsuite import (
         explain_weights_sklearn_crfsuite
diff --git a/eli5/explain.py b/eli5/explain.py
@@ -62,8 +62,12 @@ def explain_weights(estimator, **kwargs):
     Explanation
         :class:`~Explanation` result. Use one of the formatting functions from
         :mod:`eli5.formatters` to print it in a human-readable form.
-        Explanation instances also have repr which works well with
-        IPython notebook.
+
+        Explanation instances have repr which works well with
+        IPython notebook, but it can be a better idea to use
+        :func:`eli5.show_weights` instead of :func:`eli5.explain_weights`
+        if you work with IPython: :func:`eli5.show_weights` allows customizing
+        formatting without a need to import :mod:`eli5.formatters` functions.
     """
     return Explanation(
         estimator=repr(estimator),
@@ -128,8 +132,13 @@ def explain_prediction(estimator, doc, **kwargs):
     Explanation
         :class:`~.Explanation` result. Use one of the formatting functions from
         :mod:`eli5.formatters` to print it in a human-readable form.
-        Explanation instances also have repr which works well with
-        IPython notebook.
+
+        Explanation instances have repr which works well with
+        IPython notebook, but it can be a better idea to use
+        :func:`eli5.show_prediction` instead of :func:`eli5.explain_prediction`
+        if you work with IPython: :func:`eli5.show_prediction` allows
+        customizing formatting without a need to import :mod:`eli5.formatters`
+        functions.
     """
     return Explanation(
         estimator=repr(estimator),
diff --git a/eli5/ipython.py b/eli5/ipython.py
@@ -0,0 +1,224 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+from IPython.display import HTML
+
+from .explain import explain_weights, explain_prediction
+from .formatters import format_as_html, fields
+
+
+FORMAT_KWARGS = {'include_styles', 'force_weights',
+                 'show', 'preserve_density',
+                 'highlight_spaces', 'horizontal_layout'}
+
+
+def show_weights(estimator, **kwargs):
+    """ Return an explanation of estimator parameters (weights)
+    as an IPython.display.HTML object. Use this function
+    to show classifier weights in IPython.
+
+    :func:`show_weights` accepts all
+    :func:`eli5.explain_weights` arguments and all
+    :func:`eli5.formatters.html.format_as_html`
+    keyword arguments, so it is possible to get explanation and
+    customize formatting in a single call.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator instance. This argument must be positional.
+
+    top : int or (int, int) tuple, optional
+        Number of features to show. When ``top`` is int, ``top`` features with
+        the highest absolute values are shown. When it is (pos, neg) tuple,
+        no more than ``pos`` positive features and no more than ``neg``
+        negative features are shown. ``None`` value means no limit.
+
+        This argument may be supported or not, depending on estimator type.
+
+    target_names : list[str] or {'old_name': 'new_name'} dict, optional
+        Names of targets or classes. This argument can be used to provide
+        human-readable class/target names for estimators which don't expose
+        class names themselves. It can also be used to rename
+        estimator-provided classes before displaying them.
+
+        This argument may be supported or not, depending on estimator type.
+
+    targets : list, optional
+        Order of class/target names to show. This argument can be also used
+        to show information only for a subset of classes. It should be a list
+        of class / target names which match either names provided by
+        an estimator or names defined in ``target_names`` parameter.
+
+        This argument may be supported or not, depending on estimator type.
+
+    feature_names : list, optional
+        A list of feature names. It allows to specify feature
+        names when they are not provided by an estimator object.
+
+        This argument may be supported or not, depending on estimator type.
+
+    feature_re : str, optional
+        Only feature names which match ``feature_re`` regex are returned.
+
+    show : List[str], optional
+        List of sections to show. Allowed values:
+
+        * 'targets' - per-target feature weights;
+        * 'transition_features' - transition features of a CRF model;
+        * 'feature_importances' - feature importances of a decision tree or
+          an ensemble-based estimator;
+        * 'decision_tree' - decision tree in a graphical form;
+        * 'method' - a string with explanation method;
+        * 'description' - description of explanation method and its caveats.
+
+    horizontal_layout : bool
+        When True, feature weight tables are printed horizontally
+        (left to right); when False, feature weight tables are printed
+        vertically (top to bottom). Default is True.
+
+    highlight_spaces : bool or None, optional
+        Whether to highlight spaces in feature names. This is useful if
+        you work with text and have ngram features which may include spaces
+        at left or right. Default is None, meaning that the value used
+        is set automatically based on vectorizer and feature values.
+
+    include_styles : bool
+        Most styles are inline, but some are included separately in <style> tag;
+        you can omit them by passing ``include_styles=False``. Default is True.
+
+    **kwargs: dict
+        Keyword arguments. All keyword arguments are passed to
+        concrete explain_weights... implementations.
+
+    Returns
+    -------
+    IPython.display.HTML
+        The result is printed in IPython notebook as an HTML widget.
+        If you need to display several explanations as an output of a single
+        cell, or if you want to display it from a function then use
+        IPython.display.display::
+
+            from IPython.display import display
+            display(eli5.show_weights(clf1))
+            display(eli5.show_weights(clf2))
+
+    """
+    format_kwargs, explain_kwargs = _split_kwargs(kwargs)
+    expl = explain_weights(estimator, **explain_kwargs)
+    html = format_as_html(expl, **format_kwargs)
+    return HTML(html)
+
+
+def show_prediction(estimator, doc, **kwargs):
+    """ Return an explanation of estimator prediction
+    as an IPython.display.HTML object. Use this function
+    to show information about classifier prediction in IPython.
+
+    :func:`show_prediction` accepts all
+    :func:`eli5.explain_prediction` arguments and all
+    :func:`eli5.formatters.html.format_as_html`
+    keyword arguments, so it is possible to get explanation and
+    customize formatting in a single call.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator instance. This argument must be positional.
+
+    doc : object
+        Example to run estimator on. Estimator makes a prediction for this
+        example, and :func:`show_prediction` tries to show information
+        about this prediction.
+
+    top : int or (int, int) tuple, optional
+        Number of features to show. When ``top`` is int, ``top`` features with
+        the highest absolute values are shown. When it is (pos, neg) tuple,
+        no more than ``pos`` positive features and no more than ``neg``
+        negative features are shown. ``None`` value means no limit (default).
+
+        This argument may be supported or not, depending on estimator type.
+
+    target_names : list[str] or {'old_name': 'new_name'} dict, optional
+        Names of targets or classes. This argument can be used to provide
+        human-readable class/target names for estimators which don't expose
+        class names themselves. It can also be used to rename
+        estimator-provided classes before displaying them.
+
+        This argument may be supported or not, depending on estimator type.
+
+    targets : list, optional
+        Order of class/target names to show. This argument can be also used
+        to show information only for a subset of classes. It should be a list
+        of class / target names which match either names provided by
+        an estimator or names defined in ``target_names`` parameter.
+
+        This argument may be supported or not, depending on estimator type.
+
+    feature_names : list, optional
+        A list of feature names. It allows to specify feature
+        names when they are not provided by an estimator object.
+
+        This argument may be supported or not, depending on estimator type.
+
+    horizontal_layout : bool
+        When True, feature weight tables are printed horizontally
+        (left to right); when False, feature weight tables are printed
+        vertically (top to bottom). Default is True.
+
+    highlight_spaces : bool or None, optional
+        Whether to highlight spaces in feature names. This is useful if
+        you work with text and have ngram features which may include spaces
+        at left or right. Default is None, meaning that the value used
+        is set automatically based on vectorizer and feature values.
+
+    include_styles : bool
+        Most styles are inline, but some are included separately in <style> tag;
+        you can omit them by passing ``include_styles=False``. Default is True.
+
+    force_weights : bool
+        When True, a table with feature weights is displayed even if all
+        features are already highlighted in text. Default is False.
+
+    preserve_density: bool or None
+        This argument currently only makes sense when used with text data
+        and vectorizers from scikit-learn.
+
+        If preserve_density is True, then color for longer fragments will be
+        less intensive than for shorter fragments, so that "sum" of intensities
+        will correspond to feature weight.
+
+        If preserve_density is None, then its value is chosen depending on
+        analyzer kind: it is preserved for "char" and "char_wb" analyzers,
+        and not preserved for "word" analyzers.
+
+        Default is None.
+
+    **kwargs: dict
+        Keyword arguments. All keyword arguments are passed to
+        concrete explain_prediction... implementations.
+
+    Returns
+    -------
+    IPython.display.HTML
+        The result is printed in IPython notebook as an HTML widget.
+        If you need to display several explanations as an output of a single
+        cell, or if you want to display it from a function then use
+        IPython.display.display::
+
+            from IPython.display import display
+            display(eli5.show_prediction(clf, doc1))
+            display(eli5.show_prediction(clf, doc2))
+    """
+    format_kwargs, explain_kwargs = _split_kwargs(kwargs)
+    expl = explain_prediction(estimator, doc, **explain_kwargs)
+    html = format_as_html(expl, **format_kwargs)
+    return HTML(html)
+
+
+def _split_kwargs(kwargs):
+    format_kwargs = {k: v for k, v in kwargs.items() if k in FORMAT_KWARGS}
+    format_kwargs.setdefault('show', fields.WEIGHTS)
+    format_kwargs.setdefault('force_weights', False)
+    explain_kwargs = {k: v for k, v in kwargs.items() if k not in FORMAT_KWARGS}
+    return format_kwargs, explain_kwargs
diff --git a/tests/test_ipython.py b/tests/test_ipython.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+import pytest
+pytest.importorskip('IPython')
+
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+from IPython.display import HTML
+
+import eli5
+from .utils import write_html
+
+
+def test_show_weights():
+    clf = LogisticRegression()
+    X = [[0, 0], [1, 1], [0, 1]]
+    y = ['a', 'b', 'a']
+    clf.fit(X, y)
+
+    html = eli5.show_weights(clf)
+    # write_html(clf, html.data, '')
+    assert isinstance(html, HTML)
+    assert 'y=b' in html.data
+    assert 'Explained as' not in html.data
+
+    # explain_weights arguments are supported
+    html = eli5.show_weights(clf, target_names=['A', 'B'])
+    assert 'y=B' in html.data
+
+    # format_as_html arguments are supported
+    html = eli5.show_weights(clf, show=['method'])
+    assert 'y=b' not in html.data
+    assert 'Explained as' in html.data
+
+
+def test_show_prediction():
+    clf = LogisticRegression(C=100)
+    X = [[0, 0], [1, 1], [0, 1]]
+    y = ['a', 'b', 'a']
+    clf.fit(X, y)
+
+    doc = np.array([0, 1])
+
+    html = eli5.show_prediction(clf, doc)
+    write_html(clf, html.data, '')
+    assert isinstance(html, HTML)
+    assert 'y=b' in html.data
+    assert 'BIAS' in html.data
+    assert 'x1' in html.data
+
+    # explain_prediction arguments are supported
+    html = eli5.show_prediction(clf, doc, feature_names=['foo', 'bar'])
+    write_html(clf, html.data, '')
+    assert 'x1' not in html.data
+    assert 'bar' in html.data
+
+    # format_as_html arguments are supported
+    html = eli5.show_prediction(clf, doc, show=['method'])
+    write_html(clf, html.data, '')
+    assert 'y=b' not in html.data
+    assert 'BIAS' not in html.data
+    assert 'Explained as' in html.data
diff --git a/tox.ini b/tox.ini
@@ -18,6 +18,7 @@ deps=
 deps=
     {[base]deps}
     sklearn-crfsuite
+    ipython
 
 commands=
     ; to install lightning numpy must be installed first