diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 16ef76638ec5b..6e05c3ff0457a 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -456,28 +456,29 @@ columns: .. _visualization.box.return: -Basically, plot functions return :class:`matplotlib Axes ` as a return value. -In ``boxplot``, the return type can be changed by argument ``return_type``, and whether the subplots is enabled (``subplots=True`` in ``plot`` or ``by`` is specified in ``boxplot``). +.. warning:: -When ``subplots=False`` / ``by`` is ``None``: + The default changed from ``'dict'`` to ``'axes'`` in version 0.19.0. -* if ``return_type`` is ``'dict'``, a dictionary containing the :class:`matplotlib Lines ` is returned. The keys are "boxes", "caps", "fliers", "medians", and "whiskers". - This is the default of ``boxplot`` in historical reason. - Note that ``plot.box()`` returns ``Axes`` by default same as other plots. -* if ``return_type`` is ``'axes'``, a :class:`matplotlib Axes ` containing the boxplot is returned. -* if ``return_type`` is ``'both'`` a namedtuple containing the :class:`matplotlib Axes ` - and :class:`matplotlib Lines ` is returned +In ``boxplot``, the return type can be controlled by the ``return_type`` keyword. The valid choices are ``{"axes", "dict", "both", None}``. +Faceting, created by ``DataFrame.boxplot`` with the ``by`` +keyword, will affect the output type as well: -When ``subplots=True`` / ``by`` is some column of the DataFrame: +================ ======= ========================== +``return_type=`` Faceted Output type +---------------- ------- -------------------------- -* A dict of ``return_type`` is returned, where the keys are the columns - of the DataFrame. The plot has a facet for each column of - the DataFrame, with a separate box for each value of ``by``. 
+``None`` No axes +``None`` Yes 2-D ndarray of axes +``'axes'`` No axes +``'axes'`` Yes Series of axes +``'dict'`` No dict of artists +``'dict'`` Yes Series of dicts of artists +``'both'`` No namedtuple +``'both'`` Yes Series of namedtuples +================ ======= ========================== -Finally, when calling boxplot on a :class:`Groupby` object, a dict of ``return_type`` -is returned, where the keys are the same as the Groupby object. The plot has a -facet for each key, with each facet containing a box for each column of the -DataFrame. +``Groupby.boxplot`` always returns a Series of ``return_type``. .. ipython:: python :okwarning: diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index a422e667e32a7..f02367a49d44d 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -494,6 +494,7 @@ API changes - ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior. (:issue:`13299`) - Passing ``Period`` with multiple frequencies to normal ``Index`` now returns ``Index`` with ``object`` dtype (:issue:`13664`) - ``PeriodIndex.fillna`` with ``Period`` has different freq now coerces to ``object`` dtype (:issue:`13664`) +- Faceted boxplots from ``DataFrame.boxplot(by=col)`` now return a ``Series`` when ``return_type`` is not None. Previously these returned an ``OrderedDict``. Note that when ``return_type=None``, the default, these still return a 2-D NumPy array. (:issue:`12216`, :issue:`7096`) - More informative exceptions are passed through the csv parser. The exception type would now be the original exception type instead of ``CParserError``. (:issue:`13652`) - ``astype()`` will now accept a dict of column name to data types mapping as the ``dtype`` argument. 
(:issue:`12086`) - The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json ` (:issue:`9180`) @@ -1282,9 +1283,9 @@ Removal of prior version deprecations/changes Now legacy time rules raises ``ValueError``. For the list of currently supported offsets, see :ref:`here ` +- The default value for the ``return_type`` parameter for ``DataFrame.plot.box`` and ``DataFrame.boxplot`` changed from ``None`` to ``"axes"``. These methods will now return a matplotlib axes by default instead of a dictionary of artists. See :ref:`here <visualization.box.return>` (:issue:`6581`). - The ``tquery`` and ``uquery`` functions in the ``pandas.io.sql`` module are removed (:issue:`5950`). - .. _whatsnew_0190.performance: Performance Improvements diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 7dcc3d6e5734f..9fe1d7cacd38f 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -5,8 +5,8 @@ import os import warnings -from pandas import DataFrame -from pandas.compat import zip, iteritems, OrderedDict +from pandas import DataFrame, Series +from pandas.compat import zip, iteritems from pandas.util.decorators import cache_readonly from pandas.types.api import is_list_like import pandas.util.testing as tm @@ -445,7 +445,8 @@ def _check_box_return_type(self, returned, return_type, expected_keys=None, self.assertIsInstance(r, Axes) return - self.assertTrue(isinstance(returned, OrderedDict)) + self.assertTrue(isinstance(returned, Series)) + self.assertEqual(sorted(returned.keys()), sorted(expected_keys)) for key, value in iteritems(returned): self.assertTrue(isinstance(value, types[return_type])) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index d499540827ab0..333792c5ffdb2 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -92,6 +92,12 @@ def 
test_boxplot_legacy(self): lines = list(itertools.chain.from_iterable(d.values())) self.assertEqual(len(ax.get_lines()), len(lines)) + @slow + def test_boxplot_return_type_none(self): + # GH 12216; return_type=None & by=None -> axes + result = self.hist_df.boxplot() + self.assertTrue(isinstance(result, self.plt.Axes)) + @slow def test_boxplot_return_type_legacy(self): # API change in https://github.com/pydata/pandas/pull/7096 @@ -103,10 +109,8 @@ def test_boxplot_return_type_legacy(self): with tm.assertRaises(ValueError): df.boxplot(return_type='NOTATYPE') - with tm.assert_produces_warning(FutureWarning): - result = df.boxplot() - # change to Axes in future - self._check_box_return_type(result, 'dict') + result = df.boxplot() + self._check_box_return_type(result, 'axes') with tm.assert_produces_warning(False): result = df.boxplot(return_type='dict') @@ -140,6 +144,7 @@ def _check_ax_limits(col, ax): p = df.boxplot(['height', 'weight', 'age'], by='category') height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0] dummy_ax = p[1, 1] + _check_ax_limits(df['height'], height_ax) _check_ax_limits(df['weight'], weight_ax) _check_ax_limits(df['age'], age_ax) @@ -163,8 +168,7 @@ def test_boxplot_legacy(self): grouped = self.hist_df.groupby(by='gender') with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') - self._check_axes_shape(list(axes.values()), axes_num=2, layout=(1, 2)) - + self._check_axes_shape(list(axes.values), axes_num=2, layout=(1, 2)) axes = _check_plot_works(grouped.boxplot, subplots=False, return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) @@ -175,7 +179,7 @@ def test_boxplot_legacy(self): grouped = df.groupby(level=1) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') - self._check_axes_shape(list(axes.values()), axes_num=10, layout=(4, 3)) + self._check_axes_shape(list(axes.values), axes_num=10, layout=(4, 3)) axes = 
_check_plot_works(grouped.boxplot, subplots=False, return_type='axes') @@ -184,8 +188,7 @@ def test_boxplot_legacy(self): grouped = df.unstack(level=1).groupby(level=0, axis=1) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') - self._check_axes_shape(list(axes.values()), axes_num=3, layout=(2, 2)) - + self._check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2)) axes = _check_plot_works(grouped.boxplot, subplots=False, return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) @@ -226,8 +229,7 @@ def test_grouped_box_return_type(self): expected_keys=['height', 'weight', 'category']) # now for groupby - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.groupby('gender').boxplot() + result = df.groupby('gender').boxplot(return_type='dict') self._check_box_return_type( result, 'dict', expected_keys=['Male', 'Female']) @@ -347,7 +349,7 @@ def test_grouped_box_multiple_axes(self): with tm.assert_produces_warning(UserWarning): returned = df.boxplot(column=['height', 'weight', 'category'], by='gender', return_type='axes', ax=axes[0]) - returned = np.array(list(returned.values())) + returned = np.array(list(returned.values)) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) self.assert_numpy_array_equal(returned, axes[0]) self.assertIs(returned[0].figure, fig) @@ -357,7 +359,7 @@ def test_grouped_box_multiple_axes(self): returned = df.groupby('classroom').boxplot( column=['height', 'weight', 'category'], return_type='axes', ax=axes[1]) - returned = np.array(list(returned.values())) + returned = np.array(list(returned.values)) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) self.assert_numpy_array_equal(returned, axes[1]) self.assertIs(returned[0].figure, fig) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 91be0a7a73e35..4d0c1e9213b17 100644 --- a/pandas/tests/plotting/test_frame.py +++ 
b/pandas/tests/plotting/test_frame.py @@ -1221,6 +1221,9 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type='axes') self._check_box_return_type(result, 'axes') + result = df.plot.box() # default axes + self._check_box_return_type(result, 'axes') + result = df.plot.box(return_type='both') self._check_box_return_type(result, 'both') @@ -1230,7 +1233,7 @@ def test_boxplot_subplots_return_type(self): # normal style: return_type=None result = df.plot.box(subplots=True) - self.assertIsInstance(result, np.ndarray) + self.assertIsInstance(result, Series) self._check_box_return_type(result, None, expected_keys=[ 'height', 'weight', 'category']) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 1abd11017dbfe..7fd0b1044f9d7 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -2247,7 +2247,7 @@ class BoxPlot(LinePlot): # namedtuple to hold results BP = namedtuple("Boxplot", ['ax', 'lines']) - def __init__(self, data, return_type=None, **kwargs): + def __init__(self, data, return_type='axes', **kwargs): # Do not call LinePlot.__init__ which may fill nan if return_type not in self._valid_return_types: raise ValueError( @@ -2266,7 +2266,7 @@ def _args_adjust(self): self.sharey = False @classmethod - def _plot(cls, ax, y, column_num=None, return_type=None, **kwds): + def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds): if y.ndim == 2: y = [remove_na(v) for v in y] # Boxplot fails with empty arrays, so need to add a NaN @@ -2339,7 +2339,7 @@ def maybe_color_bp(self, bp): def _make_plot(self): if self.subplots: - self._return_obj = compat.OrderedDict() + self._return_obj = Series() for i, (label, y) in enumerate(self._iter_data()): ax = self._get_ax(i) @@ -2691,14 +2691,17 @@ def plot_series(data, kind='line', ax=None, # Series unique grid : Setting this to True will show the grid layout : tuple (optional) (rows, columns) for the layout of the plot - return_type : {'axes', 'dict', 'both'}, default 'dict' - 
The kind of object to return. 'dict' returns a dictionary - whose values are the matplotlib Lines of the boxplot; + return_type : {None, 'axes', 'dict', 'both'}, default None + The kind of object to return. The default is ``axes``. 'axes' returns the matplotlib axes the boxplot is drawn on; + 'dict' returns a dictionary whose values are the matplotlib + Lines of the boxplot; 'both' returns a namedtuple with the axes and dict. - When grouping with ``by``, a dict mapping columns to ``return_type`` - is returned. + When grouping with ``by``, a Series mapping columns to ``return_type`` + is returned, unless ``return_type`` is None, in which case a NumPy + array of axes is returned with the same shape as ``layout``. + See the prose documentation for more. kwds : other plotting keyword arguments to be passed to matplotlib boxplot function @@ -2724,7 +2727,7 @@ def boxplot(data, column=None, by=None, ax=None, fontsize=None, # validate return_type: if return_type not in BoxPlot._valid_return_types: - raise ValueError("return_type must be {None, 'axes', 'dict', 'both'}") + raise ValueError("return_type must be {'axes', 'dict', 'both'}") from pandas import Series, DataFrame if isinstance(data, Series): @@ -2769,23 +2772,19 @@ def plot_group(keys, values, ax): columns = [column] if by is not None: + # Prefer array return type for 2-D plots to match the subplot layout + # https://github.com/pydata/pandas/pull/12216#issuecomment-241175580 result = _grouped_plot_by_column(plot_group, data, columns=columns, by=by, grid=grid, figsize=figsize, ax=ax, layout=layout, return_type=return_type) else: + if return_type is None: + return_type = 'axes' if layout is not None: raise ValueError("The 'layout' keyword is not supported when " "'by' is None") - if return_type is None: - msg = ("\nThe default value for 'return_type' will change to " - "'axes' in a future release.\n To use the future behavior " - "now, set return_type='axes'.\n To keep the previous " - "behavior and silence this 
warning, set " - "return_type='dict'.") - warnings.warn(msg, FutureWarning, stacklevel=3) - return_type = 'dict' if ax is None: ax = _gca() data = data._get_numeric_data() @@ -3104,12 +3103,12 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, figsize=figsize, layout=layout) axes = _flatten(axes) - ret = compat.OrderedDict() + ret = Series() for (key, group), ax in zip(grouped, axes): d = group.boxplot(ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds) ax.set_title(pprint_thing(key)) - ret[key] = d + ret.loc[key] = d fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) else: @@ -3175,7 +3174,9 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, _axes = _flatten(axes) - result = compat.OrderedDict() + result = Series() + ax_values = [] + for i, col in enumerate(columns): ax = _axes[i] gp_col = grouped[col] @@ -3183,9 +3184,11 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, re_plotf = plotf(keys, values, ax, **kwargs) ax.set_title(col) ax.set_xlabel(pprint_thing(by)) - result[col] = re_plotf + ax_values.append(re_plotf) ax.grid(grid) + result = Series(ax_values, index=columns) + # Return axes in multiplot case, maybe revisit later # 985 if return_type is None: result = axes diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f5a93d1f17d00..57bb01e5e0406 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -880,12 +880,12 @@ def assert_attr_equal(attr, left, right, obj='Attributes'): def assert_is_valid_plot_return_object(objs): import matplotlib.pyplot as plt - if isinstance(objs, np.ndarray): - for el in objs.flat: - assert isinstance(el, plt.Axes), ('one of \'objs\' is not a ' - 'matplotlib Axes instance, ' - 'type encountered {0!r}' - ''.format(el.__class__.__name__)) + if isinstance(objs, (pd.Series, np.ndarray)): + for el in objs.ravel(): + msg = ('one of \'objs\' is not a matplotlib Axes instance, ' + 'type encountered {0!r}') + 
assert isinstance(el, (plt.Axes, dict)), msg.format( + el.__class__.__name__) else: assert isinstance(objs, (plt.Artist, tuple, dict)), \ ('objs is neither an ndarray of Artist instances nor a '