From 8d462ed2427ed2de826e67e7a47fc2e80256411e Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Wed, 7 Mar 2018 14:32:12 +0000 Subject: [PATCH] ENH: Implement method argument for DataFrame.replace (#19894) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/generic.py | 9 +++++++- pandas/tests/frame/test_replace.py | 37 ++++++++++++++++++++++++++---- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 79d85513efa26..feca90aae6237 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -338,6 +338,7 @@ Other Enhancements - For subclassed ``DataFrames``, :func:`DataFrame.apply` will now preserve the ``Series`` subclass (if defined) when passing the data to the applied function (:issue:`19822`) - :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) - Added option ``display.html.use_mathjax`` so `MathJax <https://www.mathjax.org/>`_ can be disabled when rendering tables in ``Jupyter`` notebooks (:issue:`19856`, :issue:`19824`) +- :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`) - :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`) - :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e34fe606be759..a893b2ba1a189 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4886,7 +4886,10 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None): ``to_replace`` must be ``None``. 
method : string, optional, {'pad', 'ffill', 'bfill'} The method to use when for replacement, when ``to_replace`` is a - ``list``. + scalar, list or tuple and ``value`` is None. + + .. versionchanged:: 0.23.0 + Added to DataFrame See Also -------- @@ -5055,6 +5058,10 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, to_replace = [to_replace] if isinstance(to_replace, (tuple, list)): + if isinstance(self, pd.DataFrame): + return self.apply(_single_replace, + args=(to_replace, method, inplace, + limit)) return _single_replace(self, to_replace, method, inplace, limit) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index fbc4accd0e41e..dd83a94b7062a 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -33,9 +33,6 @@ def test_replace_inplace(self): tsframe.replace(nan, 0, inplace=True) assert_frame_equal(tsframe, self.tsframe.fillna(0)) - pytest.raises(TypeError, self.tsframe.replace, nan, inplace=True) - pytest.raises(TypeError, self.tsframe.replace, nan) - # mixed type mf = self.mixed_frame mf.iloc[5:20, mf.columns.get_loc('foo')] = nan @@ -720,7 +717,6 @@ def test_replace_simple_nested_dict_with_nonexistent_value(self): assert_frame_equal(expected, result) def test_replace_value_is_none(self): - pytest.raises(TypeError, self.tsframe.replace, nan) orig_value = self.tsframe.iloc[0, 0] orig2 = self.tsframe.iloc[1, 0] @@ -1072,3 +1068,36 @@ def test_replace_with_empty_dictlike(self): assert_frame_equal(df, df.replace({'b': {}})) assert_frame_equal(df, df.replace(Series({'b': {}}))) + + @pytest.mark.parametrize("to_replace, method, expected", [ + (0, 'bfill', {'A': [1, 1, 2], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}), + (nan, 'bfill', {'A': [0, 1, 2], + 'B': [5.0, 7.0, 7.0], + 'C': ['a', 'b', 'c']}), + ('d', 'ffill', {'A': [0, 1, 2], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}), + ([0, 2], 'bfill', {'A': [1, 1, 2], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}), + ([1, 
2], 'pad', {'A': [0, 0, 0], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}), + ((1, 2), 'bfill', {'A': [0, 2, 2], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}), + (['b', 'c'], 'ffill', {'A': [0, 1, 2], + 'B': [5, nan, 7], + 'C': ['a', 'a', 'a']}), + ]) + def test_replace_method(self, to_replace, method, expected): + # GH 19632 + df = DataFrame({'A': [0, 1, 2], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}) + + result = df.replace(to_replace=to_replace, value=None, method=method) + expected = DataFrame(expected) + assert_frame_equal(result, expected)