From 910207ffe518413e84cfa95d772cb66d57a0d08e Mon Sep 17 00:00:00 2001 From: Michael Gasvoda Date: Mon, 21 Aug 2017 19:51:18 -0400 Subject: [PATCH] BUG: clip should handle null values closes #17276 Author: Michael Gasvoda Author: mgasvoda Closes #17288 from mgasvoda/master and squashes the following commits: a1dbdf293 [mgasvoda] Merge branch 'master' into master 9333952c2 [Michael Gasvoda] Checking output of tests 4e0464eaf [Michael Gasvoda] fixing whatsnew text c44204080 [Michael Gasvoda] formatting fixes 7e2367879 [Michael Gasvoda] formatting updates 781ea724a [Michael Gasvoda] whatsnew entry d9627fe4c [Michael Gasvoda] adding clip tests 9aa0159e9 [Michael Gasvoda] Treating na values as none for clips --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/generic.py | 12 ++++++++---- pandas/tests/frame/test_analytics.py | 26 ++++++++++---------------- pandas/tests/series/test_analytics.py | 11 +++++++++++ 4 files changed, 30 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c5fe89282bf52..0d2c52c70b345 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -132,7 +132,6 @@ Other Enhancements - .. _whatsnew_0210.api_breaking: Backwards incompatible API changes @@ -384,6 +383,7 @@ Reshaping Numeric ^^^^^^^ - Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`) +- :func:`Series.clip()` and :func:`DataFrame.clip()` now treat NA values for upper and lower arguments as ``None`` instead of raising ``ValueError`` (:issue:`17276`). Categorical diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c83b1073afc8e..5c9e1f22ddd20 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4741,9 +4741,6 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): if axis is not None: axis = self._get_axis_number(axis) - if np.any(isna(threshold)): - raise ValueError("Cannot use an NA value as a clip threshold") - # method is self.le for upper bound and self.ge for lower bound if is_scalar(threshold) and is_number(threshold): if method.__name__ == 'le': @@ -4823,6 +4820,14 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, axis = nv.validate_clip_with_axis(axis, args, kwargs) + # GH 17276 + # numpy doesn't like NaN as a clip value + # so ignore + if np.any(pd.isnull(lower)): + lower = None + if np.any(pd.isnull(upper)): + upper = None + # GH 2747 (arguments were reversed) if lower is not None and upper is not None: if is_scalar(lower) and is_scalar(upper): @@ -4839,7 +4844,6 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, if upper is not None: if inplace: result = self - result = result.clip_upper(upper, axis, inplace=inplace) return result diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 484a09f11b58a..93514a8a42215 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1931,22 +1931,16 @@ def test_clip_against_frame(self, axis): tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) tm.assert_frame_equal(clipped_df[mask], df[mask]) - def test_clip_na(self): - msg = "Cannot use an NA" - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=np.nan) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=[np.nan]) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(upper=np.nan) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(upper=[np.nan]) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=np.nan, upper=np.nan) + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH # 17276 + tm.assert_frame_equal(self.frame.clip(np.nan), self.frame) + tm.assert_frame_equal(self.frame.clip(upper=[1, 2, np.nan]), + self.frame) + tm.assert_frame_equal(self.frame.clip(lower=[1, np.nan, 3]), + self.frame) + tm.assert_frame_equal(self.frame.clip(upper=np.nan, lower=np.nan), + self.frame) # Matrix-like diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 44da0968d7024..f1d044f7a1132 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1000,6 +1000,17 @@ def test_clip_types_and_nulls(self): assert list(isna(s)) == list(isna(l)) assert list(isna(s)) == list(isna(u)) + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH # 17276 + s = Series([1, 2, 3]) + + assert_series_equal(s.clip(np.nan), Series([1, 2, 3])) + assert_series_equal(s.clip(upper=[1, 1, np.nan]), Series([1, 2, 3])) + assert_series_equal(s.clip(lower=[1, np.nan, 1]), Series([1, 2, 3])) + assert_series_equal(s.clip(upper=np.nan, lower=np.nan), + Series([1, 2, 3])) + def test_clip_against_series(self): # GH #6966