Skip to content

Commit b37589a

Browse files
authored
ENH: Use lazy copy for dropna (#50429)
1 parent 7afbdf1, commit b37589a

File tree

4 files changed

+46
-4
lines changed

4 files changed

+46
-4
lines changed

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6346,7 +6346,7 @@ def dropna(
63466346
raise ValueError(f"invalid how option: {how}")
63476347

63486348
if np.all(mask):
6349-
result = self.copy()
6349+
result = self.copy(deep=None)
63506350
else:
63516351
result = self.loc(axis=axis)[mask]
63526352

pandas/core/internals/managers.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1955,9 +1955,17 @@ def _blklocs(self):
19551955
"""compat with BlockManager"""
19561956
return None
19571957

1958-
def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockManager:
1958+
def getitem_mgr(self, indexer: slice | np.ndarray) -> SingleBlockManager:
19591959
# similar to get_slice, but not restricted to slice indexer
19601960
blk = self._block
1961+
if (
1962+
using_copy_on_write()
1963+
and isinstance(indexer, np.ndarray)
1964+
and len(indexer) > 0
1965+
and com.is_bool_indexer(indexer)
1966+
and indexer.all()
1967+
):
1968+
return type(self)(blk, self.index, [weakref.ref(blk)], parent=self)
19611969
array = blk._slice(indexer)
19621970
if array.ndim > 1:
19631971
# This will be caught by Series._get_values

pandas/core/series.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -921,7 +921,7 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Any:
921921
"""
922922
return self._values[i]
923923

924-
def _slice(self, slobj: slice, axis: Axis = 0) -> Series:
924+
def _slice(self, slobj: slice | np.ndarray, axis: Axis = 0) -> Series:
925925
# axis kwarg is retained for compat with NDFrame method
926926
# _slice is *always* positional
927927
return self._get_values(slobj)
@@ -5583,7 +5583,7 @@ def dropna(
55835583
return result
55845584
else:
55855585
if not inplace:
5586-
return self.copy()
5586+
return self.copy(deep=None)
55875587
return None
55885588

55895589
# ----------------------------------------------------------------------

pandas/tests/copy_view/test_methods.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -503,6 +503,40 @@ def test_add_suffix(using_copy_on_write):
503503
tm.assert_frame_equal(df, df_orig)
504504

505505

506+
@pytest.mark.parametrize("axis, val", [(0, 5.5), (1, np.nan)])
507+
def test_dropna(using_copy_on_write, axis, val):
508+
df = DataFrame({"a": [1, 2, 3], "b": [4, val, 6], "c": "d"})
509+
df_orig = df.copy()
510+
df2 = df.dropna(axis=axis)
511+
512+
if using_copy_on_write:
513+
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
514+
else:
515+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
516+
517+
df2.iloc[0, 0] = 0
518+
if using_copy_on_write:
519+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
520+
tm.assert_frame_equal(df, df_orig)
521+
522+
523+
@pytest.mark.parametrize("val", [5, 5.5])
524+
def test_dropna_series(using_copy_on_write, val):
525+
ser = Series([1, val, 4])
526+
ser_orig = ser.copy()
527+
ser2 = ser.dropna()
528+
529+
if using_copy_on_write:
530+
assert np.shares_memory(ser2.values, ser.values)
531+
else:
532+
assert not np.shares_memory(ser2.values, ser.values)
533+
534+
ser2.iloc[0] = 0
535+
if using_copy_on_write:
536+
assert not np.shares_memory(ser2.values, ser.values)
537+
tm.assert_series_equal(ser, ser_orig)
538+
539+
506540
@pytest.mark.parametrize(
507541
"method",
508542
[

0 commit comments

Comments
 (0)