Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Remove SettingWithCopyWarning #56614

Merged
merged 13 commits on Feb 4, 2024
6 changes: 1 addition & 5 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,7 @@


def using_copy_on_write() -> bool:
_mode_options = _global_config["mode"]
return (
_mode_options["copy_on_write"] is True
and _mode_options["data_manager"] == "block"
)
return True


def warn_copy_on_write() -> bool:
Expand Down
5 changes: 1 addition & 4 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1890,10 +1890,7 @@ def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return (
pd.options.mode.copy_on_write is True
and _get_option("mode.data_manager", silent=True) == "block"
)
return True


@pytest.fixture
Expand Down
98 changes: 12 additions & 86 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1456,12 +1456,8 @@ def style(self) -> Styler:

@Appender(_shared_docs["items"])
def items(self) -> Iterable[tuple[Hashable, Series]]:
if self.columns.is_unique and hasattr(self, "_item_cache"):
for k in self.columns:
yield k, self._get_item_cache(k)
else:
for i, k in enumerate(self.columns):
yield k, self._ixs(i, axis=1)
for i, k in enumerate(self.columns):
yield k, self._ixs(i, axis=1)

def iterrows(self) -> Iterable[tuple[Hashable, Series]]:
"""
Expand Down Expand Up @@ -3956,24 +3952,14 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Series:
if axis == 0:
new_mgr = self._mgr.fast_xs(i)

# if we are a copy, mark as such
copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None
result = self._constructor_sliced_from_mgr(new_mgr, axes=new_mgr.axes)
result._name = self.index[i]
result = result.__finalize__(self)
result._set_is_copy(self, copy=copy)
return result
return result.__finalize__(self)

# icol
else:
label = self.columns[i]

col_mgr = self._mgr.iget(i)
result = self._box_col_values(col_mgr, i)

# this is a cached value, mark it so
result._set_as_cached(label, self)
return result
return self._box_col_values(col_mgr, i)

def _get_column_array(self, i: int) -> ArrayLike:
"""
Expand Down Expand Up @@ -4034,7 +4020,7 @@ def __getitem__(self, key):
and key in self.columns
or key in self.columns.drop_duplicates(keep=False)
):
return self._get_item_cache(key)
return self._get_item(key)

elif is_mi and self.columns.is_unique and key in self.columns:
return self._getitem_multilevel(key)
Expand Down Expand Up @@ -4073,7 +4059,7 @@ def __getitem__(self, key):
if isinstance(indexer, slice):
return self._slice(indexer, axis=1)

data = self._take_with_is_copy(indexer, axis=1)
data = self.take(indexer, axis=1)

if is_single_key:
# What does looking for a single key in a non-unique index return?
Expand All @@ -4082,7 +4068,7 @@ def __getitem__(self, key):
# - we have a MultiIndex on columns (test on self.columns, #21309)
if data.shape[1] == 1 and not isinstance(self.columns, MultiIndex):
# GH#26490 using data[key] can cause RecursionError
return data._get_item_cache(key)
return data._get_item(key)

return data

Expand Down Expand Up @@ -4111,7 +4097,7 @@ def _getitem_bool_array(self, key):
return self.copy(deep=None)

indexer = key.nonzero()[0]
return self._take_with_is_copy(indexer, axis=0)
return self.take(indexer, axis=0)

def _getitem_multilevel(self, key):
# self.columns is a MultiIndex
Expand Down Expand Up @@ -4141,7 +4127,6 @@ def _getitem_multilevel(self, key):
result, index=self.index, name=key
)

result._set_is_copy(self)
return result
else:
# loc is neither a slice nor ndarray, so must be an int
Expand Down Expand Up @@ -4170,7 +4155,7 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:
series = self._ixs(col, axis=1)
return series._values[index]

series = self._get_item_cache(col)
series = self[col]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be `self._get_item(col)` rather than `self[col]`?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, yeah, that should work. Thanks!

engine = self.index._engine

if not isinstance(self.index, MultiIndex):
Expand Down Expand Up @@ -4273,7 +4258,6 @@ def _setitem_slice(self, key: slice, value) -> None:
# NB: we can't just use self.loc[key] = value because that
# operates on labels and we need to operate positional for
# backwards-compat, xref GH#31469
self._check_setitem_copy()
self.iloc[key] = value

def _setitem_array(self, key, value):
Expand All @@ -4286,7 +4270,6 @@ def _setitem_array(self, key, value):
)
key = check_bool_indexer(self.index, key)
indexer = key.nonzero()[0]
self._check_setitem_copy()
if isinstance(value, DataFrame):
# GH#39931 reindex since iloc does not align
value = value.reindex(self.index.take(indexer))
Expand Down Expand Up @@ -4373,7 +4356,6 @@ def _setitem_frame(self, key, value):
"Must pass DataFrame or 2-d ndarray with boolean values only"
)

self._check_setitem_copy()
self._where(-key, value, inplace=True)

def _set_item_frame_value(self, key, value: DataFrame) -> None:
Expand Down Expand Up @@ -4435,7 +4417,6 @@ def _iset_item_mgr(
) -> None:
# when called from _set_item_mgr loc can be anything returned from get_loc
self._mgr.iset(loc, value, inplace=inplace, refs=refs)
self._clear_item_cache()

def _set_item_mgr(
self, key, value: ArrayLike, refs: BlockValuesRefs | None = None
Expand All @@ -4448,12 +4429,6 @@ def _set_item_mgr(
else:
self._iset_item_mgr(loc, value, refs=refs)

# check if we are modifying a copy
# try to set first as we want an invalid
# value exception to occur first
if len(self):
self._check_setitem_copy()

def _iset_item(self, loc: int, value: Series, inplace: bool = True) -> None:
# We are only called from _replace_columnwise which guarantees that
# no reindex is necessary
Expand All @@ -4464,12 +4439,6 @@ def _iset_item(self, loc: int, value: Series, inplace: bool = True) -> None:
else:
self._iset_item_mgr(loc, value._values.copy(), inplace=True)

# check if we are modifying a copy
# try to set first as we want an invalid
# value exception to occur first
if len(self):
self._check_setitem_copy()

def _set_item(self, key, value) -> None:
"""
Add series to DataFrame in specified column.
Expand Down Expand Up @@ -4520,7 +4489,6 @@ def _set_value(
icol = self.columns.get_loc(col)
iindex = self.index.get_loc(index)
self._mgr.column_setitem(icol, iindex, value, inplace_only=True)
self._clear_item_cache()

except (KeyError, TypeError, ValueError, LossySetitemError):
# get_loc might raise a KeyError for missing labels (falling back
Expand All @@ -4532,7 +4500,6 @@ def _set_value(
self.iloc[index, col] = value
else:
self.loc[index, col] = value
self._item_cache.pop(col, None)

except InvalidIndexError as ii_err:
# GH48729: Seems like you are trying to assign a value to a
Expand Down Expand Up @@ -4576,50 +4543,9 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series:
obj._name = name
return obj.__finalize__(self)

# ----------------------------------------------------------------------
# Lookup Caching

def _clear_item_cache(self) -> None:
self._item_cache.clear()

def _get_item_cache(self, item: Hashable) -> Series:
"""Return the cached item, item represents a label indexer."""
if using_copy_on_write() or warn_copy_on_write():
loc = self.columns.get_loc(item)
return self._ixs(loc, axis=1)

cache = self._item_cache
res = cache.get(item)
if res is None:
# All places that call _get_item_cache have unique columns,
# pending resolution of GH#33047

loc = self.columns.get_loc(item)
res = self._ixs(loc, axis=1)

cache[item] = res

# for a chain
res._is_copy = self._is_copy
return res

def _reset_cacher(self) -> None:
# no-op for DataFrame
pass

def _maybe_cache_changed(self, item, value: Series, inplace: bool) -> None:
"""
The object has called back to us saying maybe it has changed.
"""
loc = self._info_axis.get_loc(item)
arraylike = value._values

old = self._ixs(loc, axis=1)
if old._values is value._values and inplace:
# GH#46149 avoid making unnecessary copies/block-splitting
return

self._mgr.iset(loc, arraylike, inplace=inplace)
def _get_item(self, item: Hashable) -> Series:
loc = self.columns.get_loc(item)
return self._ixs(loc, axis=1)

# ----------------------------------------------------------------------
# Unsorted
Expand Down
Loading
Loading