checkpoint passing

pandas-dev · jbrockmendel · Dec 28, 2020 · Jan 6, 2021 · Jan 8, 2021 · Jan 8, 2021
commit df9d87fe8a172eebeb45434522f46d412534b88c
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4950,12 +4950,27 @@ def _replace_columnwise(
                 target, value = mapping[ax[i]]
                 newobj = ser.replace(target, value, regex=regex)
 
-                res.iloc[:, i] = newobj
+                res._isetitem(i, newobj)
 
         if inplace:
             return
         return res.__finalize__(self)
 
+    def _isetitem(self, loc: int, value):
+        """
+        Set the column at integer position ``loc`` by going through ``__setitem__``.
+
+        Parameters
+        ----------
+        loc : int
+            Position of the column to set. Negative positions count from the end.
+        value : object
+            Passed through unchanged to ``__setitem__``.
+
+        Raises
+        ------
+        IndexError
+            If ``loc`` is out of bounds for ``self.columns``.
+        """
+        cols = self.columns
+        ncols = len(cols)
+
+        # Normalize and bounds-check up front so both paths below treat ``loc``
+        #  the same way.  In the non-unique path the temporary labels are
+        #  0..ncols-1, so a negative or out-of-range ``loc`` would not refer to
+        #  any existing column there.
+        if not -ncols <= loc < ncols:
+            raise IndexError(
+                f"column position {loc} is out of bounds for {ncols} columns"
+            )
+        if loc < 0:
+            loc += ncols
+
+        if cols.is_unique:
+            # Labels are unambiguous, so the label at ``loc`` identifies the column.
+            self[cols[loc]] = value
+            return
+
+        # Otherwise we temporarily pin unique columns and call __setitem__
+        newcols = Index(range(ncols))
+        try:
+            self.columns = newcols
+            self[loc] = value
+        finally:
+            # Always restore the original labels, even if the assignment raised.
+            self.columns = cols
+
     @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"])
     def shift(
         self,

diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
@@ -339,6 +339,8 @@ def length_of_indexer(indexer, target=None) -> int:
             # GH#25774
             return indexer.sum()
         return len(indexer)
+    elif isinstance(indexer, range):
+        # len() is exact for any start/stop/step.  Floor-dividing the span by
+        #  the step undercounts when the span is not a multiple of the step
+        #  (range(0, 5, 2) has 3 elements, not 2) and goes negative for empty
+        #  ranges such as range(5, 0).
+        return len(indexer)
     elif not is_list_like_indexer(indexer):
         return 1
     raise AssertionError("cannot find the length of the indexer")

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1676,6 +1676,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
         # Above we only set take_split_path to True for 2D cases
         assert self.ndim == 2
 
+        orig = indexer
         if not isinstance(indexer, tuple):
             indexer = _tuplify(self.ndim, indexer)
         if len(indexer) > self.ndim:
@@ -1689,8 +1690,20 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
 
         info_idx = indexer[1]
         pi = indexer[0]
+        if (
+            isinstance(pi, ABCDataFrame)
+            and orig is pi
+            and hasattr(self.obj._mgr, "blocks")
+            and len(self.obj._mgr.blocks) == 1
+        ):
+            # FIXME: kludge
+            return self._setitem_single_block(orig, value, name)
 
-        if com.is_null_slice(info_idx) and is_scalar(value):
+        if (
+            com.is_null_slice(info_idx)
+            and is_scalar(value)
+            and not isinstance(pi, ABCDataFrame)
+        ):
             # We can go directly through BlockManager.setitem without worrying
             #  about alignment.
             # TODO: do we need to do some kind of copy_with_setting check?
@@ -1734,7 +1747,8 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
 
             elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
                 # We are setting multiple rows in a single column.
-                self._setitem_single_column(ilocs[0], value, pi)
+                self._setitem_iat_loc(ilocs[0], pi, value)
+                # self._setitem_single_column(ilocs[0], value, pi)
 
             elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
                 # We are trying to set N values into M entries of a single
@@ -1758,7 +1772,8 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
             elif len(ilocs) == len(value):
                 # We are setting multiple columns in a single row.
                 for loc, v in zip(ilocs, value):
-                    self._setitem_single_column(loc, v, pi)
+                    self._setitem_iat_loc(loc, pi, v)
+                    # self._setitem_single_column(loc, v, pi)
 
             elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
                 # This is a setitem-with-expansion, see
@@ -1796,6 +1811,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value):
 
         for i, loc in enumerate(ilocs):
             # setting with a list, re-coerces
+            # self._setitem_iat_loc(loc, pi, value[:, i].tolist())
             self._setitem_single_column(loc, value[:, i].tolist(), pi)
 
     def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
@@ -1812,7 +1828,8 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
         if name == "iloc":
             for i, loc in enumerate(ilocs):
                 val = value.iloc[:, i]
-                self._setitem_single_column(loc, val, pi)
+                self._setitem_iat_loc(loc, pi, val)
+                # self._setitem_single_column(loc, val, pi)
 
         elif not unique_cols and value.columns.equals(self.obj.columns):
             # We assume we are already aligned, see
@@ -1829,7 +1846,8 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
                 else:
                     val = np.nan
 
-                self._setitem_single_column(loc, val, pi)
+                self._setitem_iat_loc(loc, pi, val)
+                # self._setitem_single_column(loc, val, pi)
 
         elif not unique_cols:
             raise ValueError("Setting with non-unique columns is not allowed.")
@@ -1848,7 +1866,8 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
                 else:
                     val = np.nan
 
-                self._setitem_single_column(loc, val, pi)
+                self._setitem_iat_loc(loc, pi, val)
+                # self._setitem_single_column(loc, val, pi)
 
     def _setitem_single_column(self, loc: int, value, plane_indexer):
         """
@@ -1882,6 +1901,29 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         # reset the sliced object if unique
         self.obj._iset_item(loc, ser)
 
+    def _setitem_iat_loc(self, loc: int, pi, value):
+        """
+        Set ``value`` into the rows ``pi`` of the column at position ``loc``,
+        writing into the existing block when it reports it can hold ``value``.
+
+        Parameters
+        ----------
+        loc : int
+            Column position; used to look up the owning block through the
+            manager's ``blknos`` / ``blklocs``.
+        pi : object
+            Row indexer, forwarded as the first element of the block-level
+            indexer ``(pi, blkloc)``.
+        value : object
+            Value(s) to set.
+
+        Notes
+        -----
+        Falls back to ``_setitem_single_column`` when the block reports it cannot
+        hold ``value``, or when a ``ValueError`` is raised during the in-place
+        set on an extension block.  Any other ``ValueError`` is re-raised.
+        """
+        # TODO: likely a BM method?
+        mgr = self.obj._mgr
+        # Locate the block that holds column ``loc`` and the column's position
+        #  inside that block.
+        blkno = mgr.blknos[loc]
+        blkloc = mgr.blklocs[loc]
+        blk = mgr.blocks[blkno]
+        # Sanity check: the block's own locations must map back to ``loc``.
+        assert blk.mgr_locs[blkloc] == loc
+
+        if blk._can_hold_element(value):
+            # NB: we are assuming here that _can_hold_element is accurate
+            # TODO: do we need to do some kind of copy_with_setting check?
+            try:
+                self.obj._check_is_chained_assignment_possible()
+                blk.setitem_inplace((pi, blkloc), value)
+                self.obj._maybe_update_cacher(clear=True)
+            except ValueError:
+                # NOTE(review): this handler also sees a ValueError raised by
+                #  the chained-assignment check or the cacher update, not only
+                #  by setitem_inplace -- confirm that is intended.
+                if blk.is_extension:
+                    # FIXME: kludge bc _can_hold_element is wrong for EABlock
+                    return self._setitem_single_column(loc, value, pi)
+                raise
+        else:
+            # The existing block cannot hold ``value``; use the column-setting path.
+            self._setitem_single_column(loc, value, pi)
+
     def _setitem_single_block(self, indexer, value, name: str):
         """
         _setitem_with_indexer for the case when we have a single Block.

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1008,22 +1008,15 @@ def setitem(self, indexer, value):
             values[indexer] = value
 
         elif exact_match and is_categorical_dtype(arr_value.dtype):
-            # GH25495 - If the current dtype is not categorical,
-            # we need to create a new categorical block
             values[indexer] = value
 
         elif exact_match and is_ea_value:
-            # GH#32395 if we're going to replace the values entirely, just
-            #  substitute in the new array
             if not self.is_object and isinstance(value, (IntegerArray, FloatingArray)):
                 values[indexer] = value.to_numpy(value.dtype.numpy_dtype)
             else:
                 values[indexer] = np.asarray(value)
 
-        # if we are an exact match (ex-broadcasting),
-        # then use the resultant dtype
         elif exact_match:
-            # We are setting _all_ of the array's values, so can cast to new dtype
             values[indexer] = value
 
         elif is_ea_value:

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1464,6 +1464,7 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True
         """
         Take items along any axis.
         """
+        # TODO: should these be np.intp?
         indexer = (
             np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64")
             if isinstance(indexer, slice)

diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
@@ -350,26 +350,6 @@ def test_setitem_sequence_broadcasts(self, data, box_in_series):
         # length than the value
         super().test_setitem_sequence_broadcasts(data, box_in_series)
 
-    @skip_nested
-    def test_setitem_loc_scalar_mixed(self, data):
-        # AssertionError
-        super().test_setitem_loc_scalar_mixed(data)
-
-    @skip_nested
-    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
-        # AssertionError
-        super().test_setitem_loc_scalar_multiple_homogoneous(data)
-
-    @skip_nested
-    def test_setitem_iloc_scalar_mixed(self, data):
-        # AssertionError
-        super().test_setitem_iloc_scalar_mixed(data)
-
-    @skip_nested
-    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
-        # AssertionError
-        super().test_setitem_iloc_scalar_multiple_homogoneous(data)
-
     @skip_nested
     @pytest.mark.parametrize("setter", ["loc", None])
     def test_setitem_mask_broadcast(self, data, setter):

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -74,24 +74,14 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key):
         orig_vals = df.values
         indexer(df)[key, 0] = cat
 
-        overwrite = isinstance(key, slice) and key == slice(None)
-
-        if overwrite:
-            # TODO: GH#39986 this probably shouldn't behave differently
-            expected = DataFrame({0: cat})
-            assert not np.shares_memory(df.values, orig_vals)
-        else:
-            expected = DataFrame({0: cat}).astype(object)
-            assert np.shares_memory(df.values, orig_vals)
+        expected = DataFrame({0: cat.astype(object)})
+        assert np.shares_memory(df.values, orig_vals)
 
         tm.assert_frame_equal(df, expected)
 
         # check we dont have a view on cat (may be undesired GH#39986)
         df.iloc[0, 0] = "gamma"
-        if overwrite:
-            assert cat[0] != "gamma"
-        else:
-            assert cat[0] != "gamma"
+        assert cat[0] != "gamma"
 
     @pytest.mark.parametrize("box", [pd_array, Series])
     def test_iloc_setitem_ea_inplace(self, frame_or_series, box):
@@ -824,7 +814,6 @@ def test_series_indexing_zerodim_np_array(self):
         result = s.iloc[np.array(0)]
         assert result == 1
 
-    @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457")
     def test_iloc_setitem_categorical_updates_inplace(self):
         # Mixed dtype ensures we go through take_split_path in setitem_with_indexer
         cat = Categorical(["A", "B", "C"])

diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -529,13 +529,19 @@ def test_astype_assignment(self):
         expected = DataFrame(
             [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
         )
+        # original (object) array can hold new values, so setting is inplace
+        expected["A"] = expected["A"].astype(object)
+        expected["B"] = expected["B"].astype(object)
         tm.assert_frame_equal(df, expected)
 
         df = df_orig.copy()
         df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True)
         expected = DataFrame(
             [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
         )
+        # original (object) array can hold new values, so setting is inplace
+        expected["A"] = expected["A"].astype(object)
+        expected["B"] = expected["B"].astype(object)
         tm.assert_frame_equal(df, expected)
 
         # GH5702 (loc)
@@ -544,26 +550,37 @@ def test_astype_assignment(self):
         expected = DataFrame(
             [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
         )
+        # df["A"] can hold the RHS, so the assignment is inplace, remains object
+        expected["A"] = expected["A"].astype(object)
         tm.assert_frame_equal(df, expected)
 
         df = df_orig.copy()
         df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64)
         expected = DataFrame(
             [["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
         )
+        # original (object) array can hold new values, so setting is inplace
+        expected["B"] = expected["B"].astype(object)
+        expected["C"] = expected["C"].astype(object)
         tm.assert_frame_equal(df, expected)
 
     def test_astype_assignment_full_replacements(self):
         # full replacements / no nans
-        df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
-        df.iloc[:, 0] = df["A"].astype(np.int64)
-        expected = DataFrame({"A": [1, 2, 3, 4]})
-        tm.assert_frame_equal(df, expected)
-
-        df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
-        df.loc[:, "A"] = df["A"].astype(np.int64)
-        expected = DataFrame({"A": [1, 2, 3, 4]})
-        tm.assert_frame_equal(df, expected)
+        # The int64 values can all be held by the existing float64 array, so the
+        #  assignment is expected to happen in place and keep the float64 dtype.
+        orig = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
+        value = orig.astype(np.int64)
+        # Hence both cases below compare against ``orig`` rather than against
+        #  the int64 frame DataFrame({"A": [1, 2, 3, 4]}).
+
+        df = orig.copy()
+        # NOTE(review): the author flagged this case as "not yet, bc value is a
+        #  DataFrame; would work with value["A"]" -- confirm what is pending.
+        df.iloc[
+            :, 0
+        ] = value
+        tm.assert_frame_equal(df, orig)
+
+        # Same check through .loc with a column label.
+        df = orig.copy()
+        df.loc[:, "A"] = value
+        tm.assert_frame_equal(df, orig)
 
     @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc])
     def test_index_type_coercion(self, indexer):

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -781,6 +781,7 @@ def test_loc_coercion(self):
         result = df.iloc[[1]]
         tm.assert_series_equal(result.dtypes, expected)
 
+    def test_loc_coercion2(self):
         # 12045
         import datetime
 
@@ -795,6 +796,7 @@ def test_loc_coercion(self):
         result = df.iloc[[1]]
         tm.assert_series_equal(result.dtypes, expected)
 
+    def test_loc_coercion3(self):
         # 11594
         df = DataFrame({"text": ["some words"] + [None] * 9})
         expected = df.dtypes
@@ -1208,7 +1210,7 @@ def test_loc_setitem_single_row_categorical(self):
         df.loc[:, "Alpha"] = categories
 
         result = df["Alpha"]
-        expected = Series(categories, index=df.index, name="Alpha")
+        expected = Series(categories, index=df.index, name="Alpha").astype(object)
         tm.assert_series_equal(result, expected)
 
     def test_loc_setitem_datetime_coercion(self):

diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
@@ -100,7 +100,8 @@ def test_partial_setting(self):
         tm.assert_frame_equal(df, expected)
 
         # mixed dtype frame, overwrite
-        expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])}))
+        # float64 can hold df.loc[:, "A"], so setting is inplace
+        expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0.0, 2.0, 4.0])}))
         df = df_orig.copy()
         df["B"] = df["B"].astype(np.float64)
         df.loc[:, "B"] = df.loc[:, "A"]
@@ -120,6 +121,7 @@ def test_partial_setting(self):
         df.loc[:, "C"] = df.loc[:, "A"]
         tm.assert_frame_equal(df, expected)
 
+    def test_partial_setting2(self):
         # GH 8473
         dates = date_range("1/1/2000", periods=8)
         df_orig = DataFrame(