-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
REF: setitem_with_indexer DataFrame always go through split_path #40380
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
2c761eb
d207a12
f0dae4d
27bd89d
a5c1f5e
15f5265
73302c0
ceeeb78
6b304b8
fe9be7e
1789fca
6400be3
a1cdd19
870ae37
c32a427
d9d8beb
8a283d9
9718f6e
9f6d8e7
dd53abc
f6b09b4
c107e77
ae3ae1b
c631079
4cfe863
03e6eb9
d280f60
9db5537
4054aea
bd76479
7ea792f
20f6a16
b9daceb
6117e8e
94b33a8
8f71f27
17049e4
fdbf6f3
f8363f9
2c6da5d
2d0cf9e
e1ed083
df9d87f
aeb687c
481ece1
95f34c8
be54f15
6896e86
b183084
fa2006b
1af8686
301d582
1d24f71
d8c7c4c
f0037e6
094e47d
6791a97
d01951d
01ff673
8e8e711
18d72ec
ba1d17c
1fca2d5
1382985
10ab24d
8e5a414
cdfc811
d761df6
b4d0211
53be486
05107a2
b87fc55
3279159
9f7e274
a4aed44
e1c30fa
e062dce
9a73ef6
4a9919b
8761ffa
99e3316
be22a99
86d3e71
d49512b
d821e8b
f142f16
eccfebb
0c430c9
295737e
405b21f
520230c
0ac5516
7a46ab8
b19a787
7bf721b
7db1a5a
ca0f516
3b456f9
1e4b617
1d203a6
4e69970
0a0e712
4ce2cd3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1676,6 +1676,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): | |
| # Above we only set take_split_path to True for 2D cases | ||
| assert self.ndim == 2 | ||
|
|
||
| orig = indexer | ||
| if not isinstance(indexer, tuple): | ||
| indexer = _tuplify(self.ndim, indexer) | ||
| if len(indexer) > self.ndim: | ||
|
|
@@ -1689,8 +1690,20 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): | |
|
|
||
| info_idx = indexer[1] | ||
| pi = indexer[0] | ||
| if ( | ||
| isinstance(pi, ABCDataFrame) | ||
| and orig is pi | ||
| and hasattr(self.obj._mgr, "blocks") | ||
| and len(self.obj._mgr.blocks) == 1 | ||
| ): | ||
| # FIXME: kludge | ||
| return self._setitem_single_block(orig, value, name) | ||
|
|
||
| if com.is_null_slice(info_idx) and is_scalar(value): | ||
| if ( | ||
| com.is_null_slice(info_idx) | ||
| and is_scalar(value) | ||
| and not isinstance(pi, ABCDataFrame) | ||
| ): | ||
| # We can go directly through BlockManager.setitem without worrying | ||
| # about alignment. | ||
| # TODO: do we need to do some kind of copy_with_setting check? | ||
|
|
@@ -1734,7 +1747,8 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): | |
|
|
||
| elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi): | ||
| # We are setting multiple rows in a single column. | ||
| self._setitem_single_column(ilocs[0], value, pi) | ||
| self._setitem_iat_loc(ilocs[0], pi, value) | ||
| # self._setitem_single_column(ilocs[0], value, pi) | ||
|
|
||
| elif len(ilocs) == 1 and 0 != lplane_indexer != len(value): | ||
| # We are trying to set N values into M entries of a single | ||
|
|
@@ -1758,7 +1772,8 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): | |
| elif len(ilocs) == len(value): | ||
| # We are setting multiple columns in a single row. | ||
| for loc, v in zip(ilocs, value): | ||
| self._setitem_single_column(loc, v, pi) | ||
| self._setitem_iat_loc(loc, pi, v) | ||
| # self._setitem_single_column(loc, v, pi) | ||
|
|
||
| elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0: | ||
| # This is a setitem-with-expansion, see | ||
|
|
@@ -1796,6 +1811,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value): | |
|
|
||
| for i, loc in enumerate(ilocs): | ||
| # setting with a list, re-coerces | ||
| # self._setitem_iat_loc(loc, pi, value[:, i].tolist()) | ||
| self._setitem_single_column(loc, value[:, i].tolist(), pi) | ||
|
|
||
| def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str): | ||
|
|
@@ -1812,7 +1828,8 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str | |
| if name == "iloc": | ||
| for i, loc in enumerate(ilocs): | ||
| val = value.iloc[:, i] | ||
| self._setitem_single_column(loc, val, pi) | ||
| self._setitem_iat_loc(loc, pi, val) | ||
| # self._setitem_single_column(loc, val, pi) | ||
|
|
||
| elif not unique_cols and value.columns.equals(self.obj.columns): | ||
| # We assume we are already aligned, see | ||
|
|
@@ -1829,7 +1846,8 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str | |
| else: | ||
| val = np.nan | ||
|
|
||
| self._setitem_single_column(loc, val, pi) | ||
| self._setitem_iat_loc(loc, pi, val) | ||
| # self._setitem_single_column(loc, val, pi) | ||
|
|
||
| elif not unique_cols: | ||
| raise ValueError("Setting with non-unique columns is not allowed.") | ||
|
|
@@ -1848,7 +1866,8 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str | |
| else: | ||
| val = np.nan | ||
|
|
||
| self._setitem_single_column(loc, val, pi) | ||
| self._setitem_iat_loc(loc, pi, val) | ||
| # self._setitem_single_column(loc, val, pi) | ||
|
|
||
| def _setitem_single_column(self, loc: int, value, plane_indexer): | ||
| """ | ||
|
|
@@ -1882,6 +1901,29 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): | |
| # reset the sliced object if unique | ||
| self.obj._iset_item(loc, ser) | ||
|
|
||
| def _setitem_iat_loc(self, loc: int, pi, value): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is this method supposed to do? Can you add some docstring / comment? |
||
| # TODO: likely a BM method? | ||
| mgr = self.obj._mgr | ||
| blkno = mgr.blknos[loc] | ||
| blkloc = mgr.blklocs[loc] | ||
| blk = mgr.blocks[blkno] | ||
| assert blk.mgr_locs[blkloc] == loc | ||
|
|
||
| if blk._can_hold_element(value): | ||
| # NB: we are assuming here that _can_hold_element is accurate | ||
| # TODO: do we need to do some kind of copy_with_setting check? | ||
| try: | ||
| self.obj._check_is_chained_assignment_possible() | ||
| blk.setitem_inplace((pi, blkloc), value) | ||
| self.obj._maybe_update_cacher(clear=True) | ||
| except ValueError: | ||
| if blk.is_extension: | ||
| # FIXME: kludge bc _can_hold_element is wrong for EABLock | ||
| return self._setitem_single_column(loc, value, pi) | ||
| raise | ||
| else: | ||
| self._setitem_single_column(loc, value, pi) | ||
|
|
||
| def _setitem_single_block(self, indexer, value, name: str): | ||
| """ | ||
| _setitem_with_indexer for the case when we have a single Block. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -529,13 +529,19 @@ def test_astype_assignment(self): | |
| expected = DataFrame( | ||
| [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") | ||
| ) | ||
| # original (object) array can hold new values, so setting is inplace | ||
| expected["A"] = expected["A"].astype(object) | ||
| expected["B"] = expected["B"].astype(object) | ||
|
||
| tm.assert_frame_equal(df, expected) | ||
|
|
||
| df = df_orig.copy() | ||
| df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) | ||
| expected = DataFrame( | ||
| [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") | ||
| ) | ||
| # original (object) array can hold new values, so setting is inplace | ||
| expected["A"] = expected["A"].astype(object) | ||
| expected["B"] = expected["B"].astype(object) | ||
| tm.assert_frame_equal(df, expected) | ||
|
|
||
| # GH5702 (loc) | ||
|
|
@@ -544,26 +550,37 @@ def test_astype_assignment(self): | |
| expected = DataFrame( | ||
| [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") | ||
| ) | ||
| # df["A"] can hold the RHS, so the assignment is inplace, remains object | ||
| expected["A"] = expected["A"].astype(object) | ||
| tm.assert_frame_equal(df, expected) | ||
|
|
||
| df = df_orig.copy() | ||
| df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64) | ||
| expected = DataFrame( | ||
| [["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") | ||
| ) | ||
| # original (object) array can hold new values, so setting is inplace | ||
| expected["B"] = expected["B"].astype(object) | ||
| expected["C"] = expected["C"].astype(object) | ||
| tm.assert_frame_equal(df, expected) | ||
|
|
||
| def test_astype_assignment_full_replacements(self): | ||
| # full replacements / no nans | ||
| df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) | ||
| df.iloc[:, 0] = df["A"].astype(np.int64) | ||
| expected = DataFrame({"A": [1, 2, 3, 4]}) | ||
| tm.assert_frame_equal(df, expected) | ||
|
|
||
| df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) | ||
| df.loc[:, "A"] = df["A"].astype(np.int64) | ||
| expected = DataFrame({"A": [1, 2, 3, 4]}) | ||
| tm.assert_frame_equal(df, expected) | ||
| # the new values can all be held by the existing array, so the assignment | ||
| # is in-place | ||
| orig = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) | ||
| value = orig.astype(np.int64) | ||
| # expected = DataFrame({"A": [1, 2, 3, 4]}) | ||
|
|
||
| df = orig.copy() | ||
| df.iloc[ | ||
| :, 0 | ||
| ] = value # <- not yet, bc value is a DataFrame; would work with value["A"] | ||
| tm.assert_frame_equal(df, orig) | ||
|
|
||
| df = orig.copy() | ||
| df.loc[:, "A"] = value | ||
| tm.assert_frame_equal(df, orig) | ||
|
|
||
| @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc]) | ||
| def test_index_type_coercion(self, indexer): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of this "workaround", should we have a manager method to set a new column based on a position? (basically what
mgr.isetdoes for ArrayManager)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 on this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure.
this is a fairly low priority, posted in the hopes it would help joris with ArrayManager indexing. #40456 is the most blockery of the things I have going ATM.