Skip to content

PERF: DataFrame.__setitem__ #44796

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1457,7 +1457,8 @@ def iget(self, col):
def set_inplace(self, locs, values) -> None:
# NB: The name is a misnomer: this method is supposed to operate inplace
# but does not; see GH#33457
assert locs.tolist() == [0]
# When an ndarray, we should have locs.tolist() == [0]
# When a BlockPlacement we should have list(locs) == [0]
self.values = values
try:
# TODO(GH33457) this can be removed
Expand Down
70 changes: 57 additions & 13 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,22 +1066,12 @@ def iset(

# Note: we exclude DTA/TDA here
value_is_extension_type = is_1d_only_ea_dtype(value.dtype)

# categorical/sparse/datetimetz
if value_is_extension_type:

def value_getitem(placement):
return value

else:
if not value_is_extension_type:
if value.ndim == 2:
value = value.T
else:
value = ensure_block_shape(value, ndim=2)

def value_getitem(placement):
return value[placement.indexer]

if value.shape[1:] != self.shape[1:]:
raise AssertionError(
"Shape of new values must be compatible with manager shape"
Expand All @@ -1092,11 +1082,37 @@ def value_getitem(placement):
# In this case, get_blkno_placements will yield only one tuple,
# containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1)))

# Check if we can use _iset_single fastpath
blkno = self.blknos[loc]
blk = self.blocks[blkno]
if len(blk._mgr_locs) == 1: # TODO: fastest way to check this?
return self._iset_single(
# error: Argument 1 to "_iset_single" of "BlockManager" has
# incompatible type "Union[int, slice, ndarray[Any, Any]]";
# expected "int"
loc, # type:ignore[arg-type]
value,
inplace=inplace,
blkno=blkno,
blk=blk,
)

# error: Incompatible types in assignment (expression has type
# "List[Union[int, slice, ndarray]]", variable has type "Union[int,
# slice, ndarray]")
loc = [loc] # type: ignore[assignment]

# categorical/sparse/datetimetz
if value_is_extension_type:

def value_getitem(placement):
return value

else:

def value_getitem(placement):
return value[placement.indexer]

# Accessing public blknos ensures the public versions are initialized
blknos = self.blknos[loc]
blklocs = self.blklocs[loc].copy()
Expand Down Expand Up @@ -1172,6 +1188,29 @@ def value_getitem(placement):
# Newly created block's dtype may already be present.
self._known_consolidated = False

def _iset_single(
    self, loc: int, value: ArrayLike, inplace: bool, blkno: int, blk: Block
) -> None:
    """
    Fastpath for ``iset`` covering one position backed by a one-column Block.

    Because the Block at ``blkno`` holds only the column being set, the
    whole Block can be swapped out (or overwritten) without touching
    ``blklocs`` or ``blknos``.

    Parameters
    ----------
    loc : int
        Manager-level position being set.
    value : ArrayLike
        New values; the caller must already have validated the shape.
    inplace : bool
        Whether to try writing into the existing Block instead of
        replacing it.
    blkno : int
        Index of ``blk`` within ``self.blocks``.
    blk : Block
        The Block currently occupying ``loc``.
    """
    # NOTE: value.shape is not checked here; that is the caller's job.

    if not (inplace and blk.should_store(value)):
        # Build a replacement Block that reuses the old placement, then
        # splice it into the blocks tuple at the same index.
        replacement = new_block_2d(value, placement=blk._mgr_locs)
        current = self.blocks
        self.blocks = current[:blkno] + (replacement,) + current[blkno + 1 :]
        return

    # The existing Block can hold ``value``: hand it over, addressed by the
    # block-local position of ``loc``.
    pos = self.blklocs[loc]
    blk.set_inplace(slice(pos, pos + 1), value)

def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
"""
Insert item at selected position.
Expand All @@ -1197,8 +1236,13 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
bp = BlockPlacement(slice(loc, loc + 1))
block = new_block_2d(values=value, placement=bp)

self._insert_update_mgr_locs(loc)
self._insert_update_blklocs_and_blknos(loc)
if not len(self.blocks):
# Fastpath
self._blklocs = np.array([0], dtype=np.intp)
self._blknos = np.array([0], dtype=np.intp)
else:
self._insert_update_mgr_locs(loc)
self._insert_update_blklocs_and_blknos(loc)

self.axes[0] = new_axis
self.blocks += (block,)
Expand Down