Skip to content

Commit 98f7341

Browse files
authored
PERF: new_block_2d (#44121)
1 parent 195d5a3 commit 98f7341

File tree

6 files changed

+50
-37
lines changed

6 files changed

+50
-37
lines changed

pandas/core/internals/blocks.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,7 +1289,8 @@ def _unstack(
12891289
new_values = new_values.T[mask]
12901290
new_placement = new_placement[mask]
12911291

1292-
blocks = [new_block(new_values, placement=new_placement, ndim=2)]
1292+
bp = BlockPlacement(new_placement)
1293+
blocks = [new_block_2d(new_values, placement=bp)]
12931294
return blocks, mask
12941295

12951296
@final
@@ -1318,7 +1319,7 @@ def quantile(
13181319
assert is_list_like(qs) # caller is responsible for this
13191320

13201321
result = quantile_compat(self.values, np.asarray(qs._values), interpolation)
1321-
return new_block(result, placement=self._mgr_locs, ndim=2)
1322+
return new_block_2d(result, placement=self._mgr_locs)
13221323

13231324

13241325
class EABackedBlock(Block):
@@ -1941,6 +1942,17 @@ def get_block_type(dtype: DtypeObj):
19411942
return cls
19421943

19431944

1945+
def new_block_2d(values: ArrayLike, placement: BlockPlacement):
    """
    Construct a 2D block; ``new_block`` specialized to a narrower case.

    The caller is expected to have already guaranteed that:

    - the block is 2-dimensional (ndim=2),
    - ``placement`` is already a ``BlockPlacement`` instance,
    - check_ndim/ensure_block_shape have already been checked,

    so none of that work is repeated here.
    """
    # Pick the Block subclass from the dtype of the incoming values,
    # before they are passed through maybe_coerce_values.
    block_cls = get_block_type(values.dtype)

    return block_cls(maybe_coerce_values(values), ndim=2, placement=placement)
1954+
1955+
19441956
def new_block(values, placement, *, ndim: int) -> Block:
19451957
# caller is responsible for ensuring values is NOT a PandasArray
19461958

pandas/core/internals/concat.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
)
4949
from pandas.core.internals.blocks import (
5050
ensure_block_shape,
51-
new_block,
51+
new_block_2d,
5252
)
5353
from pandas.core.internals.managers import BlockManager
5454

@@ -224,11 +224,11 @@ def concatenate_managers(
224224
# _is_uniform_join_units ensures a single dtype, so
225225
# we can use np.concatenate, which is more performant
226226
# than concat_compat
227-
values = np.concatenate(vals, axis=blk.ndim - 1)
227+
values = np.concatenate(vals, axis=1)
228228
else:
229229
# TODO(EA2D): special-casing not needed with 2D EAs
230230
values = concat_compat(vals, axis=1)
231-
values = ensure_block_shape(values, blk.ndim)
231+
values = ensure_block_shape(values, ndim=2)
232232

233233
values = ensure_wrapped_if_datetimelike(values)
234234

@@ -240,7 +240,7 @@ def concatenate_managers(
240240
if fastpath:
241241
b = blk.make_block_same_class(values, placement=placement)
242242
else:
243-
b = new_block(values, placement=placement, ndim=len(axes))
243+
b = new_block_2d(values, placement=placement)
244244

245245
blocks.append(b)
246246

pandas/core/internals/construction.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,9 @@
7979
SingleArrayManager,
8080
)
8181
from pandas.core.internals.blocks import (
82+
BlockPlacement,
8283
ensure_block_shape,
83-
new_block,
84+
new_block_2d,
8485
)
8586
from pandas.core.internals.managers import (
8687
BlockManager,
@@ -370,14 +371,16 @@ def ndarray_to_mgr(
370371
if any(x is not y for x, y in zip(obj_columns, maybe_datetime)):
371372
dvals_list = [ensure_block_shape(dval, 2) for dval in maybe_datetime]
372373
block_values = [
373-
new_block(dvals_list[n], placement=n, ndim=2)
374+
new_block_2d(dvals_list[n], placement=BlockPlacement(n))
374375
for n in range(len(dvals_list))
375376
]
376377
else:
377-
nb = new_block(values, placement=slice(len(columns)), ndim=2)
378+
bp = BlockPlacement(slice(len(columns)))
379+
nb = new_block_2d(values, placement=bp)
378380
block_values = [nb]
379381
else:
380-
nb = new_block(values, placement=slice(len(columns)), ndim=2)
382+
bp = BlockPlacement(slice(len(columns)))
383+
nb = new_block_2d(values, placement=bp)
381384
block_values = [nb]
382385

383386
if len(columns) == 0:

pandas/core/internals/managers.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
extend_blocks,
7373
get_block_type,
7474
new_block,
75+
new_block_2d,
7576
)
7677
from pandas.core.internals.ops import (
7778
blockwise_all,
@@ -824,6 +825,7 @@ def _slice_take_blocks_ax0(
824825
def _make_na_block(
825826
self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False
826827
) -> Block:
828+
# Note: we only get here with self.ndim == 2
827829

828830
if use_na_proxy:
829831
assert fill_value is None
@@ -844,7 +846,7 @@ def _make_na_block(
844846
# Tuple[Any, Any]]"
845847
block_values = np.empty(block_shape, dtype=dtype) # type: ignore[arg-type]
846848
block_values.fill(fill_value)
847-
return new_block(block_values, placement=placement, ndim=block_values.ndim)
849+
return new_block_2d(block_values, placement=placement)
848850

849851
def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T:
850852
"""
@@ -1106,40 +1108,38 @@ def value_getitem(placement):
11061108
)
11071109

11081110
if unfit_val_locs:
1109-
unfit_mgr_locs = np.concatenate(unfit_mgr_locs)
1110-
unfit_count = len(unfit_mgr_locs)
1111+
unfit_idxr = np.concatenate(unfit_mgr_locs)
1112+
unfit_count = len(unfit_idxr)
11111113

11121114
new_blocks: list[Block] = []
11131115
if value_is_extension_type:
11141116
# This code (ab-)uses the fact that EA blocks contain only
11151117
# one item.
11161118
# TODO(EA2D): special casing unnecessary with 2D EAs
11171119
new_blocks.extend(
1118-
new_block(
1120+
new_block_2d(
11191121
values=value,
1120-
ndim=self.ndim,
1121-
placement=slice(mgr_loc, mgr_loc + 1),
1122+
placement=BlockPlacement(slice(mgr_loc, mgr_loc + 1)),
11221123
)
1123-
for mgr_loc in unfit_mgr_locs
1124+
for mgr_loc in unfit_idxr
11241125
)
11251126

1126-
self._blknos[unfit_mgr_locs] = np.arange(unfit_count) + len(self.blocks)
1127-
self._blklocs[unfit_mgr_locs] = 0
1127+
self._blknos[unfit_idxr] = np.arange(unfit_count) + len(self.blocks)
1128+
self._blklocs[unfit_idxr] = 0
11281129

11291130
else:
11301131
# unfit_val_locs contains BlockPlacement objects
11311132
unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:])
11321133

11331134
new_blocks.append(
1134-
new_block(
1135+
new_block_2d(
11351136
values=value_getitem(unfit_val_items),
1136-
ndim=self.ndim,
1137-
placement=unfit_mgr_locs,
1137+
placement=BlockPlacement(unfit_idxr),
11381138
)
11391139
)
11401140

1141-
self._blknos[unfit_mgr_locs] = len(self.blocks)
1142-
self._blklocs[unfit_mgr_locs] = np.arange(unfit_count)
1141+
self._blknos[unfit_idxr] = len(self.blocks)
1142+
self._blklocs[unfit_idxr] = np.arange(unfit_count)
11431143

11441144
self.blocks += tuple(new_blocks)
11451145

@@ -1161,10 +1161,15 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
11611161

11621162
if value.ndim == 2:
11631163
value = value.T
1164+
if len(value) > 1:
1165+
raise ValueError(
1166+
f"Expected a 1D array, got an array with shape {value.T.shape}"
1167+
)
11641168
else:
11651169
value = ensure_block_shape(value, ndim=self.ndim)
11661170

1167-
block = new_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
1171+
bp = BlockPlacement(slice(loc, loc + 1))
1172+
block = new_block_2d(values=value, placement=bp)
11681173

11691174
self._insert_update_mgr_locs(loc)
11701175
self._insert_update_blklocs_and_blknos(loc)
@@ -1968,7 +1973,7 @@ def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
19681973
def _tuples_to_blocks_no_consolidate(tuples) -> list[Block]:
19691974
# tuples produced within _form_blocks are of the form (placement, array)
19701975
return [
1971-
new_block(ensure_block_shape(x[1], ndim=2), placement=x[0], ndim=2)
1976+
new_block_2d(ensure_block_shape(x[1], ndim=2), placement=BlockPlacement(x[0]))
19721977
for x in tuples
19731978
]
19741979

@@ -2035,7 +2040,7 @@ def _merge_blocks(
20352040
new_mgr_locs = new_mgr_locs[argsort]
20362041

20372042
bp = BlockPlacement(new_mgr_locs)
2038-
return [new_block(new_values, placement=bp, ndim=2)]
2043+
return [new_block_2d(new_values, placement=bp)]
20392044

20402045
# can't consolidate --> no merge
20412046
return blocks

pandas/tests/frame/indexing/test_insert.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,7 @@ def test_insert_EA_no_warning(self):
100100
def test_insert_frame(self):
101101
# GH#42403
102102
df = DataFrame({"col1": [1, 2], "col2": [3, 4]})
103-
msg = (
104-
"Expected a 1D array, got an array with shape "
105-
r"\(2, 2\)|Wrong number of items passed 2, placement implies 1"
106-
)
103+
104+
msg = r"Expected a 1D array, got an array with shape \(2, 2\)"
107105
with pytest.raises(ValueError, match=msg):
108106
df.insert(1, "newcol", df)

pandas/tests/frame/test_constructors.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -222,12 +222,7 @@ def test_constructor_cast_failure(self):
222222
df["foo"] = np.ones((4, 2)).tolist()
223223

224224
# this is not ok
225-
msg = "|".join(
226-
[
227-
"Wrong number of items passed 2, placement implies 1",
228-
"Expected a 1D array, got an array with shape \\(4, 2\\)",
229-
]
230-
)
225+
msg = "Expected a 1D array, got an array with shape \\(4, 2\\)"
231226
with pytest.raises(ValueError, match=msg):
232227
df["test"] = np.ones((4, 2))
233228

0 commit comments

Comments
 (0)