Skip to content

Commit 54f352a

Browse files
authored
BUG: pd.concat with identical key leads to multi-indexing error (#46546)
1 parent c8cbe19 commit 54f352a

File tree

3 files changed

+50
-1
lines changed

3 files changed

+50
-1
lines changed

doc/source/whatsnew/v1.5.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ Reshaping
600600
- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
601601
- Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
602602
- Bug in concatenation with ``IntegerDtype`` or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`)
603+
- Bug in :func:`concat` where passing non-unique ``keys`` led to an error when indexing the resulting :class:`MultiIndex` (:issue:`46519`)
603604
-
604605

605606
Sparse

pandas/core/reshape/concat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
708708
names = [None]
709709

710710
if levels is None:
711-
levels = [ensure_index(keys)]
711+
levels = [ensure_index(keys).unique()]
712712
else:
713713
levels = [ensure_index(x) for x in levels]
714714

pandas/tests/reshape/concat/test_index.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.errors import PerformanceWarning
5+
46
import pandas as pd
57
from pandas import (
68
DataFrame,
@@ -323,3 +325,49 @@ def test_concat_multiindex_(self):
323325
{"col": ["a", "b", "c"]}, index=MultiIndex.from_product(iterables)
324326
)
325327
tm.assert_frame_equal(result_df, expected_df)
328+
329+
def test_concat_with_key_not_unique(self):
330+
# GitHub #46519
331+
df1 = DataFrame({"name": [1]})
332+
df2 = DataFrame({"name": [2]})
333+
df3 = DataFrame({"name": [3]})
334+
df_a = concat([df1, df2, df3], keys=["x", "y", "x"])
335+
# the warning is caused by indexing unsorted multi-index
336+
with tm.assert_produces_warning(
337+
PerformanceWarning, match="indexing past lexsort depth"
338+
):
339+
out_a = df_a.loc[("x", 0), :]
340+
341+
df_b = DataFrame(
342+
{"name": [1, 2, 3]}, index=Index([("x", 0), ("y", 0), ("x", 0)])
343+
)
344+
with tm.assert_produces_warning(
345+
PerformanceWarning, match="indexing past lexsort depth"
346+
):
347+
out_b = df_b.loc[("x", 0)]
348+
349+
tm.assert_frame_equal(out_a, out_b)
350+
351+
df1 = DataFrame({"name": ["a", "a", "b"]})
352+
df2 = DataFrame({"name": ["a", "b"]})
353+
df3 = DataFrame({"name": ["c", "d"]})
354+
df_a = concat([df1, df2, df3], keys=["x", "y", "x"])
355+
with tm.assert_produces_warning(
356+
PerformanceWarning, match="indexing past lexsort depth"
357+
):
358+
out_a = df_a.loc[("x", 0), :]
359+
360+
df_b = DataFrame(
361+
{
362+
"a": ["x", "x", "x", "y", "y", "x", "x"],
363+
"b": [0, 1, 2, 0, 1, 0, 1],
364+
"name": list("aababcd"),
365+
}
366+
).set_index(["a", "b"])
367+
df_b.index.names = [None, None]
368+
with tm.assert_produces_warning(
369+
PerformanceWarning, match="indexing past lexsort depth"
370+
):
371+
out_b = df_b.loc[("x", 0), :]
372+
373+
tm.assert_frame_equal(out_a, out_b)

0 commit comments

Comments
 (0)