|
1 | 1 | import numpy as np |
2 | 2 | import pytest |
3 | 3 |
|
| 4 | +from pandas.errors import PerformanceWarning |
| 5 | + |
4 | 6 | import pandas as pd |
5 | 7 | from pandas import ( |
6 | 8 | DataFrame, |
@@ -323,3 +325,49 @@ def test_concat_multiindex_(self): |
323 | 325 | {"col": ["a", "b", "c"]}, index=MultiIndex.from_product(iterables) |
324 | 326 | ) |
325 | 327 | tm.assert_frame_equal(result_df, expected_df) |
| 328 | + |
| 329 | + def test_concat_with_key_not_unique(self): |
| 330 | + # GitHub #46519 |
| 331 | + df1 = DataFrame({"name": [1]}) |
| 332 | + df2 = DataFrame({"name": [2]}) |
| 333 | + df3 = DataFrame({"name": [3]}) |
| 334 | + df_a = concat([df1, df2, df3], keys=["x", "y", "x"]) |
| 335 | + # the warning is caused by indexing unsorted multi-index |
| 336 | + with tm.assert_produces_warning( |
| 337 | + PerformanceWarning, match="indexing past lexsort depth" |
| 338 | + ): |
| 339 | + out_a = df_a.loc[("x", 0), :] |
| 340 | + |
| 341 | + df_b = DataFrame( |
| 342 | + {"name": [1, 2, 3]}, index=Index([("x", 0), ("y", 0), ("x", 0)]) |
| 343 | + ) |
| 344 | + with tm.assert_produces_warning( |
| 345 | + PerformanceWarning, match="indexing past lexsort depth" |
| 346 | + ): |
| 347 | + out_b = df_b.loc[("x", 0)] |
| 348 | + |
| 349 | + tm.assert_frame_equal(out_a, out_b) |
| 350 | + |
| 351 | + df1 = DataFrame({"name": ["a", "a", "b"]}) |
| 352 | + df2 = DataFrame({"name": ["a", "b"]}) |
| 353 | + df3 = DataFrame({"name": ["c", "d"]}) |
| 354 | + df_a = concat([df1, df2, df3], keys=["x", "y", "x"]) |
| 355 | + with tm.assert_produces_warning( |
| 356 | + PerformanceWarning, match="indexing past lexsort depth" |
| 357 | + ): |
| 358 | + out_a = df_a.loc[("x", 0), :] |
| 359 | + |
| 360 | + df_b = DataFrame( |
| 361 | + { |
| 362 | + "a": ["x", "x", "x", "y", "y", "x", "x"], |
| 363 | + "b": [0, 1, 2, 0, 1, 0, 1], |
| 364 | + "name": list("aababcd"), |
| 365 | + } |
| 366 | + ).set_index(["a", "b"]) |
| 367 | + df_b.index.names = [None, None] |
| 368 | + with tm.assert_produces_warning( |
| 369 | + PerformanceWarning, match="indexing past lexsort depth" |
| 370 | + ): |
| 371 | + out_b = df_b.loc[("x", 0), :] |
| 372 | + |
| 373 | + tm.assert_frame_equal(out_a, out_b) |
0 commit comments