From ddad4a9d4a1d1e626a0467b250241cecf01ff29e Mon Sep 17 00:00:00 2001 From: Anh Trinh Date: Sat, 16 Mar 2024 18:58:57 +0100 Subject: [PATCH] Fix stubname equal suffix in wide to long function --- pandas/core/reshape/melt.py | 8 +++++++- pandas/tests/reshape/test_melt.py | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 24a070a5361508..7e73cc32f6a1c1 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -603,6 +603,9 @@ def get_var_names(df, stub: str, sep: str, suffix: str): return df.columns[df.columns.str.match(regex)] def melt_stub(df, stub: str, i, j, value_vars, sep: str): + # Ensure value_name and var_name are different when passing to melt + j_original = j + j = f"{j}_1" if stub == j else j newdf = melt( df, id_vars=i, @@ -619,7 +622,10 @@ def melt_stub(df, stub: str, i, j, value_vars, sep: str): # TODO: anything else to catch? pass - return newdf.set_index(i + [j]) + newdf = newdf.set_index(i + [j]) + if j != j_original: + newdf.index = newdf.index.set_names(j_original, level=-1) + return newdf if not is_list_like(stubnames): stubnames = [stubnames] diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index f224a45ca32797..534ff8b78c0176 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -1218,6 +1218,33 @@ def test_missing_stubname(self, dtype): expected.index = expected.index.set_levels(new_level, level=0) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("stubnames", ["year", ["year"]]) + def test_stubname_equal_suffix(self, stubnames): + # https://github.com/pandas-dev/pandas/issues/46939 + df = DataFrame( + { + "year1": {0: 4.5, 1: 1.7}, + "year2": {0: 2.5, 1: 1.2}, + "X": dict(zip(range(2), range(2, 4))), + } + ) + df["id"] = df.index + result = wide_to_long( + df, + stubnames=stubnames, + i="id", + j="year", + ) + expected = DataFrame( + [[2, 4.5], [3, 1.7], [2, 2.5], [3, 1.2]], + columns=["X", "year"], + index=pd.MultiIndex.from_arrays( + [[0, 1, 0, 1], [1, 1, 2, 2]], + names=["id", "year"], + ), + ) + tm.assert_frame_equal(result, expected) + def test_wide_to_long_pyarrow_string_columns(): # GH 57066