Skip to content

Commit 5b816ce

Browse files
committed
BUG: Raise MergeError when suffixes result in duplicate column names (GH#61402)
1 parent 9c5b9ee commit 5b816ce

File tree

3 files changed

+17
-4
lines changed

3 files changed

+17
-4
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,7 @@ Reshaping
845845
- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
846846
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
847847
- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
848+
- Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
848849

849850
Sparse
850851
^^^^^^

pandas/core/reshape/merge.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3062,13 +3062,16 @@ def renamer(x, suffix: str | None):
30623062
if not llabels.is_unique:
30633063
# Only raise when the duplicates are caused by the suffixes; columns that were
30643064
# already duplicated in the original frame should not raise
3065-
dups = llabels[(llabels.duplicated()) & (~left.duplicated())].tolist()
3065+
dups.extend(llabels[(llabels.duplicated()) & (~left.duplicated())])
30663066
if not rlabels.is_unique:
3067-
dups.extend(rlabels[(rlabels.duplicated()) & (~right.duplicated())].tolist())
3067+
dups.extend(rlabels[(rlabels.duplicated()) & (~right.duplicated())])
3068+
# Adding a suffix produces a name that duplicates a pre-existing column name
3069+
dups.extend(llabels.intersection(right.difference(to_rename)))
3070+
dups.extend(rlabels.intersection(left.difference(to_rename)))
30683071
if dups:
30693072
raise MergeError(
3070-
f"Passing 'suffixes' which cause duplicate columns {set(dups)} is "
3071-
f"not allowed.",
3073+
f"Passing 'suffixes' which cause duplicate columns {set(dups)} "
3074+
"is not allowed.",
30723075
)
30733076

30743077
return llabels, rlabels

pandas/tests/reshape/merge/test_merge.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3060,3 +3060,12 @@ def test_merge_on_all_nan_column():
30603060
{"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan], "z": [4, 5, 6], "zz": [4, 5, 6]}
30613061
)
30623062
tm.assert_frame_equal(result, expected)
3063+
3064+
3065+
@pytest.mark.parametrize("suffixes", [("_dup", ""), ("", "_dup")])
3066+
def test_merge_for_suffix_collisions(suffixes):
3067+
# GH#61402
3068+
df1 = DataFrame({"col1": [1], "col2": [2]})
3069+
df2 = DataFrame({"col1": [1], "col2": [2], "col2_dup": [3]})
3070+
with pytest.raises(MergeError, match="duplicate columns"):
3071+
merge(df1, df2, on="col1", suffixes=suffixes)

0 commit comments

Comments
 (0)