Skip to content

Commit

Permalink
Backport PR pandas-dev#54755 on branch 2.1.x (BUG: merge raising for …
Browse files Browse the repository at this point in the history
…ea int and numpy float) (pandas-dev#54779)

BUG: merge raising for ea int and numpy float (pandas-dev#54755)

* BUG: merge raising for ea int and numpy float

* Fix up mypy and add check

(cherry picked from commit 9939c32)
  • Loading branch information
phofl authored Aug 27, 2023
1 parent d42fbed commit f7eb2cc
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,7 @@ Reshaping
- Bug in :func:`merge_asof` raising ``KeyError`` for extension dtypes (:issue:`52904`)
- Bug in :func:`merge_asof` raising ``ValueError`` for data backed by read-only ndarrays (:issue:`53513`)
- Bug in :func:`merge_asof` with ``left_index=True`` or ``right_index=True`` with mismatched index dtypes giving incorrect results in some cases instead of raising ``MergeError`` (:issue:`53870`)
- Bug in :func:`merge` when merging on integer ``ExtensionDtype`` and float NumPy dtype raising ``TypeError`` (:issue:`46178`)
- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dict-like argument passed in (:issue:`51099`)
- Bug in :meth:`DataFrame.combine_first` ignoring other's columns if ``other`` is empty (:issue:`53792`)
- Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`)
Expand Down
16 changes: 16 additions & 0 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
ensure_object,
is_bool,
is_bool_dtype,
is_extension_array_dtype,
is_float_dtype,
is_integer,
is_integer_dtype,
Expand Down Expand Up @@ -1385,6 +1386,21 @@ def _maybe_coerce_merge_keys(self) -> None:
if lk.dtype.kind == rk.dtype.kind:
continue

if is_extension_array_dtype(lk.dtype) and not is_extension_array_dtype(
rk.dtype
):
ct = find_common_type([lk.dtype, rk.dtype])
if is_extension_array_dtype(ct):
rk = ct.construct_array_type()._from_sequence(rk) # type: ignore[union-attr] # noqa: E501
else:
rk = rk.astype(ct) # type: ignore[arg-type]
elif is_extension_array_dtype(rk.dtype):
ct = find_common_type([lk.dtype, rk.dtype])
if is_extension_array_dtype(ct):
lk = ct.construct_array_type()._from_sequence(lk) # type: ignore[union-attr] # noqa: E501
else:
lk = lk.astype(ct) # type: ignore[arg-type]

# check whether ints and floats
if is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype):
# GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2847,3 +2847,26 @@ def test_merge_multiindex_single_level():

result = df.merge(df2, left_on=["col"], right_index=True, how="left")
tm.assert_frame_equal(result, expected)


def test_merge_ea_int_and_float_numpy():
# GH#46178
df1 = DataFrame([1.0, np.nan], dtype=pd.Int64Dtype())
df2 = DataFrame([1.5])
expected = DataFrame(columns=[0], dtype="Int64")

with tm.assert_produces_warning(UserWarning, match="You are merging"):
result = df1.merge(df2)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(UserWarning, match="You are merging"):
result = df2.merge(df1)
tm.assert_frame_equal(result, expected.astype("float64"))

df2 = DataFrame([1.0])
expected = DataFrame([1], columns=[0], dtype="Int64")
result = df1.merge(df2)
tm.assert_frame_equal(result, expected)

result = df2.merge(df1)
tm.assert_frame_equal(result, expected.astype("float64"))

0 comments on commit f7eb2cc

Please sign in to comment.