diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c61b8f3fb3701..2e64c66812306 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -739,6 +739,7 @@ Reshaping - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`) - Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`) +- Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` objects on columns of ``intc`` or ``uintc`` dtype on Windows (:issue:`60091`, :issue:`58713`) - Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`) - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 07e8fa4841c04..0ca8661ad3b5c 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -123,7 +123,17 @@ # See https://github.com/pandas-dev/pandas/issues/52451 if np.intc is not np.int32: - _factorizers[np.intc] = libhashtable.Int64Factorizer + if np.dtype(np.intc).itemsize == 4: + _factorizers[np.intc] = libhashtable.Int32Factorizer + else: + _factorizers[np.intc] = libhashtable.Int64Factorizer + +if np.uintc is not np.uint32: + if np.dtype(np.uintc).itemsize == 4: + _factorizers[np.uintc] = libhashtable.UInt32Factorizer + else: + _factorizers[np.uintc] = libhashtable.UInt64Factorizer + _known = (np.ndarray, ExtensionArray, Index, ABCSeries) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index d4766242b8460..f0abc1afc6ab0 100644 
--- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1843,6 +1843,41 @@ def test_merge_empty(self, left_empty, how, exp): tm.assert_frame_equal(result, expected) + def test_merge_with_uintc_columns(self): + df1 = DataFrame({"a": ["foo", "bar"], "b": np.array([1, 2], dtype=np.uintc)}) + df2 = DataFrame({"a": ["foo", "baz"], "b": np.array([3, 4], dtype=np.uintc)}) + result = df1.merge(df2, how="outer") + expected = DataFrame( + { + "a": ["bar", "baz", "foo", "foo"], + "b": np.array([2, 4, 1, 3], dtype=np.uintc), + } + ) + tm.assert_frame_equal(result.reset_index(drop=True), expected) + + def test_merge_with_intc_columns(self): + df1 = DataFrame({"a": ["foo", "bar"], "b": np.array([1, 2], dtype=np.intc)}) + df2 = DataFrame({"a": ["foo", "baz"], "b": np.array([3, 4], dtype=np.intc)}) + result = df1.merge(df2, how="outer") + expected = DataFrame( + { + "a": ["bar", "baz", "foo", "foo"], + "b": np.array([2, 4, 1, 3], dtype=np.intc), + } + ) + tm.assert_frame_equal(result.reset_index(drop=True), expected) + + def test_merge_intc_non_monotonic(self): + df = DataFrame({"join_key": Series([0, 2, 1], dtype=np.intc)}) + df_details = DataFrame( + {"join_key": Series([0, 1, 2], dtype=np.intc), "value": ["a", "b", "c"]} + ) + merged = df.merge(df_details, on="join_key", how="left") + expected = DataFrame( + {"join_key": np.array([0, 2, 1], dtype=np.intc), "value": ["a", "c", "b"]} + ) + tm.assert_frame_equal(merged.reset_index(drop=True), expected) + @pytest.fixture def left():