Skip to content

BUG: fix in categorical merges #32079

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Feb 27, 2020
Next commit
add int8_t and int16_t to join_t, make sure correct index is returned for categorical joins
  • Loading branch information
Marco Gorelli committed Feb 18, 2020
commit 8c8f752de522b7df3c8ed5bf5a11167df39b3117
2 changes: 2 additions & 0 deletions pandas/_libs/join.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ ctypedef fused join_t:
float64_t
float32_t
object
int8_t
int16_t
int32_t
int64_t
uint64_t
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3822,6 +3822,9 @@ def _join_monotonic(self, other, how="left", return_indexers=False):
join_index, lidx, ridx = self._outer_indexer(sv, ov)
join_index = self._wrap_joined_index(join_index, other)

if self._typ == "categoricalindex":
join_index = self._create_from_codes(join_index)

if return_indexers:
lidx = None if lidx is None else ensure_platform_int(lidx)
ridx = None if ridx is None else ensure_platform_int(ridx)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2163,3 +2163,17 @@ def test_merge_datetime_upcast_dtype():
}
)
tm.assert_frame_equal(result, expected)


def test_categorical_non_unique_monotonic():
    # GH 28189
    # Index-on-index left merge where the left CategoricalIndex is monotonic
    # but non-unique (the same category repeated) and the right side holds
    # that category exactly once.
    left = DataFrame(
        range(4), columns=["value"], index=CategoricalIndex(["1"] * 4)
    )
    right = DataFrame([[6]], columns=["value"], index=CategoricalIndex(["1"]))

    result = merge(left, right, how="left", left_index=True, right_index=True)

    # Every left row matches the single right row, so the right value is
    # repeated on each row and the result keeps the left categorical index.
    expected = DataFrame(
        [[0, 6], [1, 6], [2, 6], [3, 6]],
        columns=["value_x", "value_y"],
        index=CategoricalIndex(["1"] * 4),
    )
    # (result, expected) order matches the other tests in this module so
    # that the left/right labels in a failure message are not swapped.
    tm.assert_frame_equal(result, expected)