Skip to content

Commit

Permalink
BUG: segfault in concat of CategoricalIndex (#16133)
Browse files Browse the repository at this point in the history
* BUG: segfault in concat of cat-idx

* lint
  • Loading branch information
chris-b1 authored Apr 25, 2017
1 parent 8d122e6 commit 186957e
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1629,6 +1629,7 @@ Indexing
- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
- Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`)
- Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`, :issue:`16604`)
- Bug in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`)

I/O
^^^
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
is_scalar)
from pandas.core.common import _asarray_tuplesafe
from pandas.core.dtypes.missing import array_equivalent
from pandas.core.algorithms import take_1d


from pandas.util.decorators import Appender, cache_readonly
Expand Down Expand Up @@ -470,8 +471,10 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
codes = target.codes
else:
if isinstance(target, CategoricalIndex):
target = target.categories
codes = self.categories.get_indexer(target)
code_indexer = self.categories.get_indexer(target.categories)
codes = take_1d(code_indexer, target.codes, fill_value=-1)
else:
codes = self.categories.get_indexer(target)

indexer, _ = self._engine.get_indexer_non_unique(codes)

Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1928,6 +1928,27 @@ def test_concat_multiindex_dfs_with_deepcopy(self):
result_no_copy = pd.concat(example_dict, names=['testname'])
tm.assert_frame_equal(result_no_copy, expected)

def test_concat_categoricalindex(self):
# Regression test: column-wise pd.concat of Series whose CategoricalIndex
# objects all share one category list that is not in sorted order.
# GH 16111, categories that aren't lexsorted
categories = [9, 0, 1, 2, 3]

# Three constant-valued Series (1, 2, 3). Their index labels overlap
# pairwise: 0 appears in both a and b, 1 appears in both b and c.
a = pd.Series(1, index=pd.CategoricalIndex([9, 0],
categories=categories))
b = pd.Series(2, index=pd.CategoricalIndex([0, 1],
categories=categories))
c = pd.Series(3, index=pd.CategoricalIndex([1, 2],
categories=categories))

# axis=1 places each Series in its own column (0, 1, 2), aligned on index.
result = pd.concat([a, b, c], axis=1)

# Expected rows are labelled 0, 1, 2, 9; a label missing from a given
# Series shows up as NaN in that Series' column.
exp_idx = pd.CategoricalIndex([0, 1, 2, 9])
exp = pd.DataFrame({0: [1, np.nan, np.nan, 1],
1: [2, 2, np.nan, np.nan],
2: [np.nan, 3, 3, np.nan]},
columns=[0, 1, 2],
index=exp_idx)
tm.assert_frame_equal(result, exp)


@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
@pytest.mark.parametrize('dt', np.sctypes['float'])
Expand Down

0 comments on commit 186957e

Please sign in to comment.