Skip to content

Commit

Permalink
COMPAT: Categorical Subclassing
Browse files Browse the repository at this point in the history
xref #8640

Author: sinhrks <sinhrks@gmail.com>

Closes #13827 from sinhrks/categorical_subclass and squashes the following commits:

13c456c [sinhrks] COMPAT: Categorical Subclassing
  • Loading branch information
sinhrks authored and jreback committed Aug 4, 2016
1 parent 8ec7406 commit 61b14b2
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 30 deletions.
62 changes: 33 additions & 29 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,11 +328,16 @@ def __init__(self, values, categories=None, ordered=False,
self._categories = categories
self._codes = _coerce_indexer_dtype(codes, categories)

@property
def _constructor(self):
    """Class used to build new instances derived from this one.

    Methods that return a new categorical call ``self._constructor(...)``
    instead of naming the class directly; here it is ``Categorical``.
    """
    return Categorical

def copy(self):
    """Return a copy of this categorical, preserving its class.

    The codes are copied with ``self._codes.copy()``; ``categories`` and
    ``ordered`` are passed through as-is. The result is built through
    ``self._constructor`` rather than a hard-coded ``Categorical`` so that
    a subclass overriding ``_constructor`` gets its own type back.
    """
    # fastpath=True: the codes/categories are already in their final form,
    # so they are handed to the constructor directly.
    return self._constructor(values=self._codes.copy(),
                             categories=self.categories,
                             ordered=self.ordered,
                             fastpath=True)

def astype(self, dtype, copy=True):
"""
Expand Down Expand Up @@ -414,7 +419,7 @@ def from_array(cls, data, **kwargs):
Can be an Index or array-like. The categories are assumed to be
the unique values of `data`.
"""
return Categorical(data, **kwargs)
return cls(data, **kwargs)

@classmethod
def from_codes(cls, codes, categories, ordered=False, name=None):
Expand Down Expand Up @@ -458,8 +463,8 @@ def from_codes(cls, codes, categories, ordered=False, name=None):
raise ValueError("codes need to be between -1 and "
"len(categories)-1")

return Categorical(codes, categories=categories, ordered=ordered,
fastpath=True)
return cls(codes, categories=categories, ordered=ordered,
fastpath=True)

_codes = None

Expand Down Expand Up @@ -916,9 +921,9 @@ def map(self, mapper):
"""
new_categories = self.categories.map(mapper)
try:
return Categorical.from_codes(self._codes.copy(),
categories=new_categories,
ordered=self.ordered)
return self.from_codes(self._codes.copy(),
categories=new_categories,
ordered=self.ordered)
except ValueError:
return np.take(new_categories, self._codes)

Expand Down Expand Up @@ -968,8 +973,8 @@ def shift(self, periods):
else:
codes[periods:] = -1

return Categorical.from_codes(codes, categories=self.categories,
ordered=self.ordered)
return self.from_codes(codes, categories=self.categories,
ordered=self.ordered)

def __array__(self, dtype=None):
"""
Expand Down Expand Up @@ -1159,8 +1164,8 @@ def value_counts(self, dropna=True):
count = bincount(np.where(mask, code, ncat))
ix = np.append(ix, -1)

ix = Categorical(ix, categories=cat, ordered=obj.ordered,
fastpath=True)
ix = self._constructor(ix, categories=cat, ordered=obj.ordered,
fastpath=True)

return Series(count, index=CategoricalIndex(ix), dtype='int64')

Expand Down Expand Up @@ -1313,8 +1318,8 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'):
self._codes = codes
return
else:
return Categorical(values=codes, categories=self.categories,
ordered=self.ordered, fastpath=True)
return self._constructor(values=codes, categories=self.categories,
ordered=self.ordered, fastpath=True)

def order(self, inplace=False, ascending=True, na_position='last'):
"""
Expand Down Expand Up @@ -1441,8 +1446,8 @@ def fillna(self, value=None, method=None, limit=None):
values = values.copy()
values[mask] = self.categories.get_loc(value)

return Categorical(values, categories=self.categories,
ordered=self.ordered, fastpath=True)
return self._constructor(values, categories=self.categories,
ordered=self.ordered, fastpath=True)

def take_nd(self, indexer, allow_fill=True, fill_value=None):
""" Take the codes by the indexer, fill with the fill_value.
Expand All @@ -1455,8 +1460,8 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None):
assert isnull(fill_value)

codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
result = Categorical(codes, categories=self.categories,
ordered=self.ordered, fastpath=True)
result = self._constructor(codes, categories=self.categories,
ordered=self.ordered, fastpath=True)
return result

take = take_nd
Expand All @@ -1476,8 +1481,8 @@ def _slice(self, slicer):
slicer = slicer[1]

_codes = self._codes[slicer]
return Categorical(values=_codes, categories=self.categories,
ordered=self.ordered, fastpath=True)
return self._constructor(values=_codes, categories=self.categories,
ordered=self.ordered, fastpath=True)

def __len__(self):
"""The length of this Categorical."""
Expand Down Expand Up @@ -1588,10 +1593,9 @@ def __getitem__(self, key):
else:
return self.categories[i]
else:
return Categorical(values=self._codes[key],
categories=self.categories,
ordered=self.ordered,
fastpath=True)
return self._constructor(values=self._codes[key],
categories=self.categories,
ordered=self.ordered, fastpath=True)

def __setitem__(self, key, value):
""" Item assignment.
Expand Down Expand Up @@ -1742,8 +1746,8 @@ def mode(self):
import pandas.hashtable as htable
good = self._codes != -1
values = sorted(htable.mode_int64(_ensure_int64(self._codes[good])))
result = Categorical(values=values, categories=self.categories,
ordered=self.ordered, fastpath=True)
result = self._constructor(values=values, categories=self.categories,
ordered=self.ordered, fastpath=True)
return result

def unique(self):
Expand Down Expand Up @@ -1837,8 +1841,8 @@ def repeat(self, repeats, *args, **kwargs):
"""
nv.validate_repeat(args, kwargs)
codes = self._codes.repeat(repeats)
return Categorical(values=codes, categories=self.categories,
ordered=self.ordered, fastpath=True)
return self._constructor(values=codes, categories=self.categories,
ordered=self.ordered, fastpath=True)

# The Series.cat accessor

Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -4415,6 +4415,36 @@ def test_concat_categorical(self):
tm.assert_frame_equal(df_expected, df_concat)


class TestCategoricalSubclassing(tm.TestCase):
    """Check that a Categorical subclass keeps its type through the API."""

    _multiprocess_can_split_ = True

    def test_constructor(self):
        # Direct construction yields the subclass with the same contents
        # as a plain Categorical.
        sc = tm.SubclassedCategorical(['a', 'b', 'c'])
        self.assertIsInstance(sc, tm.SubclassedCategorical)
        tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c']))

    def test_from_array(self):
        # from_array is a classmethod that builds via ``cls(...)``, so it
        # must return the subclass when called on it.
        sc = tm.SubclassedCategorical.from_array(['a', 'b', 'c'])
        self.assertIsInstance(sc, tm.SubclassedCategorical)
        exp = Categorical.from_array(['a', 'b', 'c'])
        tm.assert_categorical_equal(sc, exp)

    def test_from_codes(self):
        # Same check for the from_codes alternate constructor.
        sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
        self.assertIsInstance(sc, tm.SubclassedCategorical)
        exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
        tm.assert_categorical_equal(sc, exp)

    def test_map(self):
        # map() goes through self.from_codes, so the subclass is preserved.
        sc = tm.SubclassedCategorical(['a', 'b', 'c'])
        res = sc.map(lambda x: x.upper())
        self.assertIsInstance(res, tm.SubclassedCategorical)
        exp = Categorical(['A', 'B', 'C'])
        tm.assert_categorical_equal(res, exp)


if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down
9 changes: 8 additions & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@

from pandas.computation import expressions as expr

from pandas import (bdate_range, CategoricalIndex, DatetimeIndex,
from pandas import (bdate_range, CategoricalIndex, Categorical, DatetimeIndex,
TimedeltaIndex, PeriodIndex, RangeIndex, Index, MultiIndex,
Series, DataFrame, Panel, Panel4D)
from pandas.util.decorators import deprecate
Expand Down Expand Up @@ -2670,6 +2670,13 @@ def _constructor_sliced(self):
return SubclassedSparseSeries


class SubclassedCategorical(Categorical):
    """Bare ``Categorical`` subclass for exercising subclass propagation."""

    @property
    def _constructor(self):
        """Class used to build derived instances: this subclass itself."""
        return SubclassedCategorical


@contextmanager
def patch(ob, attr, value):
"""Temporarily patch an attribute of an object.
Expand Down

0 comments on commit 61b14b2

Please sign in to comment.