Skip to content

Commit

Permalink
Mix EA into DTA/TDA; part of 24024 (#24502)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Dec 30, 2018
1 parent fce9ccf commit 0324465
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 27 deletions.
53 changes: 46 additions & 7 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@
from pandas.core.dtypes.missing import isna

from pandas.core import nanops
from pandas.core.algorithms import checked_add_with_arr, take, unique1d
from pandas.core.algorithms import (
checked_add_with_arr, take, unique1d, value_counts)
import pandas.core.common as com

from pandas.tseries import frequencies
from pandas.tseries.offsets import DateOffset, Tick

from .base import ExtensionOpsMixin
from .base import ExtensionArray, ExtensionOpsMixin


def _make_comparison_op(cls, op):
Expand Down Expand Up @@ -343,7 +344,9 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'):
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)


class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin):
class DatetimeLikeArrayMixin(ExtensionOpsMixin,
AttributesMixin,
ExtensionArray):
"""
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
Expand Down Expand Up @@ -701,6 +704,43 @@ def repeat(self, repeats, *args, **kwargs):
values = self._data.repeat(repeats)
return type(self)(values.view('i8'), dtype=self.dtype)

def value_counts(self, dropna=False):
    """
    Return a Series containing counts of unique values.

    Parameters
    ----------
    dropna : boolean, default False
        Don't include counts of NaT values.

    Returns
    -------
    Series
        Counts of the unique values, indexed by an Index built from this
        array's own type and dtype.
    """
    from pandas import Index, Series

    # Filter out missing entries first when requested; in both cases the
    # counting is done on the underlying ``_data``.
    data = self[~self.isna()]._data if dropna else self._data

    # NOTE: inside the method body this name resolves to the module-level
    # ``value_counts`` helper imported at the top of the file, not to this
    # method.
    counts = value_counts(data, sort=False, dropna=dropna)

    # Re-wrap the i8 view of the unique values in our own array type so the
    # resulting index carries ``self.dtype``.
    unique_values = type(self)(counts.index.view('i8'), dtype=self.dtype)
    index = Index(unique_values, name=counts.index.name)
    return Series(counts.values, index=index, name=counts.name)

def map(self, mapper):
    """
    Apply ``mapper`` to the values by round-tripping through an Index.

    Parameters
    ----------
    mapper : object
        Passed unchanged to ``Index.map``.

    Returns
    -------
    The ``.array`` of the Index produced by ``Index(self).map(mapper)``.
    """
    # TODO(GH-23179): Add ExtensionArray.map
    # Need to figure out if we want ExtensionArray.map first.
    # If so, then we can refactor IndexOpsMixin._map_values to
    # a standalone function and call from here..
    # Else, just rewrite _map_infer_values to do the right thing.
    from pandas import Index

    as_index = Index(self)
    mapped = as_index.map(mapper)
    return mapped.array

# ------------------------------------------------------------------
# Null Handling

Expand Down Expand Up @@ -1357,10 +1397,9 @@ def _reduce(self, name, axis=0, skipna=True, **kwargs):
if op:
return op(axis=axis, skipna=skipna, **kwargs)
else:
raise TypeError("cannot perform {name} with type {dtype}"
.format(name=name, dtype=self.dtype))
# TODO: use super(DatetimeLikeArrayMixin, self)._reduce
# after we subclass ExtensionArray
return super(DatetimeLikeArrayMixin, self)._reduce(
name, skipna, **kwargs
)

def min(self, axis=None, skipna=True, *args, **kwargs):
"""
Expand Down
21 changes: 2 additions & 19 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from pandas.core.dtypes.missing import isna, notna

import pandas.core.algorithms as algos
from pandas.core.arrays import ExtensionArray, datetimelike as dtl
from pandas.core.arrays import datetimelike as dtl
import pandas.core.common as com
from pandas.core.missing import backfill_1d, pad_1d

Expand Down Expand Up @@ -92,9 +92,7 @@ def wrapper(self, other):
return compat.set_function_name(wrapper, opname, cls)


class PeriodArray(dtl.DatetimeLikeArrayMixin,
dtl.DatelikeOps,
ExtensionArray):
class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps):
"""
Pandas ExtensionArray for storing Period data.
Expand Down Expand Up @@ -418,21 +416,6 @@ def fillna(self, value=None, method=None, limit=None):
new_values = self.copy()
return new_values

def value_counts(self, dropna=False):
    """
    Return a Series containing counts of unique values.

    Parameters
    ----------
    dropna : boolean, default False
        Don't include counts of missing values; they are filtered out via
        ``self.isna()`` before counting.

    Returns
    -------
    Series
        Counts of the unique values, indexed by a PeriodIndex that reuses
        this array's ``freq``.
    """
    from pandas import PeriodIndex, Series

    # Count on the underlying ``_data``, optionally without missing entries.
    data = self[~self.isna()]._data if dropna else self._data

    # ``dropna`` is not forwarded here; only ``sort=False`` is passed.
    counts = algos.value_counts(data, sort=False)

    # Wrap the unique values back into our own array type with the same freq.
    periods = type(self)(counts.index, freq=self.freq)
    index = PeriodIndex(periods, name=counts.index.name)
    return Series(counts.values, index=index, name=counts.name)

# --------------------------------------------------------------------

def _time_shift(self, n, freq=None):
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from pandas.core.dtypes.common import (
_NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
is_int64_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype,
pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
Expand Down Expand Up @@ -244,6 +244,16 @@ def _maybe_clear_freq(self):
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

def __array__(self, dtype=None):
    """
    Return the array's data as a NumPy array.

    Parameters
    ----------
    dtype : optional
        If it is an object dtype, an object ndarray built from iterating
        over ``self`` is returned; if it is an int64 dtype, ``self.asi8``
        is returned. Any other value (including None) yields ``self._data``.

    Returns
    -------
    The object ndarray, ``self.asi8`` or ``self._data``, as described above.
    """
    # TODO(https://github.com/pandas-dev/pandas/pull/23593)
    # Maybe push to parent once datetimetz __array__ is figured out.
    if is_object_dtype(dtype):
        # Materialize whatever iterating over ``self`` produces.
        return np.array(list(self), dtype=object)

    if is_int64_dtype(dtype):
        return self.asi8

    # Fallback for every other requested dtype, including None.
    return self._data

@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
def _validate_fill_value(self, fill_value):
if isna(fill_value):
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,21 @@ def test_repeat_preserves_tz(self):
expected = DatetimeArray(arr.asi8, freq=None, tz=arr.tz)
tm.assert_equal(repeated, expected)

def test_value_counts_preserves_tz(self):
    """
    value_counts on a tz-aware DatetimeArray keeps the tz-aware values in
    the result index, both without and with a NaT entry present.
    """
    tz_index = pd.date_range('2000', periods=2, freq='D', tz='US/Central')
    values = DatetimeArray(tz_index).repeat([4, 3])

    counts = values.value_counts()

    # Note: not tm.assert_index_equal, since `freq`s do not match
    assert counts.index.equals(tz_index)

    # Overwrite one of the trailing entries with NaT and count again.
    values[-2] = pd.NaT
    counts = values.value_counts()
    expected_index = [pd.NaT, tz_index[0], tz_index[1]]
    expected = pd.Series([1, 4, 2], index=expected_index)
    tm.assert_series_equal(counts, expected)


class TestSequenceToDT64NS(object):

Expand Down

0 comments on commit 0324465

Please sign in to comment.