Skip to content

Implement _most_ of the EA interface for DTA/TDA #23643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Nov 14, 2018
Prev Previous commit
Next Next commit
REF: Simplify concat
_concat._concat_datetimetz -> DatetimeIndex._concat_same_dtype ->
DatetimeArray._concat_same_type
  • Loading branch information
TomAugspurger committed Nov 14, 2018
commit eceebc768b40810b35dd40aa1fca785cbc2d9639
9 changes: 2 additions & 7 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,17 +239,12 @@ def take(self, indices, allow_fill=False, fill_value=None):

@classmethod
def _concat_same_type(cls, to_concat):
freqs = {x.freq for x in to_concat}
assert len(freqs) == 1
freq = list(freqs)[0]

# dtype captures tz for datetime64tz case
dtypes = {x.dtype for x in to_concat}
assert len(dtypes) == 1
dtype = list(dtypes)[0]

values = np.concatenate([x.asi8 for x in to_concat])
return cls(values, dtype=dtype, freq=freq)
return cls(values, dtype=dtype)

def copy(self, deep=False):
values = self.asi8
Expand All @@ -262,7 +257,7 @@ def _values_for_factorize(self):

@classmethod
def _from_factorized(cls, values, original):
return cls(values, dtype=original.dtype, freq=original.freq)
return cls(values, dtype=original.dtype)

# ------------------------------------------------------------------
# Null Handling
Expand Down
8 changes: 1 addition & 7 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,13 +476,7 @@ def _concat_datetimetz(to_concat, name=None):
all inputs must be DatetimeIndex
it is used in DatetimeIndex.append also
"""
# do not pass tz to set because tzlocal cannot be hashed
if len({str(x.dtype) for x in to_concat}) != 1:
raise ValueError('to_concat must have the same tz')
tz = to_concat[0].tz
# no need to localize because internal repr will not be changed
new_values = np.concatenate([x.asi8 for x in to_concat])
return to_concat[0]._simple_new(new_values, tz=tz, name=name)
return to_concat[0]._concat_same_dtype(to_concat, name=name)


def _concat_index_same_dtype(indexes, klass=None):
Expand Down
16 changes: 9 additions & 7 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype,
is_integer, is_integer_dtype, is_list_like, is_object_dtype,
is_period_dtype, is_scalar, is_string_dtype)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna

Expand Down Expand Up @@ -690,17 +689,21 @@ def _concat_same_dtype(self, to_concat, name):
"""
attribs = self._get_attributes_dict()
attribs['name'] = name
# do not pass tz to set because tzlocal cannot be hashed
if len({str(x.dtype) for x in to_concat}) != 1:
raise ValueError('to_concat must have the same tz')

if not is_period_dtype(self):
# reset freq
attribs['freq'] = None

if getattr(self, 'tz', None) is not None:
return _concat._concat_datetimetz(to_concat, name)
# TODO(DatetimeArray)
# - remove the .asi8 here
# - remove the _maybe_box_as_values
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like you can remove part of the comment

# - combine with the `else` block
new_data = self._concat_same_type(to_concat).asi8
else:
new_data = np.concatenate([c.asi8 for c in to_concat])
new_data = type(self._values)._concat_same_type(to_concat)

new_data = self._maybe_box_as_values(new_data, **attribs)
return self._simple_new(new_data, **attribs)

def _maybe_box_as_values(self, values, **attribs):
Expand All @@ -709,7 +712,6 @@ def _maybe_box_as_values(self, values, **attribs):
# but others are not. When everyone is an ExtensionArray, this can
# be removed. Currently used in
# - sort_values
# - _concat_same_dtype
return values

def astype(self, dtype, copy=True):
Expand Down
26 changes: 14 additions & 12 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,20 @@ def test_concat_same_type_invalid(self, datetime_index):
with pytest.raises(AssertionError):
arr._concat_same_type([arr, other])

def test_concat_same_type_different_freq(self):
# we *can* concatenate DTI with different freqs.
# Both inputs share the same tz ('US/Central') and differ only in freq:
# `a` is daily ('D'), `b` is hourly ('H').
a = DatetimeArray(pd.date_range('2000', periods=2, freq='D',
tz='US/Central'))
b = DatetimeArray(pd.date_range('2000', periods=2, freq='H',
tz='US/Central'))
result = DatetimeArray._concat_same_type([a, b])
# Expected values are a's two daily timestamps followed by b's two
# hourly timestamps, localized to the same tz.
expected = DatetimeArray(pd.to_datetime([
'2000-01-01 00:00:00', '2000-01-02 00:00:00',
'2000-01-01 00:00:00', '2000-01-01 01:00:00',
]).tz_localize("US/Central"))

tm.assert_datetime_array_equal(result, expected)


class TestTimedeltaArray(SharedTests):
index_cls = pd.TimedeltaIndex
Expand Down Expand Up @@ -339,18 +353,6 @@ def test_take_fill_valid(self, timedelta_index):
# fill_value Period invalid
arr.take([0, 1], allow_fill=True, fill_value=now.to_period('D'))

def test_concat_same_type_invalid(self, timedelta_index):
# different freqs
tdi = timedelta_index
arr = TimedeltaArray(tdi)
other = pd.timedelta_range('1D', periods=5, freq='2D')
# FIXME: TimedeltaArray should inherit freq='2D' without specifying it
other = TimedeltaArray(other, freq='2D')
assert other.freq != arr.freq

with pytest.raises(AssertionError):
arr._concat_same_type([arr, other])


class TestPeriodArray(SharedTests):
index_cls = pd.PeriodIndex
Expand Down