Skip to content

implement constructors for TimedeltaArray, DatetimeArray #21803

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 8, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
implement constructors for TimedeltaArray, DatetimeArray
  • Loading branch information
jbrockmendel committed Jul 7, 2018
commit 1eaa1a8cfd4f1b1e6b3ea7afb08e2ca7a50e1f16
84 changes: 83 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,37 @@
from pandas.core.algorithms import checked_add_with_arr


class DatetimeLikeArrayMixin(object):
class AttributesMixin(object):
    """
    Mixin supplying attribute-forwarding copy helpers.

    A subclass provides ``_attributes`` (the names of the attributes that
    should be carried over to copies) and ``_simple_new`` (its low-level
    constructor); ``_get_attributes_dict`` and ``_shallow_copy`` are built
    on top of those two hooks.
    """

    @property
    def _attributes(self):
        # Abstract hook: an inheriting subclass should define _attributes
        # as a list of strings.
        from pandas.errors import AbstractMethodError
        raise AbstractMethodError(self)

    @classmethod
    def _simple_new(cls, values, **kwargs):
        # Abstract hook: the concrete class supplies the real constructor.
        from pandas.errors import AbstractMethodError
        raise AbstractMethodError(cls)

    def _get_attributes_dict(self):
        """
        Map every name listed in ``_attributes`` to its value on ``self``.

        A name that is not set on the instance is reported as ``None``.
        """
        collected = {}
        for name in self._attributes:
            collected[name] = getattr(self, name, None)
        return collected

    def _shallow_copy(self, values=None, **kwargs):
        """
        Build a new object via ``_simple_new`` that reuses this object's
        attributes.

        Parameters
        ----------
        values : optional
            Data for the new object; ``self._ndarray_values`` when omitted.
        **kwargs
            Overrides applied on top of the attributes collected by
            ``_get_attributes_dict``.

        Returns
        -------
        Whatever ``self._simple_new`` returns.
        """
        # Note: the Index implementation differs slightly in that it
        # defaults to self.values
        new_values = self._ndarray_values if values is None else values

        params = self._get_attributes_dict()
        params.update(kwargs)
        # For empty data, pass our own dtype along unless the caller
        # explicitly supplied one.
        if len(new_values) == 0 and 'dtype' not in kwargs:
            params['dtype'] = self.dtype
        return self._simple_new(new_values, **params)


class DatetimeLikeArrayMixin(AttributesMixin):
"""
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray

Expand Down Expand Up @@ -56,9 +86,61 @@ def asi8(self):
# do not cache or you'll create a memory leak
return self.values.view('i8')

# ------------------------------------------------------------------
# Array-like Methods

def __len__(self):
    """Return the length of the backing ``_data`` container."""
    backing = self._data
    return len(backing)

def __getitem__(self, key):
"""
This getitem defers to the underlying array, which by-definition can
only handle list-likes, slices, and integer scalars
"""

# Scalars that are not integers are rejected up front with an IndexError.
is_int = lib.is_integer(key)
if lib.is_scalar(key) and not is_int:
raise IndexError("only integers, slices (`:`), ellipsis (`...`), "
"numpy.newaxis (`None`) and integer or boolean "
"arrays are valid indices")

# All actual indexing is delegated to the backing _data's __getitem__.
getitem = self._data.__getitem__
if is_int:
# Integer key: fetch one element and wrap it with self._box_func.
val = getitem(key)
return self._box_func(val)
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't need the else here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the existing __getitem__ moved verbatim. Can de-indent in the next pass.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks!

# Boolean indexer: an all-True mask is replaced by a full slice; any other
# mask is handed to lib.maybe_booleans_to_slice as a uint8 view
# (presumably so a contiguous mask can become a slice -- TODO confirm).
if com.is_bool_indexer(key):
key = np.asarray(key)
if key.all():
key = slice(0, None, None)
else:
key = lib.maybe_booleans_to_slice(key.view(np.uint8))

attribs = self._get_attributes_dict()

# freq handling: period dtype always keeps self.freq.  Otherwise freq is
# kept only for slice keys (multiplied by the step when both freq and step
# are set) and is None for every other kind of key.
is_period = is_period_dtype(self)
if is_period:
freq = self.freq
else:
freq = None
if isinstance(key, slice):
if self.freq is not None and key.step is not None:
freq = key.step * self.freq
else:
freq = self.freq

attribs['freq'] = freq

result = getitem(key)
if result.ndim > 1:
# To support MPL which performs slicing with 2 dim
# even though it only has 1 dim by definition
# Only the period case is re-wrapped here; otherwise the raw
# multi-dimensional result is returned as-is.
if is_period:
return self._simple_new(result, **attribs)
return result

return self._simple_new(result, **attribs)

# ------------------------------------------------------------------
# Null Handling

Expand Down
224 changes: 222 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,21 @@

from pandas._libs import tslib
from pandas._libs.tslib import Timestamp, NaT, iNaT
from pandas._libs.tslibs import conversion, fields, timezones
from pandas._libs.tslibs import (
conversion, fields, timezones,
resolution as libresolution)

from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import _NS_DTYPE, is_datetime64tz_dtype
from pandas.core.dtypes.common import (
_NS_DTYPE,
is_datetime64tz_dtype,
is_datetime64_dtype,
_ensure_int64)
from pandas.core.dtypes.dtypes import DatetimeTZDtype

from pandas.tseries.frequencies import to_offset, DateOffset

from .datetimelike import DatetimeLikeArrayMixin


Expand Down Expand Up @@ -66,6 +74,50 @@ class DatetimeArrayMixin(DatetimeLikeArrayMixin):
'is_year_end', 'is_leap_year']
_object_ops = ['weekday_name', 'freq', 'tz']

# -----------------------------------------------------------------
# Constructors

# Names of the attributes that _get_attributes_dict collects and that
# _shallow_copy forwards to _simple_new.
_attributes = ["freq", "tz"]

@classmethod
def _simple_new(cls, values, freq=None, tz=None, **kwargs):
    """
    Construct an instance directly from ``values``.

    The values are required to be dtype-compatible; when something that
    is not dtype-compatible is passed, it is coerced first.

    Parameters
    ----------
    values : array-like or None
        ``None`` yields an empty array built through the regular
        constructor ``cls(...)``.  Any other object without a ``dtype``
        attribute is converted with ``np.asarray``.  Data that is not
        datetime64 is cast with ``_ensure_int64`` and viewed as
        ``_NS_DTYPE``.
    freq : optional
        Stored unchanged as ``_freq`` on the result.
    tz : optional
        Passed through ``timezones.maybe_get_tz`` and
        ``timezones.tz_standardize`` before being stored as ``_tz``.
    **kwargs
        Forwarded to ``cls(...)`` only in the ``values is None`` path and
        otherwise ignored.
        NOTE(review): ``__new__`` in this class accepts only
        values/freq/tz, so non-empty kwargs would presumably fail in that
        path -- confirm.

    Returns
    -------
    instance of ``cls``
    """
    if getattr(values, 'dtype', None) is None:
        if values is None:
            # empty, but with dtype compat
            values = np.empty(0, dtype=_NS_DTYPE)
            return cls(values, freq=freq, tz=tz, **kwargs)
        # np.asarray converts without copying when possible.  The former
        # np.array(values, copy=False) raises ValueError on NumPy >= 2.0
        # whenever a copy is unavoidable (e.g. list input), which is
        # exactly the input reaching this branch.
        values = np.asarray(values)

    if not is_datetime64_dtype(values):
        values = _ensure_int64(values).view(_NS_DTYPE)

    # Bypass __new__/__init__ and populate the private slots directly.
    result = object.__new__(cls)
    result._data = values
    result._freq = freq
    tz = timezones.maybe_get_tz(tz)
    result._tz = timezones.tz_standardize(tz)
    return result

def __new__(cls, values, freq=None, tz=None):
# Anything that is not None, not already a DateOffset and not the
# 'infer' sentinel is converted with to_offset.
if (freq is not None and not isinstance(freq, DateOffset) and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this condition could actually be handled to by to_offset (future PR) (except for the not-None part)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yah, I anticipate most of the existing constructors will be moved from the Index subclasses to the Array subclasses; for now I just need something that works for the cases that are called internally.

freq != 'infer'):
freq = to_offset(freq)

# NOTE(review): when freq == 'infer' the sentinel string itself is passed
# to _simple_new, which stores it as result._freq.  If inferred_freq turns
# out falsy below, nothing replaces it -- confirm whether 'infer' can leak
# out as the array's freq.
result = cls._simple_new(values, freq=freq, tz=tz)
if freq == 'infer':
# Replace the sentinel with the inferred offset when one is found.
inferred = result.inferred_freq
if inferred:
result.freq = to_offset(inferred)

# NB: Among other things not yet ported from the DatetimeIndex
# constructor, this does not call _deepcopy_if_needed
return result

# -----------------------------------------------------------------
# Descriptive Properties

Expand Down Expand Up @@ -116,6 +168,10 @@ def is_normalized(self):
"""
return conversion.is_date_array_normalized(self.asi8, self.tz)

@property  # NB: override with cache_readonly in immutable subclasses
def _resolution(self):
    """
    Result of ``libresolution.resolution`` for this array's ``asi8``
    values and ``tz``.
    """
    i8values = self.asi8
    timezone = self.tz
    return libresolution.resolution(i8values, timezone)

# ----------------------------------------------------------------
# Array-like Methods

Expand Down Expand Up @@ -207,6 +263,170 @@ def _local_timestamps(self):
reverse.put(indexer, np.arange(n))
return result.take(reverse)

def tz_convert(self, tz):
    """
    Convert a tz-aware Datetime Array/Index to another time zone.

    Parameters
    ----------
    tz : string, pytz.timezone, dateutil.tz.tzfile or None
        Target time zone.  The timestamps are re-expressed in this time
        zone.  Passing ``None`` converts to UTC and removes the timezone
        information.

    Returns
    -------
    converted : same type as self

    Raises
    ------
    TypeError
        If the Datetime Array/Index is tz-naive.

    See Also
    --------
    DatetimeIndex.tz : A timezone that has a variable offset from UTC
    DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
        given time zone, or remove timezone from a tz-aware DatetimeIndex.

    Examples
    --------
    With the `tz` parameter, we can change the DatetimeIndex
    to other time zones:

    >>> dti = pd.DatetimeIndex(start='2014-08-01 09:00',
    ...                        freq='H', periods=3, tz='Europe/Berlin')

    >>> dti
    DatetimeIndex(['2014-08-01 09:00:00+02:00',
                   '2014-08-01 10:00:00+02:00',
                   '2014-08-01 11:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Berlin]', freq='H')

    >>> dti.tz_convert('US/Central')
    DatetimeIndex(['2014-08-01 02:00:00-05:00',
                   '2014-08-01 03:00:00-05:00',
                   '2014-08-01 04:00:00-05:00'],
                  dtype='datetime64[ns, US/Central]', freq='H')

    With the ``tz=None``, we can remove the timezone (after converting
    to UTC if necessary):

    >>> dti = pd.DatetimeIndex(start='2014-08-01 09:00',freq='H',
    ...                        periods=3, tz='Europe/Berlin')

    >>> dti
    DatetimeIndex(['2014-08-01 09:00:00+02:00',
                   '2014-08-01 10:00:00+02:00',
                   '2014-08-01 11:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Berlin]', freq='H')

    >>> dti.tz_convert(None)
    DatetimeIndex(['2014-08-01 07:00:00',
                   '2014-08-01 08:00:00',
                   '2014-08-01 09:00:00'],
                  dtype='datetime64[ns]', freq='H')
    """
    # Resolve the target first so an unusable tz argument surfaces before
    # the tz-naive check, as it always has.
    target = timezones.maybe_get_tz(tz)

    if self.tz is not None:
        # No conversion of the data since timestamps are all UTC to begin
        # with; only the tz attribute changes.
        return self._shallow_copy(tz=target)

    # tz naive, use tz_localize
    raise TypeError('Cannot convert tz-naive timestamps, use '
                    'tz_localize to localize')

def tz_localize(self, tz, ambiguous='raise', errors='raise'):
    """
    Localize tz-naive Datetime Array/Index to tz-aware
    Datetime Array/Index.

    This method takes a time zone (tz) naive Datetime Array/Index object
    and makes it time zone aware.  It does not move the time to another
    time zone.  With ``tz=None`` it goes the other way and strips the
    time zone from a tz-aware object while keeping the local time.

    Parameters
    ----------
    tz : string, pytz.timezone, dateutil.tz.tzfile or None
        Time zone to convert timestamps to. Passing ``None`` will
        remove the time zone information preserving local time.
    ambiguous : str {'infer', 'NaT', 'raise'} or bool array,
        default 'raise'
        - 'infer' will attempt to infer fall dst-transition hours based on
          order
        - bool-ndarray where True signifies a DST time, False signifies a
          non-DST time (note that this flag is only applicable for
          ambiguous times)
        - 'NaT' will return NaT where there are ambiguous times
        - 'raise' will raise an AmbiguousTimeError if there are ambiguous
          times
    errors : {'raise', 'coerce'}, default 'raise'
        - 'raise' will raise a NonExistentTimeError if a timestamp is not
          valid in the specified time zone (e.g. due to a transition from
          or to DST time)
        - 'coerce' will return NaT if the timestamp can not be converted
          to the specified time zone

        .. versionadded:: 0.19.0

    Returns
    -------
    result : same type as self
        Array/Index converted to the specified time zone.

    Raises
    ------
    TypeError
        If the Datetime Array/Index is tz-aware and tz is not None.

    See Also
    --------
    DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
        one time zone to another.

    Examples
    --------
    >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
    >>> tz_naive
    DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                   '2018-03-03 09:00:00'],
                  dtype='datetime64[ns]', freq='D')

    Localize DatetimeIndex in US/Eastern time zone:

    >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
    >>> tz_aware
    DatetimeIndex(['2018-03-01 09:00:00-05:00',
                   '2018-03-02 09:00:00-05:00',
                   '2018-03-03 09:00:00-05:00'],
                  dtype='datetime64[ns, US/Eastern]', freq='D')

    With the ``tz=None``, we can remove the time zone information
    while keeping the local time (not converted to UTC):

    >>> tz_aware.tz_localize(None)
    DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                   '2018-03-03 09:00:00'],
                  dtype='datetime64[ns]', freq='D')
    """
    if self.tz is None:
        # tz-naive input: resolve the target zone and convert to UTC
        tz = timezones.maybe_get_tz(tz)
        new_dates = conversion.tz_localize_to_utc(self.asi8, tz,
                                                  ambiguous=ambiguous,
                                                  errors=errors)
    elif tz is None:
        # tz-aware input with tz=None: drop the zone, keep the local time
        new_dates = conversion.tz_convert(self.asi8, 'UTC', self.tz)
    else:
        raise TypeError("Already tz-aware, use tz_convert to convert.")

    new_dates = new_dates.view(_NS_DTYPE)
    return self._shallow_copy(new_dates, tz=tz)

# ----------------------------------------------------------------
# Conversion Methods - Vectorized analogues of Timestamp methods

Expand Down
14 changes: 1 addition & 13 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,20 +113,8 @@ def freq(self, value):

# Names of the attributes collected by _get_attributes_dict.
_attributes = ["freq"]

def _get_attributes_dict(self):
    """
    Collect the attributes named in ``_attributes`` into a dict.

    Names that are not set on the instance map to ``None``.
    """
    return dict((name, getattr(self, name, None))
                for name in self._attributes)

# TODO: share docstring?
def _shallow_copy(self, values=None, **kwargs):
    """
    Create a new object through ``_simple_new`` that carries over this
    object's attributes.

    ``values`` defaults to ``self._ndarray_values``; keyword arguments
    override the attributes gathered by ``_get_attributes_dict``.
    """
    new_values = self._ndarray_values if values is None else values
    params = dict(self._get_attributes_dict(), **kwargs)
    return self._simple_new(new_values, **params)

@classmethod
def _simple_new(cls, values, freq=None):
def _simple_new(cls, values, freq=None, **kwargs):
"""
Values can be any type that can be coerced to Periods.
Ordinals in an ndarray are fastpath-ed to `_from_ordinals`
Expand Down
Loading