implement constructors for TimedeltaArray, DatetimeArray

pandas-dev · jreback · Jul 8, 2018 · Jul 7, 2018 · Jul 7, 2018 · Jul 7, 2018
commit 1eaa1a8cfd4f1b1e6b3ea7afb08e2ca7a50e1f16
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -18,7 +18,37 @@
 from pandas.core.algorithms import checked_add_with_arr
 
 
-class DatetimeLikeArrayMixin(object):
+class AttributesMixin(object):
+
+    @property
+    def _attributes(self):
+        # Inheriting subclass should implement _attributes as a list of strings
+        from pandas.errors import AbstractMethodError
+        raise AbstractMethodError(self)
+
+    @classmethod
+    def _simple_new(cls, values, **kwargs):
+        from pandas.errors import AbstractMethodError
+        raise AbstractMethodError(cls)
+
+    def _get_attributes_dict(self):
+        """return an attributes dict for my class"""
+        return {k: getattr(self, k, None) for k in self._attributes}
+
+    def _shallow_copy(self, values=None, **kwargs):
+        if values is None:
+            # Note: slightly different from Index implementation which defaults
+            # to self.values
+            values = self._ndarray_values
+
+        attributes = self._get_attributes_dict()
+        attributes.update(kwargs)
+        if not len(values) and 'dtype' not in kwargs:
+            attributes['dtype'] = self.dtype
+        return self._simple_new(values, **attributes)
+
+
+class DatetimeLikeArrayMixin(AttributesMixin):
     """
     Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
 
@@ -56,9 +86,61 @@ def asi8(self):
         # do not cache or you'll create a memory leak
         return self.values.view('i8')
 
+    # ------------------------------------------------------------------
+    # Array-like Methods
+
     def __len__(self):
         return len(self._data)
 
+    def __getitem__(self, key):
+        """
+        This getitem defers to the underlying array, which by definition can
+        only handle list-likes, slices, and integer scalars
+        """
+
+        is_int = lib.is_integer(key)
+        if lib.is_scalar(key) and not is_int:
+            raise IndexError("only integers, slices (`:`), ellipsis (`...`), "
+                             "numpy.newaxis (`None`) and integer or boolean "
+                             "arrays are valid indices")
+
+        getitem = self._data.__getitem__
+        if is_int:
+            val = getitem(key)
+            return self._box_func(val)
+        else:
+            if com.is_bool_indexer(key):
+                key = np.asarray(key)
+                if key.all():
+                    key = slice(0, None, None)
+                else:
+                    key = lib.maybe_booleans_to_slice(key.view(np.uint8))
+
+            attribs = self._get_attributes_dict()
+
+            is_period = is_period_dtype(self)
+            if is_period:
+                freq = self.freq
+            else:
+                freq = None
+                if isinstance(key, slice):
+                    if self.freq is not None and key.step is not None:
+                        freq = key.step * self.freq
+                    else:
+                        freq = self.freq
+
+            attribs['freq'] = freq
+
+            result = getitem(key)
+            if result.ndim > 1:
+                # To support MPL which performs slicing with 2 dim
+                # even though it only has 1 dim by definition
+                if is_period:
+                    return self._simple_new(result, **attribs)
+                return result
+
+            return self._simple_new(result, **attribs)
+
     # ------------------------------------------------------------------
     # Null Handling
 

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -6,13 +6,21 @@
 
 from pandas._libs import tslib
 from pandas._libs.tslib import Timestamp, NaT, iNaT
-from pandas._libs.tslibs import conversion, fields, timezones
+from pandas._libs.tslibs import (
+    conversion, fields, timezones,
+    resolution as libresolution)
 
 from pandas.util._decorators import cache_readonly
 
-from pandas.core.dtypes.common import _NS_DTYPE, is_datetime64tz_dtype
+from pandas.core.dtypes.common import (
+    _NS_DTYPE,
+    is_datetime64tz_dtype,
+    is_datetime64_dtype,
+    _ensure_int64)
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 
+from pandas.tseries.frequencies import to_offset, DateOffset
+
 from .datetimelike import DatetimeLikeArrayMixin
 
 
@@ -66,6 +74,50 @@ class DatetimeArrayMixin(DatetimeLikeArrayMixin):
                  'is_year_end', 'is_leap_year']
     _object_ops = ['weekday_name', 'freq', 'tz']
 
+    # -----------------------------------------------------------------
+    # Constructors
+
+    _attributes = ["freq", "tz"]
+
+    @classmethod
+    def _simple_new(cls, values, freq=None, tz=None, **kwargs):
+        """
+        we require that we have a dtype compat for the values
+        if we are passed a non-dtype compat, then coerce using the constructor
+        """
+
+        if getattr(values, 'dtype', None) is None:
+            # empty, but with dtype compat
+            if values is None:
+                values = np.empty(0, dtype=_NS_DTYPE)
+                return cls(values, freq=freq, tz=tz, **kwargs)
+            values = np.array(values, copy=False)
+
+        if not is_datetime64_dtype(values):
+            values = _ensure_int64(values).view(_NS_DTYPE)
+
+        result = object.__new__(cls)
+        result._data = values
+        result._freq = freq
+        tz = timezones.maybe_get_tz(tz)
+        result._tz = timezones.tz_standardize(tz)
+        return result
+
+    def __new__(cls, values, freq=None, tz=None):
+        if (freq is not None and not isinstance(freq, DateOffset) and
+                freq != 'infer'):
+            freq = to_offset(freq)
+
+        result = cls._simple_new(values, freq=freq, tz=tz)
+        if freq == 'infer':
+            inferred = result.inferred_freq
+            if inferred:
+                result.freq = to_offset(inferred)
+
+        # NB: Among other things not yet ported from the DatetimeIndex
+        # constructor, this does not call _deepcopy_if_needed
+        return result
+
     # -----------------------------------------------------------------
     # Descriptive Properties
 
@@ -116,6 +168,10 @@ def is_normalized(self):
         """
         return conversion.is_date_array_normalized(self.asi8, self.tz)
 
+    @property  # NB: override with cache_readonly in immutable subclasses
+    def _resolution(self):
+        return libresolution.resolution(self.asi8, self.tz)
+
     # ----------------------------------------------------------------
     # Array-like Methods
 
@@ -207,6 +263,170 @@ def _local_timestamps(self):
         reverse.put(indexer, np.arange(n))
         return result.take(reverse)
 
+    def tz_convert(self, tz):
+        """
+        Convert tz-aware Datetime Array/Index from one time zone to another.
+
+        Parameters
+        ----------
+        tz : string, pytz.timezone, dateutil.tz.tzfile or None
+            Time zone for time. Corresponding timestamps would be converted
+            to this time zone of the Datetime Array/Index. A `tz` of None will
+            convert to UTC and remove the timezone information.
+
+        Returns
+        -------
+        converted : same type as self
+
+        Raises
+        ------
+        TypeError
+            If Datetime Array/Index is tz-naive.
+
+        See Also
+        --------
+        DatetimeIndex.tz : A timezone that has a variable offset from UTC
+        DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
+            given time zone, or remove timezone from a tz-aware DatetimeIndex.
+
+        Examples
+        --------
+        With the `tz` parameter, we can change the DatetimeIndex
+        to other time zones:
+
+        >>> dti = pd.DatetimeIndex(start='2014-08-01 09:00',
+        ...                        freq='H', periods=3, tz='Europe/Berlin')
+
+        >>> dti
+        DatetimeIndex(['2014-08-01 09:00:00+02:00',
+                       '2014-08-01 10:00:00+02:00',
+                       '2014-08-01 11:00:00+02:00'],
+                      dtype='datetime64[ns, Europe/Berlin]', freq='H')
+
+        >>> dti.tz_convert('US/Central')
+        DatetimeIndex(['2014-08-01 02:00:00-05:00',
+                       '2014-08-01 03:00:00-05:00',
+                       '2014-08-01 04:00:00-05:00'],
+                      dtype='datetime64[ns, US/Central]', freq='H')
+
+        With the ``tz=None``, we can remove the timezone (after converting
+        to UTC if necessary):
+
+        >>> dti = pd.DatetimeIndex(start='2014-08-01 09:00', freq='H',
+        ...                        periods=3, tz='Europe/Berlin')
+
+        >>> dti
+        DatetimeIndex(['2014-08-01 09:00:00+02:00',
+                       '2014-08-01 10:00:00+02:00',
+                       '2014-08-01 11:00:00+02:00'],
+                      dtype='datetime64[ns, Europe/Berlin]', freq='H')
+
+        >>> dti.tz_convert(None)
+        DatetimeIndex(['2014-08-01 07:00:00',
+                       '2014-08-01 08:00:00',
+                       '2014-08-01 09:00:00'],
+                      dtype='datetime64[ns]', freq='H')
+        """
+        tz = timezones.maybe_get_tz(tz)
+
+        if self.tz is None:
+            # tz naive, use tz_localize
+            raise TypeError('Cannot convert tz-naive timestamps, use '
+                            'tz_localize to localize')
+
+        # No conversion since timestamps are all UTC to begin with
+        return self._shallow_copy(tz=tz)
+
+    def tz_localize(self, tz, ambiguous='raise', errors='raise'):
+        """
+        Localize tz-naive Datetime Array/Index to tz-aware
+        Datetime Array/Index.
+
+        This method takes a time zone (tz) naive Datetime Array/Index object
+        and makes this time zone aware. It does not move the time to another
+        time zone.
+        Time zone localization switches time zone unaware objects to time zone
+        aware ones; passing ``tz=None`` does the reverse.
+
+        Parameters
+        ----------
+        tz : string, pytz.timezone, dateutil.tz.tzfile or None
+            Time zone to convert timestamps to. Passing ``None`` will
+            remove the time zone information preserving local time.
+        ambiguous : str {'infer', 'NaT', 'raise'} or bool array,
+            default 'raise'
+            - 'infer' will attempt to infer fall dst-transition hours based on
+              order
+            - bool-ndarray where True signifies a DST time, False signifies a
+              non-DST time (note that this flag is only applicable for
+              ambiguous times)
+            - 'NaT' will return NaT where there are ambiguous times
+            - 'raise' will raise an AmbiguousTimeError if there are ambiguous
+              times
+        errors : {'raise', 'coerce'}, default 'raise'
+            - 'raise' will raise a NonExistentTimeError if a timestamp is not
+              valid in the specified time zone (e.g. due to a transition from
+              or to DST time)
+            - 'coerce' will return NaT if the timestamp can not be converted
+              to the specified time zone
+
+            .. versionadded:: 0.19.0
+
+        Returns
+        -------
+        result : same type as self
+            Array/Index converted to the specified time zone.
+
+        Raises
+        ------
+        TypeError
+            If the Datetime Array/Index is tz-aware and tz is not None.
+
+        See Also
+        --------
+        DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
+            one time zone to another.
+
+        Examples
+        --------
+        >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
+        >>> tz_naive
+        DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
+                       '2018-03-03 09:00:00'],
+                      dtype='datetime64[ns]', freq='D')
+
+        Localize DatetimeIndex in US/Eastern time zone:
+
+        >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
+        >>> tz_aware
+        DatetimeIndex(['2018-03-01 09:00:00-05:00',
+                       '2018-03-02 09:00:00-05:00',
+                       '2018-03-03 09:00:00-05:00'],
+                      dtype='datetime64[ns, US/Eastern]', freq='D')
+
+        With the ``tz=None``, we can remove the time zone information
+        while keeping the local time (not converted to UTC):
+
+        >>> tz_aware.tz_localize(None)
+        DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
+                       '2018-03-03 09:00:00'],
+                      dtype='datetime64[ns]', freq='D')
+        """
+        if self.tz is not None:
+            if tz is None:
+                new_dates = conversion.tz_convert(self.asi8, 'UTC', self.tz)
+            else:
+                raise TypeError("Already tz-aware, use tz_convert to convert.")
+        else:
+            tz = timezones.maybe_get_tz(tz)
+            # Convert to UTC
+
+            new_dates = conversion.tz_localize_to_utc(self.asi8, tz,
+                                                      ambiguous=ambiguous,
+                                                      errors=errors)
+        new_dates = new_dates.view(_NS_DTYPE)
+        return self._shallow_copy(new_dates, tz=tz)
+
     # ----------------------------------------------------------------
     # Conversion Methods - Vectorized analogues of Timestamp methods
 

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -113,20 +113,8 @@ def freq(self, value):
 
     _attributes = ["freq"]
 
-    def _get_attributes_dict(self):
-        """return an attributes dict for my class"""
-        return {k: getattr(self, k, None) for k in self._attributes}
-
-    # TODO: share docstring?
-    def _shallow_copy(self, values=None, **kwargs):
-        if values is None:
-            values = self._ndarray_values
-        attributes = self._get_attributes_dict()
-        attributes.update(kwargs)
-        return self._simple_new(values, **attributes)
-
     @classmethod
-    def _simple_new(cls, values, freq=None):
+    def _simple_new(cls, values, freq=None, **kwargs):
         """
         Values can be any type that can be coerced to Periods.
         Ordinals in an ndarray are fastpath-ed to `_from_ordinals`