Skip to content

Commit

Permalink
REF/API: DatetimeTZDtype (#23990)
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger authored and jreback committed Dec 3, 2018
1 parent 08395af commit 3fe697f
Show file tree
Hide file tree
Showing 12 changed files with 152 additions and 146 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,7 @@ Deprecations
- :func:`pandas.types.is_period` is deprecated in favor of `pandas.types.is_period_dtype` (:issue:`23917`)
- :func:`pandas.types.is_datetimetz` is deprecated in favor of `pandas.types.is_datetime64tz_dtype` (:issue:`23917`)
- Creating a :class:`TimedeltaIndex` or :class:`DatetimeIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range` and :func:`date_range` (:issue:`23919`)
- Passing a string alias like ``'datetime64[ns, UTC]'`` as the `unit` parameter to :class:`DatetimeTZDtype` is deprecated. Use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`).

.. _whatsnew_0240.deprecations.datetimelike_int_ops:

Expand Down
25 changes: 15 additions & 10 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,16 +1174,21 @@ def validate_tz_from_dtype(dtype, tz):
ValueError : on tzinfo mismatch
"""
if dtype is not None:
try:
dtype = DatetimeTZDtype.construct_from_string(dtype)
dtz = getattr(dtype, 'tz', None)
if dtz is not None:
if tz is not None and not timezones.tz_compare(tz, dtz):
raise ValueError("cannot supply both a tz and a dtype"
" with a tz")
tz = dtz
except TypeError:
pass
if isinstance(dtype, compat.string_types):
try:
dtype = DatetimeTZDtype.construct_from_string(dtype)
except TypeError:
# A TypeError here means the string is not a tz-aware alias: either
# something like `datetime64[ns]`, which is fine for the constructors,
# or nonsense, which should be validated, but not by us. We *do* let
# errors for non-existent timezones go through.
pass
dtz = getattr(dtype, 'tz', None)
if dtz is not None:
if tz is not None and not timezones.tz_compare(tz, dtz):
raise ValueError("cannot supply both a tz and a dtype"
" with a tz")
tz = dtz
return tz


Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
resolution as libresolution, timezones)
import pandas.compat as compat
from pandas.errors import PerformanceWarning
from pandas.util._decorators import Appender, cache_readonly
from pandas.util._decorators import Appender

from pandas.core.dtypes.common import (
_NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type,
Expand Down Expand Up @@ -333,7 +333,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
def _box_func(self):
    """
    Return a callable that wraps a single value in a ``Timestamp``.

    The returned callable passes ``self.freq`` and ``self.tz`` along to
    ``Timestamp``; both attributes are read when the callable is invoked,
    not when this method is called.
    """
    return lambda x: Timestamp(x, freq=self.freq, tz=self.tz)

@cache_readonly
@property
def dtype(self):
if self.tz is None:
return _NS_DTYPE
Expand Down
32 changes: 0 additions & 32 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1767,38 +1767,6 @@ def is_complex_dtype(arr_or_dtype):
return issubclass(tipo, np.complexfloating)


def _coerce_to_dtype(dtype):
    """
    Convert a string or np.dtype into a pandas or numpy dtype.

    The pandas dtypes (categorical, tz-aware datetime, period, interval)
    are tried first, in that order; anything that matches none of them
    is handed to ``np.dtype``.

    Parameters
    ----------
    dtype : The dtype that we want to coerce.

    Returns
    -------
    pd_or_np_dtype : The coerced dtype.
    """
    if is_categorical_dtype(dtype):
        # carry over categories/ordered when the input already has them
        return CategoricalDtype(
            categories=getattr(dtype, 'categories', None),
            ordered=getattr(dtype, 'ordered', False))
    if is_datetime64tz_dtype(dtype):
        return DatetimeTZDtype(dtype)
    if is_period_dtype(dtype):
        return PeriodDtype(dtype)
    if is_interval_dtype(dtype):
        return IntervalDtype(dtype)
    # not a pandas dtype: fall back to numpy
    return np.dtype(dtype)


def _get_dtype(arr_or_dtype):
"""
Get the dtype instance associated with an array
Expand Down
133 changes: 81 additions & 52 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
""" define extension dtypes """

import re
import warnings

import numpy as np
import pytz

from pandas._libs.interval import Interval
from pandas._libs.tslibs import NaT, Period, Timestamp, timezones
Expand Down Expand Up @@ -491,64 +492,69 @@ class DatetimeTZDtype(PandasExtensionDtype):
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_cache = {}

def __new__(cls, unit=None, tz=None):
def __init__(self, unit="ns", tz=None):
"""
Create a new unit if needed, otherwise return from the cache
An ExtensionDtype for timezone-aware datetime data.
Parameters
----------
unit : string unit that this represents, currently must be 'ns'
tz : string tz that this represents
"""

if isinstance(unit, DatetimeTZDtype):
unit, tz = unit.unit, unit.tz

elif unit is None:
# we are called as an empty constructor
# generally for pickle compat
return object.__new__(cls)
unit : str, default "ns"
The precision of the datetime data. Currently limited
to ``"ns"``.
tz : str, int, or datetime.tzinfo
The timezone.
elif tz is None:
Raises
------
pytz.UnknownTimeZoneError
When the requested timezone cannot be found.
# we were passed a string that we can construct
try:
m = cls._match.search(unit)
if m is not None:
unit = m.groupdict()['unit']
tz = timezones.maybe_get_tz(m.groupdict()['tz'])
except TypeError:
raise ValueError("could not construct DatetimeTZDtype")
Examples
--------
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='UTC')
datetime64[ns, UTC]
elif isinstance(unit, compat.string_types):
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='dateutil/US/Central')
datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
"""
if isinstance(unit, DatetimeTZDtype):
unit, tz = unit.unit, unit.tz

if unit != 'ns':
if unit != 'ns':
if isinstance(unit, compat.string_types) and tz is None:
# maybe a string like datetime64[ns, tz], which we support for
# now.
result = type(self).construct_from_string(unit)
unit = result.unit
tz = result.tz
msg = (
"Passing a dtype alias like 'datetime64[ns, {tz}]' "
"to DatetimeTZDtype is deprecated. Use "
"'DatetimeTZDtype.construct_from_string()' instead."
)
warnings.warn(msg.format(tz=tz), FutureWarning, stacklevel=2)
else:
raise ValueError("DatetimeTZDtype only supports ns units")

unit = unit
tz = tz
if tz:
tz = timezones.maybe_get_tz(tz)
elif tz is not None:
raise pytz.UnknownTimeZoneError(tz)
elif tz is None:
raise TypeError("A 'tz' is required.")

if tz is None:
raise ValueError("DatetimeTZDtype constructor must have a tz "
"supplied")
self._unit = unit
self._tz = tz

# hash with the actual tz if we can
# some cannot be hashed, so stringify
try:
key = (unit, tz)
hash(key)
except TypeError:
key = (unit, str(tz))
@property
def unit(self):
"""The precision of the datetime data."""
return self._unit

# set/retrieve from cache
try:
return cls._cache[key]
except KeyError:
u = object.__new__(cls)
u.unit = unit
u.tz = tz
cls._cache[key] = u
return u
@property
def tz(self):
"""The timezone."""
return self._tz

@classmethod
def construct_array_type(cls):
Expand All @@ -565,24 +571,42 @@ def construct_array_type(cls):
@classmethod
def construct_from_string(cls, string):
"""
attempt to construct this type from a string, raise a TypeError if
it's not possible
Construct a DatetimeTZDtype from a string.
Parameters
----------
string : str
The string alias for this DatetimeTZDtype.
Should be formatted like ``datetime64[ns, <tz>]``,
where ``<tz>`` is the timezone name.
Examples
--------
>>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]')
datetime64[ns, UTC]
"""
msg = "Could not construct DatetimeTZDtype from '{}'"
try:
return cls(unit=string)
except ValueError:
raise TypeError("could not construct DatetimeTZDtype")
match = cls._match.match(string)
if match:
d = match.groupdict()
return cls(unit=d['unit'], tz=d['tz'])
except Exception:
# TODO(py3): Change this pass to `raise TypeError(msg) from e`
pass
raise TypeError(msg.format(string))

def __unicode__(self):
    """Return the ``datetime64[<unit>, <tz>]`` alias for this dtype."""
    # format the tz
    return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz)

@property
def name(self):
    """A string representation of the dtype, as produced by ``str(self)``."""
    return str(self)

def __hash__(self):
    """Hash the dtype through its string form, ``str(self)``."""
    # make myself hashable
    # TODO: update this.
    return hash(str(self))

def __eq__(self, other):
Expand All @@ -593,6 +617,11 @@ def __eq__(self, other):
self.unit == other.unit and
str(self.tz) == str(other.tz))

def __setstate__(self, state):
    """
    Restore ``_tz`` and ``_unit`` from a pickled state mapping.

    Parameters
    ----------
    state : dict
        Must contain the keys ``'tz'`` and ``'unit'``.
    """
    # for pickle compat.
    # NOTE(review): the state keys are un-prefixed ('tz', 'unit') while the
    # attributes are private; presumably this keeps pickles that stored
    # plain ``unit``/``tz`` attributes loadable -- confirm.
    self._tz = state['tz']
    self._unit = state['unit']


class PeriodDtype(ExtensionDtype, PandasExtensionDtype):
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def _isna_ndarraylike(obj):
vec = libmissing.isnaobj(values.ravel())
result[...] = vec.reshape(shape)

elif needs_i8_conversion(obj):
elif needs_i8_conversion(dtype):
# this is the NaT pattern
result = values.view('i8') == iNaT
else:
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2669,11 +2669,10 @@ def _astype(self, dtype, **kwargs):
these automatically copy, so copy=True has no effect
raise on an exception if raise == True
"""
dtype = pandas_dtype(dtype)

# if we are passed a datetime64[ns, tz]
if is_datetime64tz_dtype(dtype):
dtype = DatetimeTZDtype(dtype)

values = self.values
if getattr(values, 'tz', None) is None:
values = DatetimeIndex(values).tz_localize('UTC')
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def test_numpy_string_dtype(self):
'datetime64[ns, Asia/Tokyo]',
'datetime64[ns, UTC]'])
def test_datetimetz_dtype(self, dtype):
assert com.pandas_dtype(dtype) is DatetimeTZDtype(dtype)
assert com.pandas_dtype(dtype) == DatetimeTZDtype(dtype)
assert (com.pandas_dtype(dtype) ==
DatetimeTZDtype.construct_from_string(dtype))
assert com.pandas_dtype(dtype) == dtype

def test_categorical_dtype(self):
Expand Down
Loading

0 comments on commit 3fe697f

Please sign in to comment.