
REF: put EA concat logic in _concat_arrays #33535


Closed
wants to merge 8 commits into from
REF: implement _concat_arrays
jbrockmendel committed Apr 13, 2020
commit 490c3335e9da5647a7286814957cf6d2b09e3d49
29 changes: 27 additions & 2 deletions pandas/core/arrays/datetimelike.py
@@ -24,6 +24,7 @@
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
@@ -723,7 +724,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
return type(self)(new_values, dtype=self.dtype)

@classmethod
def _concat_same_type(cls, to_concat):
def _concat_same_type(cls, to_concat, axis: int = 0):

# do not pass tz to set because tzlocal cannot be hashed
dtypes = {str(x.dtype) for x in to_concat}
@@ -733,7 +734,7 @@ def _concat_same_type(cls, to_concat):
obj = to_concat[0]
dtype = obj.dtype

values = np.concatenate([x.asi8 for x in to_concat])
values = np.concatenate([x.asi8 for x in to_concat], axis=axis)

if is_period_dtype(to_concat[0].dtype):
new_freq = obj.freq
@@ -750,6 +751,30 @@

return cls._simple_new(values, dtype=dtype, freq=new_freq)

@classmethod
def _concat_arrays(cls, to_concat, axis: int = 0):
from pandas.core.ops.array_ops import maybe_upcast_datetimelike_array

to_concat = [maybe_upcast_datetimelike_array(x) for x in to_concat]

if len({x.dtype for x in to_concat}) == 1:
if axis == 1 and is_extension_array_dtype(to_concat[0].dtype):
# TODO(EA2D): not necessary with 2D EAs
axis = 0

result = cls._concat_same_type(to_concat, axis=axis)

if axis == 1 and result.ndim == 1:
# TODO(EA2D): not necessary with 2D EAs
result = result.reshape(1, -1)
return result

to_concat = [x.astype(object) for x in to_concat]
if axis == 1:
# TODO(EA2D): not necessary with 2D EAs
to_concat = [np.atleast_2d(x) for x in to_concat]
return np.concatenate(to_concat, axis=axis)

def copy(self):
values = self.asi8.copy()
return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq)
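
For context, a minimal sketch (not part of the diff; inputs and variable names are assumptions) of how the new DatetimeLikeArrayMixin._concat_arrays classmethod added above behaves on DatetimeArray inputs, per the code in this branch:

import pandas as pd

# Two tz-aware DatetimeArrays with the same dtype: _concat_arrays defers to
# _concat_same_type and returns a DatetimeArray with the timezone preserved.
left = pd.date_range("2020-01-01", periods=3, tz="US/Eastern").array
right = pd.date_range("2020-02-01", periods=3, tz="US/Eastern").array
same_dtype = type(left)._concat_arrays([left, right])

# A tz-aware and a tz-naive array have different dtypes, so _concat_arrays
# coerces both to object and returns an object-dtype ndarray of Timestamps.
naive = pd.date_range("2020-01-01", periods=3).array
mixed = type(left)._concat_arrays([left, naive])
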
114 changes: 6 additions & 108 deletions pandas/core/dtypes/concat.py
@@ -4,11 +4,7 @@

import numpy as np

from pandas._libs import tslib, tslibs

from pandas.core.dtypes.common import (
DT64NS_DTYPE,
TD64NS_DTYPE,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_dtype,
@@ -19,13 +15,7 @@
is_sparse,
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import (
ABCCategoricalIndex,
ABCDatetimeArray,
ABCIndexClass,
ABCRangeIndex,
ABCSeries,
)
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries


def get_dtype_kinds(l):
@@ -149,31 +139,6 @@ def is_nonempty(x) -> bool:
return np.concatenate(to_concat, axis=axis)


def concat_categorical(to_concat, axis: int = 0):
"""
Concatenate an object/categorical array of arrays, each of which is a
single dtype

Parameters
----------
to_concat : array of arrays
axis : int
Axis to provide concatenation in the current implementation this is
always 0, e.g. we only have 1D categoricals

Returns
-------
Categorical
A single array, preserving the combined dtypes
"""
# we could have object blocks and categoricals here
# if we only have a single categoricals then combine everything
# else its a non-compat categorical
from pandas import Categorical

return Categorical._concat_arrays(to_concat, axis=axis)


def union_categoricals(
to_union, sort_categories: bool = False, ignore_order: bool = False
):
@@ -330,13 +295,6 @@ def _maybe_unwrap(x):
return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)


def _concatenate_2d(to_concat, axis: int):
# coerce to 2d if needed & concatenate
if axis == 1:
to_concat = [np.atleast_2d(x) for x in to_concat]
return np.concatenate(to_concat, axis=axis)


def concat_datetime(to_concat, axis=0, typs=None):
"""
provide concatenation of a datetimelike array of arrays each of which is a
@@ -352,70 +310,10 @@
-------
a single array, preserving the combined dtypes
"""
if typs is None:
typs = get_dtype_kinds(to_concat)

# multiple types, need to coerce to object
if len(typs) != 1:
return _concatenate_2d(
[_convert_datetimelike_to_object(x) for x in to_concat], axis=axis
)

# must be single dtype
if any(typ.startswith("datetime") for typ in typs):

if "datetime" in typs:
to_concat = [x.astype(np.int64, copy=False) for x in to_concat]
return _concatenate_2d(to_concat, axis=axis).view(DT64NS_DTYPE)
else:
# when to_concat has different tz, len(typs) > 1.
# thus no need to care
return _concat_datetimetz(to_concat)

elif "timedelta" in typs:
return _concatenate_2d([x.view(np.int64) for x in to_concat], axis=axis).view(
TD64NS_DTYPE
)

elif any(typ.startswith("period") for typ in typs):
assert len(typs) == 1
cls = to_concat[0]
new_values = cls._concat_same_type(to_concat)
return new_values
from pandas.core.arrays import datetimelike as dtl
from pandas.core.ops.array_ops import maybe_upcast_datetimelike_array

to_concat = [maybe_upcast_datetimelike_array(x) for x in to_concat]

def _convert_datetimelike_to_object(x):
# coerce datetimelike array to object dtype

# if dtype is of datetimetz or timezone
if x.dtype.kind == DT64NS_DTYPE.kind:
if getattr(x, "tz", None) is not None:
x = np.asarray(x.astype(object))
else:
shape = x.shape
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), box="timestamp")
x = x.reshape(shape)

elif x.dtype == TD64NS_DTYPE:
shape = x.shape
x = tslibs.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True)
x = x.reshape(shape)

return x


def _concat_datetimetz(to_concat, name=None):
"""
concat DatetimeIndex with the same tz
all inputs must be DatetimeIndex
it is used in DatetimeIndex.append also
"""
# Right now, internals will pass a List[DatetimeArray] here
# for reductions like quantile. I would like to disentangle
# all this before we get here.
sample = to_concat[0]

if isinstance(sample, ABCIndexClass):
return sample._concat_same_dtype(to_concat, name=name)
elif isinstance(sample, ABCDatetimeArray):
return sample._concat_same_type(to_concat)
obj = [x for x in to_concat if isinstance(x, dtl.DatetimeLikeArrayMixin)][0]
return type(obj)._concat_arrays(to_concat, axis=axis)
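
As a rough illustration (inputs assumed; not part of the diff), the slimmed-down concat_datetime above now just runs maybe_upcast_datetimelike_array over the inputs and delegates to _concat_arrays on the first datetime-like array it finds:

import numpy as np
import pandas as pd
from pandas.core.dtypes.concat import concat_datetime

tz_aware = pd.date_range("2020-01-01", periods=2, tz="UTC").array
naive = np.array(["2020-01-03", "2020-01-04"], dtype="datetime64[ns]")

# Mixed tz-aware / tz-naive inputs end up with differing dtypes, so the
# _concat_arrays path coerces to object and returns an object-dtype ndarray.
result = concat_datetime([tz_aware, naive])
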
8 changes: 4 additions & 4 deletions pandas/core/indexes/datetimelike.py
@@ -778,8 +778,8 @@ def _fast_union(self, other, sort=None):
left, right = self, other
left_start = left[0]
loc = right.searchsorted(left_start, side="left")
right_chunk = right.values[:loc]
dates = concat_compat((left.values, right_chunk))
right_chunk = right._values[:loc]
dates = concat_compat([left._values, right_chunk])
result = self._shallow_copy(dates)
result._set_freq("infer")
# TODO: can we infer that it has self.freq?
@@ -793,8 +793,8 @@ def _fast_union(self, other, sort=None):
# concatenate
if left_end < right_end:
loc = right.searchsorted(left_end, side="right")
right_chunk = right.values[loc:]
dates = concat_compat((left.values, right_chunk))
right_chunk = right._values[loc:]
dates = concat_compat([left._values, right_chunk])
result = self._shallow_copy(dates)
result._set_freq("infer")
# TODO: can we infer that it has self.freq?
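
The switch from .values to ._values in _fast_union above matters for tz-aware indexes; a small sketch of the difference (example data assumed):

import pandas as pd

idx = pd.date_range("2020-01-01", periods=3, tz="UTC")

# .values drops the timezone and returns a plain datetime64[ns] ndarray,
# while ._values is the underlying DatetimeArray with the tz preserved,
# which is what concat_compat should receive here.
type(idx.values)   # numpy.ndarray
type(idx._values)  # pandas DatetimeArray
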
4 changes: 2 additions & 2 deletions pandas/core/internals/blocks.py
@@ -48,7 +48,7 @@
is_timedelta64_dtype,
pandas_dtype,
)
from pandas.core.dtypes.concat import concat_categorical, concat_datetime
from pandas.core.dtypes.concat import concat_datetime
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
@@ -2642,7 +2642,7 @@ class CategoricalBlock(ExtensionBlock):
is_categorical = True
_verify_integrity = True
_can_hold_na = True
_concatenator = staticmethod(concat_categorical)
_concatenator = staticmethod(Categorical._concat_arrays)

should_store = Block.should_store

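
Finally, a brief sketch (inputs assumed; per this branch) of the Categorical._concat_arrays method that now backs CategoricalBlock._concatenator, mirroring the removed concat_categorical helper:

import pandas as pd

a = pd.Categorical(["a", "b"], categories=["a", "b", "c"])
b = pd.Categorical(["c"], categories=["a", "b", "c"])

# All inputs share one categorical dtype, so the result should stay
# Categorical; with mismatched or non-categorical inputs it falls back to
# object dtype, as the old concat_categorical helper did.
combined = pd.Categorical._concat_arrays([a, b], axis=0)
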