
REF: put EA concat logic in _concat_arrays #33535


Closed
wants to merge 8 commits into from
REF: implement _concat_arrays
jbrockmendel committed Apr 13, 2020
commit 490c3335e9da5647a7286814957cf6d2b09e3d49
29 changes: 27 additions & 2 deletions pandas/core/arrays/datetimelike.py
@@ -24,6 +24,7 @@
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
@@ -723,7 +724,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
return type(self)(new_values, dtype=self.dtype)

@classmethod
def _concat_same_type(cls, to_concat):
def _concat_same_type(cls, to_concat, axis: int = 0):

# do not pass tz to set because tzlocal cannot be hashed
dtypes = {str(x.dtype) for x in to_concat}
@@ -733,7 +734,7 @@ def _concat_same_type(cls, to_concat):
obj = to_concat[0]
dtype = obj.dtype

values = np.concatenate([x.asi8 for x in to_concat])
values = np.concatenate([x.asi8 for x in to_concat], axis=axis)

if is_period_dtype(to_concat[0].dtype):
new_freq = obj.freq
@@ -750,6 +751,30 @@

return cls._simple_new(values, dtype=dtype, freq=new_freq)

@classmethod
def _concat_arrays(cls, to_concat, axis: int = 0):
from pandas.core.ops.array_ops import maybe_upcast_datetimelike_array

to_concat = [maybe_upcast_datetimelike_array(x) for x in to_concat]

if len({x.dtype for x in to_concat}) == 1:
if axis == 1 and is_extension_array_dtype(to_concat[0].dtype):
# TODO(EA2D): not necessary with 2D EAs
axis = 0

result = cls._concat_same_type(to_concat, axis=axis)

if axis == 1 and result.ndim == 1:
# TODO(EA2D): not necessary with 2D EAs
result = result.reshape(1, -1)
return result

to_concat = [x.astype(object) for x in to_concat]
if axis == 1:
# TODO(EA2D): not necessary with 2D EAs
to_concat = [np.atleast_2d(x) for x in to_concat]
return np.concatenate(to_concat, axis=axis)

def copy(self):
values = self.asi8.copy()
return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq)
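
For context, a minimal sketch (not part of the diff; inputs and variable names are assumptions) of how the new DatetimeLikeArrayMixin._concat_arrays classmethod added above behaves on DatetimeArray inputs, per the code in this branch:

import pandas as pd

# Two tz-aware DatetimeArrays with the same dtype: _concat_arrays defers to
# _concat_same_type and returns a DatetimeArray with the timezone preserved.
left = pd.date_range("2020-01-01", periods=3, tz="US/Eastern").array
right = pd.date_range("2020-02-01", periods=3, tz="US/Eastern").array
same_dtype = type(left)._concat_arrays([left, right])

# A tz-aware and a tz-naive array have different dtypes, so _concat_arrays
# coerces both to object and returns an object-dtype ndarray of Timestamps.
naive = pd.date_range("2020-01-01", periods=3).array
mixed = type(left)._concat_arrays([left, naive])
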
114 changes: 6 additions & 108 deletions pandas/core/dtypes/concat.py
@@ -4,11 +4,7 @@

import numpy as np

from pandas._libs import tslib, tslibs

from pandas.core.dtypes.common import (
DT64NS_DTYPE,
TD64NS_DTYPE,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_dtype,
@@ -19,13 +15,7 @@
is_sparse,
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import (
ABCCategoricalIndex,
ABCDatetimeArray,
ABCIndexClass,
ABCRangeIndex,
ABCSeries,
)
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries


def get_dtype_kinds(l):
@@ -149,31 +139,6 @@ def is_nonempty(x) -> bool:
return np.concatenate(to_concat, axis=axis)


def concat_categorical(to_concat, axis: int = 0):
"""
Concatenate an object/categorical array of arrays, each of which is a
single dtype

Parameters
----------
to_concat : array of arrays
axis : int
Axis to provide concatenation in the current implementation this is
always 0, e.g. we only have 1D categoricals

Returns
-------
Categorical
A single array, preserving the combined dtypes
"""
# we could have object blocks and categoricals here
# if we only have a single categoricals then combine everything
# else its a non-compat categorical
from pandas import Categorical

return Categorical._concat_arrays(to_concat, axis=axis)


def union_categoricals(
to_union, sort_categories: bool = False, ignore_order: bool = False
):
@@ -330,13 +295,6 @@ def _maybe_unwrap(x):
return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)


def _concatenate_2d(to_concat, axis: int):
# coerce to 2d if needed & concatenate
if axis == 1:
to_concat = [np.atleast_2d(x) for x in to_concat]
return np.concatenate(to_concat, axis=axis)


def concat_datetime(to_concat, axis=0, typs=None):
"""
provide concatenation of a datetimelike array of arrays each of which is a
@@ -352,70 +310,10 @@
-------
a single array, preserving the combined dtypes
"""
if typs is None:
typs = get_dtype_kinds(to_concat)

# multiple types, need to coerce to object
if len(typs) != 1:
return _concatenate_2d(
[_convert_datetimelike_to_object(x) for x in to_concat], axis=axis
)

# must be single dtype
if any(typ.startswith("datetime") for typ in typs):

if "datetime" in typs:
to_concat = [x.astype(np.int64, copy=False) for x in to_concat]
return _concatenate_2d(to_concat, axis=axis).view(DT64NS_DTYPE)
else:
# when to_concat has different tz, len(typs) > 1.
# thus no need to care
return _concat_datetimetz(to_concat)

elif "timedelta" in typs:
return _concatenate_2d([x.view(np.int64) for x in to_concat], axis=axis).view(
TD64NS_DTYPE
)

elif any(typ.startswith("period") for typ in typs):
assert len(typs) == 1
cls = to_concat[0]
new_values = cls._concat_same_type(to_concat)
return new_values
from pandas.core.arrays import datetimelike as dtl
from pandas.core.ops.array_ops import maybe_upcast_datetimelike_array

to_concat = [maybe_upcast_datetimelike_array(x) for x in to_concat]

def _convert_datetimelike_to_object(x):
# coerce datetimelike array to object dtype

# if dtype is of datetimetz or timezone
if x.dtype.kind == DT64NS_DTYPE.kind:
if getattr(x, "tz", None) is not None:
x = np.asarray(x.astype(object))
else:
shape = x.shape
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), box="timestamp")
x = x.reshape(shape)

elif x.dtype == TD64NS_DTYPE:
shape = x.shape
x = tslibs.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True)
x = x.reshape(shape)

return x


def _concat_datetimetz(to_concat, name=None):
"""
concat DatetimeIndex with the same tz
all inputs must be DatetimeIndex
it is used in DatetimeIndex.append also
"""
# Right now, internals will pass a List[DatetimeArray] here
# for reductions like quantile. I would like to disentangle
# all this before we get here.
sample = to_concat[0]

if isinstance(sample, ABCIndexClass):
return sample._concat_same_dtype(to_concat, name=name)
elif isinstance(sample, ABCDatetimeArray):
return sample._concat_same_type(to_concat)
obj = [x for x in to_concat if isinstance(x, dtl.DatetimeLikeArrayMixin)][0]
return type(obj)._concat_arrays(to_concat, axis=axis)
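
As a rough illustration (inputs assumed; not part of the diff), the slimmed-down concat_datetime above now just runs maybe_upcast_datetimelike_array over the inputs and delegates to _concat_arrays on the first datetime-like array it finds:

import numpy as np
import pandas as pd
from pandas.core.dtypes.concat import concat_datetime

tz_aware = pd.date_range("2020-01-01", periods=2, tz="UTC").array
naive = np.array(["2020-01-03", "2020-01-04"], dtype="datetime64[ns]")

# Mixed tz-aware / tz-naive inputs end up with differing dtypes, so the
# _concat_arrays path coerces to object and returns an object-dtype ndarray.
result = concat_datetime([tz_aware, naive])
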
8 changes: 4 additions & 4 deletions pandas/core/indexes/datetimelike.py
@@ -778,8 +778,8 @@ def _fast_union(self, other, sort=None):
left, right = self, other
left_start = left[0]
loc = right.searchsorted(left_start, side="left")
right_chunk = right.values[:loc]
dates = concat_compat((left.values, right_chunk))
right_chunk = right._values[:loc]
dates = concat_compat([left._values, right_chunk])
result = self._shallow_copy(dates)
result._set_freq("infer")
# TODO: can we infer that it has self.freq?
@@ -793,8 +793,8 @@ def _fast_union(self, other, sort=None):
# concatenate
if left_end < right_end:
loc = right.searchsorted(left_end, side="right")
right_chunk = right.values[loc:]
dates = concat_compat((left.values, right_chunk))
right_chunk = right._values[loc:]
dates = concat_compat([left._values, right_chunk])
result = self._shallow_copy(dates)
result._set_freq("infer")
# TODO: can we infer that it has self.freq?
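
The switch from .values to ._values in _fast_union above matters for tz-aware indexes; a small sketch of the difference (example data assumed):

import pandas as pd

idx = pd.date_range("2020-01-01", periods=3, tz="UTC")

# .values drops the timezone and returns a plain datetime64[ns] ndarray,
# while ._values is the underlying DatetimeArray with the tz preserved,
# which is what concat_compat should receive here.
type(idx.values)   # numpy.ndarray
type(idx._values)  # pandas DatetimeArray
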
4 changes: 2 additions & 2 deletions pandas/core/internals/blocks.py
@@ -48,7 +48,7 @@
is_timedelta64_dtype,
pandas_dtype,
)
from pandas.core.dtypes.concat import concat_categorical, concat_datetime
from pandas.core.dtypes.concat import concat_datetime
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
@@ -2642,7 +2642,7 @@ class CategoricalBlock(ExtensionBlock):
is_categorical = True
_verify_integrity = True
_can_hold_na = True
_concatenator = staticmethod(concat_categorical)
_concatenator = staticmethod(Categorical._concat_arrays)

should_store = Block.should_store

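
Finally, a brief sketch (inputs assumed; per this branch) of the Categorical._concat_arrays method that now backs CategoricalBlock._concatenator, mirroring the removed concat_categorical helper:

import pandas as pd

a = pd.Categorical(["a", "b"], categories=["a", "b", "c"])
b = pd.Categorical(["c"], categories=["a", "b", "c"])

# All inputs share one categorical dtype, so the result should stay
# Categorical; with mismatched or non-categorical inputs it falls back to
# object dtype, as the old concat_categorical helper did.
combined = pd.Categorical._concat_arrays([a, b], axis=0)
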