REF: Simplify concat

Delegation chain: _concat._concat_datetimetz -> DatetimeIndex._concat_same_dtype -> DatetimeArray._concat_same_type
pandas-dev · jreback · Nov 14, 2018 · Nov 12, 2018 · Nov 12, 2018 · Nov 12, 2018
commit eceebc768b40810b35dd40aa1fca785cbc2d9639
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -239,17 +239,12 @@ def take(self, indices, allow_fill=False, fill_value=None):
 
     @classmethod
     def _concat_same_type(cls, to_concat):
-        freqs = {x.freq for x in to_concat}
-        assert len(freqs) == 1
-        freq = list(freqs)[0]
-
-        # dtype captures tz for datetime64tz case
         dtypes = {x.dtype for x in to_concat}
         assert len(dtypes) == 1
         dtype = list(dtypes)[0]
 
         values = np.concatenate([x.asi8 for x in to_concat])
-        return cls(values, dtype=dtype, freq=freq)
+        return cls(values, dtype=dtype)
 
     def copy(self, deep=False):
         values = self.asi8
@@ -262,7 +257,7 @@ def _values_for_factorize(self):
 
     @classmethod
     def _from_factorized(cls, values, original):
-        return cls(values, dtype=original.dtype, freq=original.freq)
+        return cls(values, dtype=original.dtype)
 
     # ------------------------------------------------------------------
     # Null Handling

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -476,13 +476,7 @@ def _concat_datetimetz(to_concat, name=None):
     all inputs must be DatetimeIndex
     it is used in DatetimeIndex.append also
     """
-    # do not pass tz to set because tzlocal cannot be hashed
-    if len({str(x.dtype) for x in to_concat}) != 1:
-        raise ValueError('to_concat must have the same tz')
-    tz = to_concat[0].tz
-    # no need to localize because internal repr will not be changed
-    new_values = np.concatenate([x.asi8 for x in to_concat])
-    return to_concat[0]._simple_new(new_values, tz=tz, name=name)
+    return to_concat[0]._concat_same_dtype(to_concat, name=name)
 
 
 def _concat_index_same_dtype(indexes, klass=None):

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -18,7 +18,6 @@
     is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype,
     is_integer, is_integer_dtype, is_list_like, is_object_dtype,
     is_period_dtype, is_scalar, is_string_dtype)
-import pandas.core.dtypes.concat as _concat
 from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
 from pandas.core.dtypes.missing import isna
 
@@ -690,17 +689,21 @@ def _concat_same_dtype(self, to_concat, name):
         """
         attribs = self._get_attributes_dict()
         attribs['name'] = name
+        # do not pass tz to set because tzlocal cannot be hashed
+        if len({str(x.dtype) for x in to_concat}) != 1:
+            raise ValueError('to_concat must have the same tz')
 
         if not is_period_dtype(self):
             # reset freq
             attribs['freq'] = None
-
-        if getattr(self, 'tz', None) is not None:
-            return _concat._concat_datetimetz(to_concat, name)
+            # TODO(DatetimeArray)
+            # - remove the .asi8 here
+            # - remove the _maybe_box_as_values
+            # - combine with the `else` block
+            new_data = self._concat_same_type(to_concat).asi8
         else:
-            new_data = np.concatenate([c.asi8 for c in to_concat])
+            new_data = type(self._values)._concat_same_type(to_concat)
 
-        new_data = self._maybe_box_as_values(new_data, **attribs)
         return self._simple_new(new_data, **attribs)
 
     def _maybe_box_as_values(self, values, **attribs):
@@ -709,7 +712,6 @@ def _maybe_box_as_values(self, values, **attribs):
         # but others are not. When everyone is an ExtensionArray, this can
         # be removed. Currently used in
         # - sort_values
-        # - _concat_same_dtype
         return values
 
     def astype(self, dtype, copy=True):

diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
@@ -271,6 +271,20 @@ def test_concat_same_type_invalid(self, datetime_index):
         with pytest.raises(AssertionError):
             arr._concat_same_type([arr, other])
 
+    def test_concat_same_type_different_freq(self):
+        # we *can* concatenate DTI with different freqs.
+        a = DatetimeArray(pd.date_range('2000', periods=2, freq='D',
+                                        tz='US/Central'))
+        b = DatetimeArray(pd.date_range('2000', periods=2, freq='H',
+                                        tz='US/Central'))
+        result = DatetimeArray._concat_same_type([a, b])
+        expected = DatetimeArray(pd.to_datetime([
+            '2000-01-01 00:00:00', '2000-01-02 00:00:00',
+            '2000-01-01 00:00:00', '2000-01-01 01:00:00',
+        ]).tz_localize("US/Central"))
+
+        tm.assert_datetime_array_equal(result, expected)
+
 
 class TestTimedeltaArray(SharedTests):
     index_cls = pd.TimedeltaIndex
@@ -339,18 +353,6 @@ def test_take_fill_valid(self, timedelta_index):
             # fill_value Period invalid
             arr.take([0, 1], allow_fill=True, fill_value=now.to_period('D'))
 
-    def test_concat_same_type_invalid(self, timedelta_index):
-        # different freqs
-        tdi = timedelta_index
-        arr = TimedeltaArray(tdi)
-        other = pd.timedelta_range('1D', periods=5, freq='2D')
-        # FIXME: TimedeltaArray should inherit freq='2D' without specifying it
-        other = TimedeltaArray(other, freq='2D')
-        assert other.freq != arr.freq
-
-        with pytest.raises(AssertionError):
-            arr._concat_same_type([arr, other])
-
 
 class TestPeriodArray(SharedTests):
     index_cls = pd.PeriodIndex