kswarj · kswarj · Jun 22, 2022 · Jun 22, 2022 · Jun 22, 2022
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -271,6 +271,9 @@ the given ``dayfirst`` value when the value is a delimited date string (e.g.
 Ignoring dtypes in concat with empty or all-NA columns
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+.. note::
+    This behaviour change has been reverted in pandas 1.4.3.
+
 When using :func:`concat` to concatenate two or more :class:`DataFrame` objects,
 if one of the DataFrames was empty or had all-NA values, its dtype was
 *sometimes* ignored when finding the concatenated dtype.  These are now
@@ -301,9 +304,15 @@ object, the ``np.nan`` is retained.
 
 *New behavior*:
 
-.. ipython:: python
+.. code-block:: ipython
+
+    In [4]: res
+    Out[4]:
+                       bar
+    0  2013-01-01 00:00:00
+    1                  NaN
+
 
-    res
 
 .. _whatsnew_140.notable_bug_fixes.value_counts_and_mode_do_not_coerce_to_nan:
 

diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst
@@ -10,6 +10,17 @@ including other versions of pandas.
 
 .. ---------------------------------------------------------------------------
 
+.. _whatsnew_143.concat:
+
+Behaviour of ``concat`` with empty or all-NA DataFrame columns
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The behaviour change in version 1.4.0 to stop ignoring the data type
+of empty or all-NA columns with float or object dtype in :func:`concat`
+(:ref:`whatsnew_140.notable_bug_fixes.concat_with_empty_or_all_na`) has been
+reverted (:issue:`45637`).
+
+
 .. _whatsnew_143.regressions:
 
 Fixed regressions

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -31,7 +31,6 @@
 )
 from pandas.core.dtypes.missing import isna
 
-from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays.base import ExtensionArray
 from pandas.core.indexers import (
     check_array_indexer,
@@ -46,22 +45,13 @@
     from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning
     from pandas.core.arrays.arrow.dtype import ArrowDtype
 
-    ARROW_CMP_FUNCS = {
-        "eq": pc.equal,
-        "ne": pc.not_equal,
-        "lt": pc.less,
-        "gt": pc.greater,
-        "le": pc.less_equal,
-        "ge": pc.greater_equal,
-    }
-
 if TYPE_CHECKING:
     from pandas import Series
 
 ArrowExtensionArrayT = TypeVar("ArrowExtensionArrayT", bound="ArrowExtensionArray")
 
 
-class ArrowExtensionArray(OpsMixin, ExtensionArray):
+class ArrowExtensionArray(ExtensionArray):
     """
     Base class for ExtensionArray backed by Arrow ChunkedArray.
     """
@@ -189,34 +179,6 @@ def __arrow_array__(self, type=None):
         """Convert myself to a pyarrow ChunkedArray."""
         return self._data
 
-    def _cmp_method(self, other, op):
-        from pandas.arrays import BooleanArray
-
-        pc_func = ARROW_CMP_FUNCS[op.__name__]
-        if isinstance(other, ArrowExtensionArray):
-            result = pc_func(self._data, other._data)
-        elif isinstance(other, (np.ndarray, list)):
-            result = pc_func(self._data, other)
-        elif is_scalar(other):
-            try:
-                result = pc_func(self._data, pa.scalar(other))
-            except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid):
-                mask = isna(self) | isna(other)
-                valid = ~mask
-                result = np.zeros(len(self), dtype="bool")
-                result[valid] = op(np.array(self)[valid], other)
-                return BooleanArray(result, mask)
-        else:
-            return NotImplementedError(
-                f"{op.__name__} not implemented for {type(other)}"
-            )
-
-        if pa_version_under2p0:
-            result = result.to_pandas().values
-        else:
-            result = result.to_numpy()
-        return BooleanArray._from_sequence(result)
-
     def equals(self, other) -> bool:
         if not isinstance(other, ArrowExtensionArray):
             return False
@@ -619,7 +581,7 @@ def _replace_with_indices(
             # fast path for a contiguous set of indices
             arrays = [
                 chunk[:start],
-                pa.array(value, type=chunk.type, from_pandas=True),
+                pa.array(value, type=chunk.type),
                 chunk[stop + 1 :],
             ]
             arrays = [arr for arr in arrays if len(arr)]

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
@@ -34,6 +34,7 @@
 )
 from pandas.core.dtypes.missing import isna
 
+from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays.arrow import ArrowExtensionArray
 from pandas.core.arrays.boolean import BooleanDtype
 from pandas.core.arrays.integer import Int64Dtype
@@ -50,6 +51,15 @@
 
     from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning
 
+    ARROW_CMP_FUNCS = {
+        "eq": pc.equal,
+        "ne": pc.not_equal,
+        "lt": pc.less,
+        "gt": pc.greater,
+        "le": pc.less_equal,
+        "ge": pc.greater_equal,
+    }
+
 ArrowStringScalarOrNAT = Union[str, libmissing.NAType]
 
 
@@ -64,7 +74,9 @@ def _chk_pyarrow_available() -> None:
 # fallback for the ones that pyarrow doesn't yet support
 
 
-class ArrowStringArray(ArrowExtensionArray, BaseStringArray, ObjectStringArrayMixin):
+class ArrowStringArray(
+    OpsMixin, ArrowExtensionArray, BaseStringArray, ObjectStringArrayMixin
+):
     """
     Extension array for string data in a ``pyarrow.ChunkedArray``.
 
@@ -178,6 +190,32 @@ def to_numpy(
             result[mask] = na_value
         return result
 
+    def _cmp_method(self, other, op):
+        from pandas.arrays import BooleanArray
+
+        pc_func = ARROW_CMP_FUNCS[op.__name__]
+        if isinstance(other, ArrowStringArray):
+            result = pc_func(self._data, other._data)
+        elif isinstance(other, (np.ndarray, list)):
+            result = pc_func(self._data, other)
+        elif is_scalar(other):
+            try:
+                result = pc_func(self._data, pa.scalar(other))
+            except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid):
+                mask = isna(self) | isna(other)
+                valid = ~mask
+                result = np.zeros(len(self), dtype="bool")
+                result[valid] = op(np.array(self)[valid], other)
+                return BooleanArray(result, mask)
+        else:
+            return NotImplemented
+
+        if pa_version_under2p0:
+            result = result.to_pandas().values
+        else:
+            result = result.to_numpy()
+        return BooleanArray._from_sequence(result)
+
     def insert(self, loc: int, item):
         if not isinstance(item, str) and item is not libmissing.NA:
             raise TypeError("Scalar must be NA or str")

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -18,6 +18,7 @@
 import pandas._libs.missing as libmissing
 from pandas._libs.tslibs import (
     NaT,
+    Period,
     iNaT,
 )
 
@@ -739,3 +740,40 @@ def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool:
 
     # fallback, default to allowing NaN, None, NA, NaT
     return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal))
+
+
+def isna_all(arr: ArrayLike) -> bool:
+    """
+    Optimized equivalent to isna(arr).all()
+    """
+    total_len = len(arr)
+
+    # Usually it's enough to check but a small fraction of values to see if
+    #  a block is NOT null, chunks should help in such cases.
+    #  parameters 1000 and 40 were chosen arbitrarily
+    chunk_len = max(total_len // 40, 1000)
+
+    dtype = arr.dtype
+    if dtype.kind == "f":
+        checker = nan_checker
+
+    elif dtype.kind in ["m", "M"] or dtype.type is Period:
+        # error: Incompatible types in assignment (expression has type
+        # "Callable[[Any], Any]", variable has type "ufunc")
+        checker = lambda x: np.asarray(x.view("i8")) == iNaT  # type: ignore[assignment]
+
+    else:
+        # error: Incompatible types in assignment (expression has type "Callable[[Any],
+        # Any]", variable has type "ufunc")
+        checker = lambda x: _isna_array(  # type: ignore[assignment]
+            x, inf_as_na=INF_AS_NA
+        )
+
+    return all(
+        # error: Argument 1 to "__call__" of "ufunc" has incompatible type
+        # "Union[ExtensionArray, Any]"; expected "Union[Union[int, float, complex, str,
+        # bytes, generic], Sequence[Union[int, float, complex, str, bytes, generic]],
+        # Sequence[Sequence[Any]], _SupportsArray]"
+        checker(arr[i : i + chunk_len]).all()  # type: ignore[arg-type]
+        for i in range(0, total_len, chunk_len)
+    )