From 844dc4a4fb8d213303085709aa4a3649400ed51a Mon Sep 17 00:00:00 2001
From: Tom Augspurger <TomAugspurger@users.noreply.github.com>
Date: Mon, 30 Dec 2019 11:55:17 -0600
Subject: [PATCH] API: Uses pd.NA in IntegerArray (#29964)

---
 doc/source/user_guide/integer_na.rst   |  28 +++++
 doc/source/whatsnew/v1.0.0.rst         |  58 ++++++++++
 pandas/core/arrays/boolean.py          |   6 +-
 pandas/core/arrays/integer.py          | 116 +++++++++++++------
 pandas/tests/arrays/test_integer.py    | 149 ++++++++++++++++++-------
 pandas/tests/base/test_conversion.py   |   2 +-
 pandas/tests/extension/test_integer.py |  27 +++--
 7 files changed, 298 insertions(+), 88 deletions(-)
diff --git a/doc/source/user_guide/integer_na.rst b/doc/source/user_guide/integer_na.rst
index 77568f3bcb244..a45d7a4fa1547 100644
--- a/doc/source/user_guide/integer_na.rst
+++ b/doc/source/user_guide/integer_na.rst
@@ -15,6 +15,10 @@ Nullable integer data type
    IntegerArray is currently experimental. Its API or implementation may
    change without warning.
 
+.. versionchanged:: 1.0.0
+
+   Now uses :attr:`pandas.NA` as the missing value rather
+   than :attr:`numpy.nan`.
 
 In :ref:`missing_data`, we saw that pandas primarily uses ``NaN`` to represent
 missing data. Because ``NaN`` is a float, this forces an array of integers with
@@ -23,6 +27,9 @@ much. But if your integer column is, say, an identifier, casting to float can
 be problematic. Some integers cannot even be represented as floating point
 numbers.
 
+Construction
+------------
+
 Pandas can represent integer data with possibly missing values using
 :class:`arrays.IntegerArray`. This is an :ref:`extension types <extending.extension-types>`
 implemented within pandas.
@@ -39,6 +46,12 @@ NumPy's ``'int64'`` dtype:
 
    pd.array([1, 2, np.nan], dtype="Int64")
 
+All NA-like values are replaced with :attr:`pandas.NA`.
+
+.. ipython:: python
+
+   pd.array([1, 2, np.nan, None, pd.NA], dtype="Int64")
+
 This array can be stored in a :class:`DataFrame` or :class:`Series` like any
 NumPy array.
 
@@ -78,6 +91,9 @@ with the dtype.
    In the future, we may provide an option for :class:`Series` to infer a
    nullable-integer dtype.
 
+Operations
+----------
+
 Operations involving an integer array will behave similar to NumPy arrays.
 Missing values will be propagated, and the data will be coerced to another
 dtype if needed.
@@ -123,3 +139,15 @@ Reduction and groupby operations such as 'sum' work as well.
 
    df.sum()
    df.groupby('B').A.sum()
+
+Scalar NA Value
+---------------
+
+:class:`arrays.IntegerArray` uses :attr:`pandas.NA` as its scalar
+missing value. Indexing a single element that's missing will return
+:attr:`pandas.NA`.
+
+.. ipython:: python
+
+   a = pd.array([1, None], dtype="Int64")
+   a[1]
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index a6ba7770dadcc..8755abe642068 100755
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -365,6 +365,64 @@ The following methods now also correctly output values for unobserved categories
 
 As a reminder, you can specify the ``dtype`` to disable all inference.
 
+:class:`arrays.IntegerArray` now uses :attr:`pandas.NA`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` rather than
+:attr:`numpy.nan` as its missing value marker (:issue:`29964`).
+
+*pandas 0.25.x*
+
+.. code-block:: python
+
+   >>> a = pd.array([1, 2, None], dtype="Int64")
+   >>> a
+   <IntegerArray>
+   [1, 2, NaN]
+   Length: 3, dtype: Int64
+
+   >>> a[2]
+   nan
+
+*pandas 1.0.0*
+
+.. ipython:: python
+
+   a = pd.array([1, 2, None], dtype="Int64")
+   a[2]
+
+See :ref:`missing_data.NA` for more on the differences between :attr:`pandas.NA`
+and :attr:`numpy.nan`.
+
+:class:`arrays.IntegerArray` comparisons return :class:`arrays.BooleanArray`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Comparison operations on a :class:`arrays.IntegerArray` now return a
+:class:`arrays.BooleanArray` rather than a NumPy array (:issue:`29964`).
+
+*pandas 0.25.x*
+
+.. code-block:: python
+
+   >>> a = pd.array([1, 2, None], dtype="Int64")
+   >>> a
+   <IntegerArray>
+   [1, 2, NaN]
+   Length: 3, dtype: Int64
+
+   >>> a > 1
+   array([False,  True, False])
+
+*pandas 1.0.0*
+
+.. ipython:: python
+
+   a = pd.array([1, 2, None], dtype="Int64")
+   a > 1
+
+Note that missing values now propagate, rather than always comparing unequal
+like :attr:`numpy.nan`. See :ref:`missing_data.NA` for more.
+
 By default :meth:`Categorical.min` now returns the minimum instead of np.nan
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index 600165ad9ac13..7301c0ab434a0 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -730,7 +730,6 @@ def all(self, skipna: bool = True, **kwargs):
     @classmethod
     def _create_logical_method(cls, op):
         def logical_method(self, other):
-
             if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
                 # Rely on pandas to unbox and dispatch to us.
                 return NotImplemented
@@ -777,8 +776,11 @@ def logical_method(self, other):
     @classmethod
     def _create_comparison_method(cls, op):
         def cmp_method(self, other):
+            from pandas.arrays import IntegerArray
 
-            if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
+            if isinstance(
+                other, (ABCDataFrame, ABCSeries, ABCIndexClass, IntegerArray)
+            ):
                 # Rely on pandas to unbox and dispatch to us.
                 return NotImplemented
 
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index ee8b2c3bb723f..62f31addedc0b 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -1,10 +1,10 @@
 import numbers
-from typing import Type
+from typing import Any, Tuple, Type
 import warnings
 
 import numpy as np
 
-from pandas._libs import lib
+from pandas._libs import lib, missing as libmissing
 from pandas.compat import set_function_name
 from pandas.util._decorators import cache_readonly
 
@@ -44,7 +44,7 @@ class _IntegerDtype(ExtensionDtype):
     name: str
     base = None
     type: Type
-    na_value = np.nan
+    na_value = libmissing.NA
 
     def __repr__(self) -> str:
         sign = "U" if self.is_unsigned_integer else ""
@@ -263,6 +263,11 @@ class IntegerArray(ExtensionArray, ExtensionOpsMixin):
 
     .. versionadded:: 0.24.0
 
+    .. versionchanged:: 1.0.0
+
+       Now uses :attr:`pandas.NA` as the missing value rather
+       than :attr:`numpy.nan`.
+
     .. warning::
 
        IntegerArray is currently experimental, and its API or internal
@@ -358,14 +363,6 @@ def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
     def _from_factorized(cls, values, original):
         return integer_array(values, dtype=original.dtype)
 
-    def _formatter(self, boxed=False):
-        def fmt(x):
-            if isna(x):
-                return "NaN"
-            return str(x)
-
-        return fmt
-
     def __getitem__(self, item):
         if is_integer(item):
             if self._mask[item]:
@@ -373,14 +370,30 @@ def __getitem__(self, item):
             return self._data[item]
         return type(self)(self._data[item], self._mask[item])
 
-    def _coerce_to_ndarray(self):
+    def _coerce_to_ndarray(self, dtype=None, na_value=lib._no_default):
         """
         coerce to an ndarary of object dtype
         """
+        if dtype is None:
+            dtype = object
+
+        if na_value is lib._no_default and is_float_dtype(dtype):
+            na_value = np.nan
+        elif na_value is lib._no_default:
+            na_value = libmissing.NA
+
+        if is_integer_dtype(dtype):
+            # Specifically, a NumPy integer dtype, not a pandas integer dtype,
+            # since we're coercing to a numpy dtype by definition in this function.
+            if not self.isna().any():
+                return self._data.astype(dtype)
+            else:
+                raise ValueError(
+                    "cannot convert to integer NumPy array with missing values"
+                )
 
-        # TODO(jreback) make this better
-        data = self._data.astype(object)
-        data[self._mask] = self._na_value
+        data = self._data.astype(dtype)
+        data[self._mask] = na_value
         return data
 
     __array_priority__ = 1000  # higher than ndarray so ops dispatch to us
@@ -390,7 +403,7 @@ def __array__(self, dtype=None):
         the array interface, return my values
         We return an object array here to preserve our scalar values
         """
-        return self._coerce_to_ndarray()
+        return self._coerce_to_ndarray(dtype=dtype)
 
     def __arrow_array__(self, type=None):
         """
@@ -506,7 +519,7 @@ def isna(self):
 
     @property
     def _na_value(self):
-        return np.nan
+        return self.dtype.na_value
 
     @classmethod
     def _concat_same_type(cls, to_concat):
@@ -545,7 +558,7 @@ def astype(self, dtype, copy=True):
             return type(self)(result, mask=self._mask, copy=False)
 
         # coerce
-        data = self._coerce_to_ndarray()
+        data = self._coerce_to_ndarray(dtype=dtype)
         return astype_nansafe(data, dtype, copy=False)
 
     @property
@@ -600,12 +613,19 @@ def value_counts(self, dropna=True):
             # w/o passing the dtype
             array = np.append(array, [self._mask.sum()])
             index = Index(
-                np.concatenate([index.values, np.array([np.nan], dtype=object)]),
+                np.concatenate(
+                    [index.values, np.array([self.dtype.na_value], dtype=object)]
+                ),
                 dtype=object,
             )
 
         return Series(array, index=index)
 
+    def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
+        # TODO: https://github.com/pandas-dev/pandas/issues/30037
+        # use masked algorithms, rather than object-dtype / np.nan.
+        return self._coerce_to_ndarray(na_value=np.nan), np.nan
+
     def _values_for_argsort(self) -> np.ndarray:
         """Return values for sorting.
 
@@ -629,9 +649,11 @@ def _create_comparison_method(cls, op):
 
         @unpack_zerodim_and_defer(op.__name__)
         def cmp_method(self, other):
+            from pandas.arrays import BooleanArray
+
             mask = None
 
-            if isinstance(other, IntegerArray):
+            if isinstance(other, (BooleanArray, IntegerArray)):
                 other, mask = other._data, other._mask
 
             elif is_list_like(other):
@@ -643,25 +665,35 @@ def cmp_method(self, other):
                 if len(self) != len(other):
                     raise ValueError("Lengths must match to compare")
 
-            # numpy will show a DeprecationWarning on invalid elementwise
-            # comparisons, this will raise in the future
-            with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", "elementwise", FutureWarning)
-                with np.errstate(all="ignore"):
-                    method = getattr(self._data, f"__{op_name}__")
-                    result = method(other)
+            if other is libmissing.NA:
+                # numpy does not handle pd.NA well as "other" scalar (it returns
+                # a scalar False instead of an array)
+                # This may be fixed by NA.__array_ufunc__. Revisit this check
+                # once that's implemented.
+                result = np.zeros(self._data.shape, dtype="bool")
+                mask = np.ones(self._data.shape, dtype="bool")
+            else:
+                with warnings.catch_warnings():
+                    # numpy may show a FutureWarning:
+                    #     elementwise comparison failed; returning scalar instead,
+                    #     but in the future will perform elementwise comparison
+                    # before returning NotImplemented. We fall back to the correct
+                    # behavior today, so that should be fine to ignore.
+                    warnings.filterwarnings("ignore", "elementwise", FutureWarning)
+                    with np.errstate(all="ignore"):
+                        method = getattr(self._data, f"__{op_name}__")
+                        result = method(other)
 
                     if result is NotImplemented:
                         result = invalid_comparison(self._data, other, op)
 
             # nans propagate
             if mask is None:
-                mask = self._mask
+                mask = self._mask.copy()
             else:
                 mask = self._mask | mask
 
-            result[mask] = op_name == "ne"
-            return result
+            return BooleanArray(result, mask)
 
         name = f"__{op.__name__}__"
         return set_function_name(cmp_method, name, cls)
@@ -673,7 +705,8 @@ def _reduce(self, name, skipna=True, **kwargs):
         # coerce to a nan-aware float if needed
         if mask.any():
             data = self._data.astype("float64")
-            data[mask] = self._na_value
+            # We explicitly use NaN within reductions.
+            data[mask] = np.nan
 
         op = getattr(nanops, "nan" + name)
         result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
@@ -739,12 +772,13 @@ def integer_arithmetic_method(self, other):
                     raise TypeError("can only perform ops with numeric values")
 
             else:
-                if not (is_float(other) or is_integer(other)):
+                if not (is_float(other) or is_integer(other) or other is libmissing.NA):
                     raise TypeError("can only perform ops with numeric values")
 
-            # nans propagate
             if omask is None:
                 mask = self._mask.copy()
+                if other is libmissing.NA:
+                    mask |= True
             else:
                 mask = self._mask | omask
 
@@ -754,20 +788,23 @@ def integer_arithmetic_method(self, other):
                 # x ** 0 is 1.
                 if omask is not None:
                     mask = np.where((other == 0) & ~omask, False, mask)
-                else:
+                elif other is not libmissing.NA:
                     mask = np.where(other == 0, False, mask)
 
             elif op_name == "rpow":
                 # 1 ** x is 1.
                 if omask is not None:
                     mask = np.where((other == 1) & ~omask, False, mask)
-                else:
+                elif other is not libmissing.NA:
                     mask = np.where(other == 1, False, mask)
                 # x ** 0 is 1.
                 mask = np.where((self._data == 0) & ~self._mask, False, mask)
 
-            with np.errstate(all="ignore"):
-                result = op(self._data, other)
+            if other is libmissing.NA:
+                result = np.ones_like(self._data)
+            else:
+                with np.errstate(all="ignore"):
+                    result = op(self._data, other)
 
             # divmod returns a tuple
             if op_name == "divmod":
@@ -790,6 +827,11 @@ def integer_arithmetic_method(self, other):
 _dtype_docstring = """
 An ExtensionDtype for {dtype} integer data.
 
+.. versionchanged:: 1.0.0
+
+   Now uses :attr:`pandas.NA` as its missing value,
+   rather than :attr:`numpy.nan`.
+
 Attributes
 ----------
 None
diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index e534c93c69f68..f9b002d4409ce 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -90,7 +90,7 @@ def test_repr_dtype(dtype, expected):
 
 def test_repr_array():
     result = repr(integer_array([1, None, 3]))
-    expected = "<IntegerArray>\n[1, NaN, 3]\nLength: 3, dtype: Int64"
+    expected = "<IntegerArray>\n[1, NA, 3]\nLength: 3, dtype: Int64"
     assert result == expected
 
 
@@ -98,9 +98,9 @@ def test_repr_array_long():
     data = integer_array([1, 2, None] * 1000)
     expected = (
         "<IntegerArray>\n"
-        "[  1,   2, NaN,   1,   2, NaN,   1,   2, NaN,   1,\n"
+        "[ 1,  2, NA,  1,  2, NA,  1,  2, NA,  1,\n"
         " ...\n"
-        " NaN,   1,   2, NaN,   1,   2, NaN,   1,   2, NaN]\n"
+        " NA,  1,  2, NA,  1,  2, NA,  1,  2, NA]\n"
         "Length: 3000, dtype: Int64"
     )
     result = repr(data)
@@ -108,13 +108,17 @@ def test_repr_array_long():
 
 
 class TestConstructors:
+    def test_uses_pandas_na(self):
+        a = pd.array([1, None], dtype=pd.Int64Dtype())
+        assert a[1] is pd.NA
+
     def test_from_dtype_from_float(self, data):
         # construct from our dtype & string dtype
         dtype = data.dtype
 
         # from float
         expected = pd.Series(data)
-        result = pd.Series(np.array(data).astype("float"), dtype=str(dtype))
+        result = pd.Series(np.array(data, dtype="float"), dtype=str(dtype))
         tm.assert_series_equal(result, expected)
 
         # from int / list
@@ -156,10 +160,13 @@ def _check_op(self, s, op_name, other, exc=None):
 
         # 1 ** na is na, so need to unmask those
         if op_name == "__pow__":
-            mask = np.where(s == 1, False, mask)
+            mask = np.where(~s.isna() & (s == 1), False, mask)
 
         elif op_name == "__rpow__":
-            mask = np.where(other == 1, False, mask)
+            other_is_one = other == 1
+            if isinstance(other_is_one, pd.Series):
+                other_is_one = other_is_one.fillna(False)
+            mask = np.where(other_is_one, False, mask)
 
         # float result type or float op
         if (
@@ -208,20 +215,27 @@ def _check_op_integer(self, result, expected, mask, s, op_name, other):
                 else:
                     expected = expected.fillna(0)
             else:
-                expected[(s.values == 0) & ((expected == 0) | expected.isna())] = 0
+                expected[
+                    (s.values == 0).fillna(False)
+                    & ((expected == 0).fillna(False) | expected.isna())
+                ] = 0
         try:
-            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
+            expected[
+                ((expected == np.inf) | (expected == -np.inf)).fillna(False)
+            ] = fill_value
             original = expected
             expected = expected.astype(s.dtype)
 
         except ValueError:
 
             expected = expected.astype(float)
-            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
+            expected[
+                ((expected == np.inf) | (expected == -np.inf)).fillna(False)
+            ] = fill_value
             original = expected
             expected = expected.astype(s.dtype)
 
-        expected[mask] = np.nan
+        expected[mask] = pd.NA
 
         # assert that the expected astype is ok
         # (skip for unsigned as they have wrap around)
@@ -255,21 +269,18 @@ def test_arith_integer_array(self, data, all_arithmetic_operators):
     def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
         # scalar
         op = all_arithmetic_operators
-
         s = pd.Series(data)
         self._check_op(s, op, 1, exc=TypeError)
 
     def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
         # frame & scalar
         op = all_arithmetic_operators
-
         df = pd.DataFrame({"A": data})
         self._check_op(df, op, 1, exc=TypeError)
 
     def test_arith_series_with_array(self, data, all_arithmetic_operators):
         # ndarray & other series
         op = all_arithmetic_operators
-
         s = pd.Series(data)
         other = np.ones(len(s), dtype=s.dtype.type)
         self._check_op(s, op, other, exc=TypeError)
@@ -359,9 +370,9 @@ def test_pow_scalar(self):
         expected = pd.array([0, 1, None, 2], dtype="Int64")
         tm.assert_extension_array_equal(result, expected)
 
-        # result = a ** pd.NA
-        # expected = pd.array([None, 1, None, None], dtype="Int64")
-        # tm.assert_extension_array_equal(result, expected)
+        result = a ** pd.NA
+        expected = pd.array([None, 1, None, None], dtype="Int64")
+        tm.assert_extension_array_equal(result, expected)
 
         result = a ** np.nan
         expected = np.array([np.nan, 1, np.nan, np.nan], dtype="float64")
@@ -376,9 +387,9 @@ def test_pow_scalar(self):
         expected = pd.array([1, 1, 1, 1], dtype="Int64")
         tm.assert_extension_array_equal(result, expected)
 
-        # result = pd.NA ** a
-        # expected = pd.array([1, None, None, None], dtype="Int64")
-        # tm.assert_extension_array_equal(result, expected)
+        result = pd.NA ** a
+        expected = pd.array([1, None, None, None], dtype="Int64")
+        tm.assert_extension_array_equal(result, expected)
 
         result = np.nan ** a
         expected = np.array([1, np.nan, np.nan, np.nan], dtype="float64")
@@ -406,10 +417,10 @@ def _compare_other(self, data, op_name, other):
 
         # array
         result = pd.Series(op(data, other))
-        expected = pd.Series(op(data._data, other))
+        expected = pd.Series(op(data._data, other), dtype="boolean")
 
         # fill the nan locations
-        expected[data._mask] = op_name == "__ne__"
+        expected[data._mask] = pd.NA
 
         tm.assert_series_equal(result, expected)
 
@@ -417,22 +428,61 @@ def _compare_other(self, data, op_name, other):
         s = pd.Series(data)
         result = op(s, other)
 
-        expected = pd.Series(data._data)
-        expected = op(expected, other)
+        expected = op(pd.Series(data._data), other)
 
         # fill the nan locations
-        expected[data._mask] = op_name == "__ne__"
+        expected[data._mask] = pd.NA
+        expected = expected.astype("boolean")
 
         tm.assert_series_equal(result, expected)
 
-    def test_compare_scalar(self, data, all_compare_operators):
-        op_name = all_compare_operators
-        self._compare_other(data, op_name, 0)
+    @pytest.mark.parametrize("other", [True, False, pd.NA, -1, 0, 1])
+    def test_scalar(self, other, all_compare_operators):
+        op = self.get_op_from_name(all_compare_operators)
+        a = pd.array([1, 0, None], dtype="Int64")
+
+        result = op(a, other)
+
+        if other is pd.NA:
+            expected = pd.array([None, None, None], dtype="boolean")
+        else:
+            values = op(a._data, other)
+            expected = pd.arrays.BooleanArray(values, a._mask, copy=True)
+        tm.assert_extension_array_equal(result, expected)
+
+        # ensure we haven't mutated anything inplace
+        result[0] = pd.NA
+        tm.assert_extension_array_equal(a, pd.array([1, 0, None], dtype="Int64"))
+
+    def test_array(self, all_compare_operators):
+        op = self.get_op_from_name(all_compare_operators)
+        a = pd.array([0, 1, 2, None, None, None], dtype="Int64")
+        b = pd.array([0, 1, None, 0, 1, None], dtype="Int64")
+
+        result = op(a, b)
+        values = op(a._data, b._data)
+        mask = a._mask | b._mask
 
-    def test_compare_array(self, data, all_compare_operators):
-        op_name = all_compare_operators
-        other = pd.Series([0] * len(data))
-        self._compare_other(data, op_name, other)
+        expected = pd.arrays.BooleanArray(values, mask)
+        tm.assert_extension_array_equal(result, expected)
+
+        # ensure we haven't mutated anything inplace
+        result[0] = pd.NA
+        tm.assert_extension_array_equal(
+            a, pd.array([0, 1, 2, None, None, None], dtype="Int64")
+        )
+        tm.assert_extension_array_equal(
+            b, pd.array([0, 1, None, 0, 1, None], dtype="Int64")
+        )
+
+    def test_compare_with_booleanarray(self, all_compare_operators):
+        op = self.get_op_from_name(all_compare_operators)
+        a = pd.array([True, False, None] * 3, dtype="boolean")
+        b = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype="Int64")
+        other = pd.array([False] * 3 + [True] * 3 + [None] * 3, dtype="boolean")
+        expected = op(a, other)
+        result = op(a, b)
+        tm.assert_extension_array_equal(result, expected)
 
     def test_no_shared_mask(self, data):
         result = data + 1
@@ -442,20 +492,21 @@ def test_compare_to_string(self, any_nullable_int_dtype):
         # GH 28930
         s = pd.Series([1, None], dtype=any_nullable_int_dtype)
         result = s == "a"
-        expected = pd.Series([False, False])
+        expected = pd.Series([False, pd.NA], dtype="boolean")
 
         self.assert_series_equal(result, expected)
 
     def test_compare_to_int(self, any_nullable_int_dtype, all_compare_operators):
         # GH 28930
-        s1 = pd.Series([1, 2, 3], dtype=any_nullable_int_dtype)
-        s2 = pd.Series([1, 2, 3], dtype="int")
+        s1 = pd.Series([1, None, 3], dtype=any_nullable_int_dtype)
+        s2 = pd.Series([1, None, 3], dtype="float")
 
         method = getattr(s1, all_compare_operators)
         result = method(2)
 
         method = getattr(s2, all_compare_operators)
-        expected = method(2)
+        expected = method(2).astype("boolean")
+        expected[s2.isna()] = pd.NA
 
         self.assert_series_equal(result, expected)
 
@@ -543,6 +594,17 @@ def test_astype(self, all_data):
         expected = pd.Series(np.asarray(mixed))
         tm.assert_series_equal(result, expected)
 
+    def test_astype_to_larger_numpy(self):
+        a = pd.array([1, 2], dtype="Int32")
+        result = a.astype("int64")
+        expected = np.array([1, 2], dtype="int64")
+        tm.assert_numpy_array_equal(result, expected)
+
+        a = pd.array([1, 2], dtype="UInt32")
+        result = a.astype("uint64")
+        expected = np.array([1, 2], dtype="uint64")
+        tm.assert_numpy_array_equal(result, expected)
+
     @pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
     def test_astype_specific_casting(self, dtype):
         s = pd.Series([1, 2, 3], dtype="Int64")
@@ -572,12 +634,17 @@ def test_construct_cast_invalid(self, dtype):
         with pytest.raises(TypeError, match=msg):
             pd.Series(arr).astype(dtype)
 
+    def test_coerce_to_ndarray_float_NA_rasies(self):
+        a = pd.array([0, 1, 2], dtype="Int64")
+        with pytest.raises(TypeError, match="NAType"):
+            a._coerce_to_ndarray(dtype="float", na_value=pd.NA)
+
 
 def test_frame_repr(data_missing):
 
     df = pd.DataFrame({"A": data_missing})
     result = repr(df)
-    expected = "     A\n0  NaN\n1    1"
+    expected = "    A\n0  NA\n1   1"
     assert result == expected
 
 
@@ -593,7 +660,7 @@ def test_conversions(data_missing):
     # we assert that we are exactly equal
     # including type conversions of scalars
     result = df["A"].astype("object").values
-    expected = np.array([np.nan, 1], dtype=object)
+    expected = np.array([pd.NA, 1], dtype=object)
     tm.assert_numpy_array_equal(result, expected)
 
     for r, e in zip(result, expected):
@@ -756,7 +823,7 @@ def test_cross_type_arithmetic():
     tm.assert_series_equal(result, expected)
 
     result = (df.A + df.C) * 3 == 12
-    expected = pd.Series([False, True, False])
+    expected = pd.Series([False, True, None], dtype="boolean")
     tm.assert_series_equal(result, expected)
 
     result = df.A + df.B
@@ -820,7 +887,7 @@ def test_reduce_to_float(op):
 def test_astype_nansafe():
     # see gh-22343
     arr = integer_array([np.nan, 1, 2], dtype="Int8")
-    msg = "cannot convert float NaN to integer"
+    msg = "cannot convert to integer NumPy array with missing values"
 
     with pytest.raises(ValueError, match=msg):
         arr.astype("uint32")
@@ -895,7 +962,9 @@ def test_arrow_array(data):
     import pyarrow as pa
 
     arr = pa.array(data)
-    expected = pa.array(list(data), type=data.dtype.name.lower(), from_pandas=True)
+    expected = np.array(data, dtype=object)
+    expected[data.isna()] = None
+    expected = pa.array(expected, type=data.dtype.name.lower(), from_pandas=True)
     assert arr.equals(expected)
 
 
diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py
index 8fa52af832907..4b6349a505509 100644
--- a/pandas/tests/base/test_conversion.py
+++ b/pandas/tests/base/test_conversion.py
@@ -315,7 +315,7 @@ def test_array_multiindex_raises():
         ),
         (
             pd.core.arrays.integer_array([0, np.nan]),
-            np.array([0, np.nan], dtype=object),
+            np.array([0, pd.NA], dtype=object),
         ),
         (
             pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]),
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
index d051345fdd12d..8e54543e5437c 100644
--- a/pandas/tests/extension/test_integer.py
+++ b/pandas/tests/extension/test_integer.py
@@ -34,7 +34,7 @@
 
 
 def make_data():
-    return list(range(1, 9)) + [np.nan] + list(range(10, 98)) + [np.nan] + [99, 100]
+    return list(range(1, 9)) + [pd.NA] + list(range(10, 98)) + [pd.NA] + [99, 100]
 
 
 @pytest.fixture(
@@ -65,7 +65,7 @@ def data_for_twos(dtype):
 
 @pytest.fixture
 def data_missing(dtype):
-    return integer_array([np.nan, 1], dtype=dtype)
+    return integer_array([pd.NA, 1], dtype=dtype)
 
 
 @pytest.fixture
@@ -75,18 +75,18 @@ def data_for_sorting(dtype):
 
 @pytest.fixture
 def data_missing_for_sorting(dtype):
-    return integer_array([1, np.nan, 0], dtype=dtype)
+    return integer_array([1, pd.NA, 0], dtype=dtype)
 
 
 @pytest.fixture
 def na_cmp():
-    # we are np.nan
-    return lambda x, y: np.isnan(x) and np.isnan(y)
+    # we are pd.NA
+    return lambda x, y: x is pd.NA and y is pd.NA
 
 
 @pytest.fixture
 def na_value():
-    return np.nan
+    return pd.NA
 
 
 @pytest.fixture
@@ -94,7 +94,7 @@ def data_for_grouping(dtype):
     b = 1
     a = 0
     c = 2
-    na = np.nan
+    na = pd.NA
     return integer_array([b, b, na, na, a, a, b, c], dtype=dtype)
 
 
@@ -129,7 +129,7 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
             expected = s.combine(other, op)
 
             if op_name in ("__rtruediv__", "__truediv__", "__div__"):
-                expected = expected.astype(float)
+                expected = expected.fillna(np.nan).astype(float)
                 if op_name == "__rtruediv__":
                     # TODO reverse operators result in object dtype
                     result = result.astype(float)
@@ -142,6 +142,7 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
                 # combine method result in 'biggest' (int64) dtype
                 expected = expected.astype(s.dtype)
                 pass
+
             if (op_name == "__rpow__") and isinstance(other, pd.Series):
                 # TODO pow on Int arrays gives different result with NA
                 # see https://github.com/pandas-dev/pandas/issues/22022
@@ -162,6 +163,16 @@ def test_error(self, data, all_arithmetic_operators):
 
 
 class TestComparisonOps(base.BaseComparisonOpsTests):
+    def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
+        if exc is None:
+            result = op(s, other)
+            # Override to do the astype to boolean
+            expected = s.combine(other, op).astype("boolean")
+            self.assert_series_equal(result, expected)
+        else:
+            with pytest.raises(exc):
+                op(s, other)
+
     def check_opname(self, s, op_name, other, exc=None):
         super().check_opname(s, op_name, other, exc=None)