Backport PR #54579 on branch 2.1.x (ENH: Reflect changes from numpy…

… namespace refactor Part 3) (#54583) Backport PR #54579: ENH: Reflect changes from `numpy` namespace refactor Part 3 Co-authored-by: Mateusz Sokół <8431159+mtsokol@users.noreply.github.com>
pandas-dev · Aug 16, 2023 · 4fbe078 · 4fbe078
1 parent 4160d69
commit 4fbe078
Show file tree

Hide file tree

Showing 102 changed files with 280 additions and 283 deletions.
diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py
@@ -247,7 +247,7 @@ def setup(self, series_type, vals_type):
         elif series_type == "long":
             ser_vals = np.arange(N_many)
         elif series_type == "long_floats":
-            ser_vals = np.arange(N_many, dtype=np.float_)
+            ser_vals = np.arange(N_many, dtype=np.float64)
 
         self.series = Series(ser_vals).astype(object)
 
@@ -258,7 +258,7 @@ def setup(self, series_type, vals_type):
         elif vals_type == "long":
             values = np.arange(N_many)
         elif vals_type == "long_floats":
-            values = np.arange(N_many, dtype=np.float_)
+            values = np.arange(N_many, dtype=np.float64)
 
         self.values = values.astype(object)
 

diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst
@@ -107,7 +107,7 @@ methods.
 .. ipython:: python
 
     frame = pd.DataFrame(
-        {"col1": ["A", "B", np.NaN, "C", "D"], "col2": ["F", np.NaN, "G", "H", "I"]}
+        {"col1": ["A", "B", np.nan, "C", "D"], "col2": ["F", np.nan, "G", "H", "I"]}
     )
     frame
 

diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
@@ -183,8 +183,8 @@ can be improved by passing an ``np.ndarray``.
       ...:     return s * dx
       ...: cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b,
       ...:                                            np.ndarray col_N):
-      ...:     assert (col_a.dtype == np.float_
-      ...:             and col_b.dtype == np.float_ and col_N.dtype == np.int_)
+      ...:     assert (col_a.dtype == np.float64
+      ...:             and col_b.dtype == np.float64 and col_N.dtype == np.int_)
       ...:     cdef Py_ssize_t i, n = len(col_N)
       ...:     assert (len(col_a) == len(col_b) == n)
       ...:     cdef np.ndarray[double] res = np.empty(n)

diff --git a/doc/source/user_guide/gotchas.rst b/doc/source/user_guide/gotchas.rst
@@ -327,7 +327,7 @@ present in the more domain-specific statistical programming language `R
    ``numpy.unsignedinteger`` | ``uint8, uint16, uint32, uint64``
    ``numpy.object_`` | ``object_``
    ``numpy.bool_`` | ``bool_``
-   ``numpy.character`` | ``string_, unicode_``
+   ``numpy.character`` | ``bytes_, str_``
 
 The R language, by contrast, only has a handful of built-in data types:
 ``integer``, ``numeric`` (floating-point), ``character``, and

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -4881,7 +4881,7 @@ unspecified columns of the given DataFrame. The argument ``selector``
 defines which table is the selector table (which you can make queries from).
 The argument ``dropna`` will drop rows from the input ``DataFrame`` to ensure
 tables are synchronized.  This means that if a row for one of the tables
-being written to is entirely ``np.NaN``, that row will be dropped from all tables.
+being written to is entirely ``np.nan``, that row will be dropped from all tables.
 
 If ``dropna`` is False, **THE USER IS RESPONSIBLE FOR SYNCHRONIZING THE TABLES**.
 Remember that entirely ``np.Nan`` rows are not written to the HDFStore, so if

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -556,7 +556,7 @@ You must pass in the ``line_terminator`` explicitly, even in this case.
 
 .. _whatsnew_0240.bug_fixes.nan_with_str_dtype:
 
-Proper handling of ``np.NaN`` in a string data-typed column with the Python engine
+Proper handling of ``np.nan`` in a string data-typed column with the Python engine
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 There was bug in :func:`read_excel` and :func:`read_csv` with the Python

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -59,7 +59,7 @@ from pandas._libs.util cimport get_nat
 
 cdef:
     float64_t FP_ERR = 1e-13
-    float64_t NaN = <float64_t>np.NaN
+    float64_t NaN = <float64_t>np.nan
     int64_t NPY_NAT = get_nat()
 
 

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -52,7 +52,7 @@ from pandas._libs.missing cimport checknull
 
 cdef int64_t NPY_NAT = util.get_nat()
 
-cdef float64_t NaN = <float64_t>np.NaN
+cdef float64_t NaN = <float64_t>np.nan
 
 cdef enum InterpolationEnumType:
     INTERPOLATION_LINEAR,

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -144,7 +144,7 @@ cdef:
     object oINT64_MIN = <int64_t>INT64_MIN
     object oUINT64_MAX = <uint64_t>UINT64_MAX
 
-    float64_t NaN = <float64_t>np.NaN
+    float64_t NaN = <float64_t>np.nan
 
 # python-visible
 i8max = <int64_t>INT64_MAX

diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd
@@ -75,7 +75,7 @@ cdef inline bint is_integer_object(object obj) noexcept nogil:
 
 cdef inline bint is_float_object(object obj) noexcept nogil:
     """
-    Cython equivalent of `isinstance(val, (float, np.float_))`
+    Cython equivalent of `isinstance(val, (float, np.float64))`
 
     Parameters
     ----------
@@ -91,7 +91,7 @@ cdef inline bint is_float_object(object obj) noexcept nogil:
 
 cdef inline bint is_complex_object(object obj) noexcept nogil:
     """
-    Cython equivalent of `isinstance(val, (complex, np.complex_))`
+    Cython equivalent of `isinstance(val, (complex, np.complex128))`
 
     Parameters
     ----------

diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
@@ -57,7 +57,7 @@ cdef:
     float32_t MAXfloat32 = np.inf
     float64_t MAXfloat64 = np.inf
 
-    float64_t NaN = <float64_t>np.NaN
+    float64_t NaN = <float64_t>np.nan
 
 cdef bint is_monotonic_increasing_start_end_bounds(
     ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -777,7 +777,7 @@ def series_with_multilevel_index() -> Series:
     index = MultiIndex.from_tuples(tuples)
     data = np.random.default_rng(2).standard_normal(8)
     ser = Series(data, index=index)
-    ser.iloc[3] = np.NaN
+    ser.iloc[3] = np.nan
     return ser
 
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -2109,7 +2109,7 @@ def _codes(self) -> np.ndarray:
 
     def _box_func(self, i: int):
         if i == -1:
-            return np.NaN
+            return np.nan
         return self.categories[i]
 
     def _unbox_scalar(self, key) -> int:

diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py
@@ -537,8 +537,8 @@ def __init__(self, lhs, rhs) -> None:
             )
 
         # do not upcast float32s to float64 un-necessarily
-        acceptable_dtypes = [np.float32, np.float_]
-        _cast_inplace(com.flatten(self), acceptable_dtypes, np.float_)
+        acceptable_dtypes = [np.float32, np.float64]
+        _cast_inplace(com.flatten(self), acceptable_dtypes, np.float64)
 
 
 UNARY_OPS_SYMS = ("+", "-", "~", "not")

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -850,7 +850,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
             dtype = np.dtype(np.float64)
 
     elif is_complex(val):
-        dtype = np.dtype(np.complex_)
+        dtype = np.dtype(np.complex128)
 
     if lib.is_period(val):
         dtype = PeriodDtype(freq=val.freq)

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -1351,7 +1351,7 @@ def is_complex_dtype(arr_or_dtype) -> bool:
     False
     >>> is_complex_dtype(int)
     False
-    >>> is_complex_dtype(np.complex_)
+    >>> is_complex_dtype(np.complex128)
     True
     >>> is_complex_dtype(np.array(['a', 'b']))
     False

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -5307,7 +5307,7 @@ def reindex(
         level : int or name
             Broadcast across a level, matching Index values on the
             passed MultiIndex level.
-        fill_value : scalar, default np.NaN
+        fill_value : scalar, default np.nan
             Value to use for missing values. Defaults to NaN, but can be any
             "compatible" value.
         limit : int, default None
@@ -7376,7 +7376,7 @@ def ffill(
         2  3.0  4.0 NaN  1.0
         3  3.0  3.0 NaN  4.0
 
-        >>> ser = pd.Series([1, np.NaN, 2, 3])
+        >>> ser = pd.Series([1, np.nan, 2, 3])
         >>> ser.ffill()
         0   1.0
         1   1.0
@@ -8375,7 +8375,7 @@ def isna(self) -> Self:
         --------
         Show which entries in a DataFrame are NA.
 
-        >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN],
+        >>> df = pd.DataFrame(dict(age=[5, 6, np.nan],
         ...                        born=[pd.NaT, pd.Timestamp('1939-05-27'),
         ...                              pd.Timestamp('1940-04-25')],
         ...                        name=['Alfred', 'Batman', ''],
@@ -8394,7 +8394,7 @@ def isna(self) -> Self:
 
         Show which entries in a Series are NA.
 
-        >>> ser = pd.Series([5, 6, np.NaN])
+        >>> ser = pd.Series([5, 6, np.nan])
         >>> ser
         0    5.0
         1    6.0
@@ -8442,7 +8442,7 @@ def notna(self) -> Self:
         --------
         Show which entries in a DataFrame are not NA.
 
-        >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN],
+        >>> df = pd.DataFrame(dict(age=[5, 6, np.nan],
         ...                        born=[pd.NaT, pd.Timestamp('1939-05-27'),
         ...                              pd.Timestamp('1940-04-25')],
         ...                        name=['Alfred', 'Batman', ''],
@@ -8461,7 +8461,7 @@ def notna(self) -> Self:
 
         Show which entries in a Series are not NA.
 
-        >>> ser = pd.Series([5, 6, np.NaN])
+        >>> ser = pd.Series([5, 6, np.nan])
         >>> ser
         0    5.0
         1    6.0
@@ -8628,7 +8628,7 @@ def clip(
 
         Clips using specific lower threshold per column element, with missing values:
 
-        >>> t = pd.Series([2, -4, np.NaN, 6, 3])
+        >>> t = pd.Series([2, -4, np.nan, 6, 3])
         >>> t
         0    2.0
         1   -4.0
@@ -9828,7 +9828,7 @@ def align(
         copy : bool, default True
             Always returns new objects. If copy=False and no reindexing is
             required then original objects are returned.
-        fill_value : scalar, default np.NaN
+        fill_value : scalar, default np.nan
             Value to use for missing values. Defaults to NaN, but can be any
             "compatible" value.
         method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -5418,7 +5418,7 @@ def _mask_selected_obj(self, mask: npt.NDArray[np.bool_]) -> NDFrameT:
     def _reindex_output(
         self,
         output: OutputFrameOrSeries,
-        fill_value: Scalar = np.NaN,
+        fill_value: Scalar = np.nan,
         qs: npt.NDArray[np.float64] | None = None,
     ) -> OutputFrameOrSeries:
         """
@@ -5436,7 +5436,7 @@ def _reindex_output(
         ----------
         output : Series or DataFrame
             Object resulting from grouping and applying an operation.
-        fill_value : scalar, default np.NaN
+        fill_value : scalar, default np.nan
             Value to use for unobserved categories if self.observed is False.
         qs : np.ndarray[float64] or None, default None
             quantile values, only relevant for quantile.

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2848,7 +2848,7 @@ def isna(self) -> npt.NDArray[np.bool_]:
         Show which entries in a pandas.Index are NA. The result is an
         array.
 
-        >>> idx = pd.Index([5.2, 6.0, np.NaN])
+        >>> idx = pd.Index([5.2, 6.0, np.nan])
         >>> idx
         Index([5.2, 6.0, nan], dtype='float64')
         >>> idx.isna()
@@ -2904,7 +2904,7 @@ def notna(self) -> npt.NDArray[np.bool_]:
         Show which entries in an Index are not NA. The result is an
         array.
 
-        >>> idx = pd.Index([5.2, 6.0, np.NaN])
+        >>> idx = pd.Index([5.2, 6.0, np.nan])
         >>> idx
         Index([5.2, 6.0, nan], dtype='float64')
         >>> idx.notna()

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -124,7 +124,7 @@ def _get_next_label(label):
     elif is_integer_dtype(dtype):
         return label + 1
     elif is_float_dtype(dtype):
-        return np.nextafter(label, np.infty)
+        return np.nextafter(label, np.inf)
     else:
         raise TypeError(f"cannot determine next label for type {repr(type(label))}")
 
@@ -141,7 +141,7 @@ def _get_prev_label(label):
     elif is_integer_dtype(dtype):
         return label - 1
     elif is_float_dtype(dtype):
-        return np.nextafter(label, -np.infty)
+        return np.nextafter(label, -np.inf)
     else:
         raise TypeError(f"cannot determine next label for type {repr(type(label))}")
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -5586,7 +5586,7 @@ def dropna(
         Empty strings are not considered NA values. ``None`` is considered an
         NA value.
 
-        >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
+        >>> ser = pd.Series([np.nan, 2, pd.NaT, '', None, 'I stay'])
         >>> ser
         0       NaN
         1         2

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
@@ -1215,7 +1215,7 @@ def contains(
         --------
         Returning a Series of booleans using only a literal pattern.
 
-        >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN])
+        >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.nan])
         >>> s1.str.contains('og', regex=False)
         0    False
         1     True
@@ -1226,7 +1226,7 @@ def contains(
 
         Returning an Index of booleans using only a literal pattern.
 
-        >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN])
+        >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.nan])
         >>> ind.str.contains('23', regex=False)
         Index([False, False, False, True, nan], dtype='object')
 
@@ -3500,7 +3500,7 @@ def str_extractall(arr, pat, flags: int = 0) -> DataFrame:
             for match_i, match_tuple in enumerate(regex.findall(subject)):
                 if isinstance(match_tuple, str):
                     match_tuple = (match_tuple,)
-                na_tuple = [np.NaN if group == "" else group for group in match_tuple]
+                na_tuple = [np.nan if group == "" else group for group in match_tuple]
                 match_list.append(na_tuple)
                 result_key = tuple(subject_key + (match_i,))
                 index_list.append(result_key)

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1715,7 +1715,7 @@ def format_percentiles(
     """
     percentiles = np.asarray(percentiles)
 
-    # It checks for np.NaN as well
+    # It checks for np.nan as well
     if (
         not is_numeric_dtype(percentiles)
         or not np.all(percentiles >= 0)

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -637,15 +637,15 @@ def test_apply_with_byte_string():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("val", ["asd", 12, None, np.NaN])
+@pytest.mark.parametrize("val", ["asd", 12, None, np.nan])
 def test_apply_category_equalness(val):
     # Check if categorical comparisons on apply, GH 21239
-    df_values = ["asd", None, 12, "asd", "cde", np.NaN]
+    df_values = ["asd", None, 12, "asd", "cde", np.nan]
     df = DataFrame({"a": df_values}, dtype="category")
 
     result = df.a.apply(lambda x: x == val)
     expected = Series(
-        [np.NaN if pd.isnull(x) else x == val for x in df_values], name="a"
+        [np.nan if pd.isnull(x) else x == val for x in df_values], name="a"
     )
     tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
@@ -242,7 +242,7 @@ def test_apply_categorical(by_row):
     assert result.dtype == object
 
 
-@pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]])
+@pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
 def test_apply_categorical_with_nan_values(series, by_row):
     # GH 20714 bug fixed in: GH 24275
     s = Series(series, dtype="category")
@@ -254,7 +254,7 @@ def test_apply_categorical_with_nan_values(series, by_row):
 
     result = s.apply(lambda x: x.split("-")[0], by_row=by_row)
     result = result.astype(object)
-    expected = Series(["1", "1", np.NaN], dtype="category")
+    expected = Series(["1", "1", np.nan], dtype="category")
     expected = expected.astype(object)
     tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py
@@ -73,8 +73,8 @@ def test_min_max_reduce(self):
     @pytest.mark.parametrize(
         "categories,expected",
         [
-            (list("ABC"), np.NaN),
-            ([1, 2, 3], np.NaN),
+            (list("ABC"), np.nan),
+            ([1, 2, 3], np.nan),
             pytest.param(
                 Series(date_range("2020-01-01", periods=3), dtype="category"),
                 NaT,

diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py
@@ -129,7 +129,7 @@ def test_set_na(self, left_right_dtypes):
             # GH#45484 TypeError, not ValueError, matches what we get with
             # non-NA un-holdable value.
             with pytest.raises(TypeError, match=msg):
-                result[0] = np.NaN
+                result[0] = np.nan
             return
 
         result[0] = np.nan