From d59eb7f8b3d69a83535bc3cc2035db7ac942ecbf Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Sun, 21 Apr 2024 13:35:03 -0400
Subject: [PATCH 001/100] DOC: fixing SA01 error for DatetimeIndex: second,
 nanosecond, and microsecond (#58342)

* DOC: fixing SA01 error for DatetimeIndex: second, nanosecond, microsecond

* fixing EXPECTED TO FAIL, BUT NOT FAILING error
---
 ci/code_checks.sh               |  6 ------
 pandas/core/arrays/datetimes.py | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index d1cdff8f7f56b..ad12458ad6b0d 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -109,7 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
         -i "pandas.DatetimeIndex.ceil SA01" \
         -i "pandas.DatetimeIndex.date SA01" \
-        -i "pandas.DatetimeIndex.day SA01" \
         -i "pandas.DatetimeIndex.day_of_year SA01" \
         -i "pandas.DatetimeIndex.dayofyear SA01" \
         -i "pandas.DatetimeIndex.floor SA01" \
@@ -118,8 +117,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeIndex.indexer_between_time RT03" \
         -i "pandas.DatetimeIndex.inferred_freq SA01" \
         -i "pandas.DatetimeIndex.is_leap_year SA01" \
-        -i "pandas.DatetimeIndex.microsecond SA01" \
-        -i "pandas.DatetimeIndex.nanosecond SA01" \
         -i "pandas.DatetimeIndex.quarter SA01" \
         -i "pandas.DatetimeIndex.round SA01" \
         -i "pandas.DatetimeIndex.snap PR01,RT03,SA01" \
@@ -296,7 +293,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.dt.ceil PR01,PR02,SA01" \
         -i "pandas.Series.dt.components SA01" \
         -i "pandas.Series.dt.date SA01" \
-        -i "pandas.Series.dt.day SA01" \
         -i "pandas.Series.dt.day_name PR01,PR02" \
         -i "pandas.Series.dt.day_of_year SA01" \
         -i "pandas.Series.dt.dayofyear SA01" \
@@ -306,10 +302,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.dt.floor PR01,PR02,SA01" \
         -i "pandas.Series.dt.freq GL08" \
         -i "pandas.Series.dt.is_leap_year SA01" \
-        -i "pandas.Series.dt.microsecond SA01" \
         -i "pandas.Series.dt.microseconds SA01" \
         -i "pandas.Series.dt.month_name PR01,PR02" \
-        -i "pandas.Series.dt.nanosecond SA01" \
         -i "pandas.Series.dt.nanoseconds SA01" \
         -i "pandas.Series.dt.normalize PR01" \
         -i "pandas.Series.dt.quarter SA01" \
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 5d0dfc67bd90a..7704c99141fc2 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -1597,6 +1597,12 @@ def isocalendar(self) -> DataFrame:
         """
         The day of the datetime.
 
+        See Also
+        --------
+        DatetimeIndex.year: The year of the datetime.
+        DatetimeIndex.month: The month as January=1, December=12.
+        DatetimeIndex.hour: The hours of the datetime.
+
         Examples
         --------
         >>> datetime_series = pd.Series(
@@ -1706,6 +1712,11 @@ def isocalendar(self) -> DataFrame:
         """
         The microseconds of the datetime.
 
+        See Also
+        --------
+        DatetimeIndex.second: The seconds of the datetime.
+        DatetimeIndex.nanosecond: The nanoseconds of the datetime.
+
         Examples
         --------
         >>> datetime_series = pd.Series(
@@ -1729,6 +1740,11 @@ def isocalendar(self) -> DataFrame:
         """
         The nanoseconds of the datetime.
 
+        See Also
+        --------
+        DatetimeIndex.second: The seconds of the datetime.
+        DatetimeIndex.microsecond: The microseconds of the datetime.
+
         Examples
         --------
         >>> datetime_series = pd.Series(

From 99f1df6cb87e9b73dd8e71dbee686a7b555c285a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?=
 <6618166+twoertwein@users.noreply.github.com>
Date: Sun, 21 Apr 2024 13:41:20 -0400
Subject: [PATCH 002/100] TYP: export SASReader in pandas.api.typing (#58349)

* TYP: export SASReader in pandas.api.typing

* fix test
---
 doc/source/whatsnew/v3.0.0.rst |  1 +
 pandas/api/typing/__init__.py  |  2 ++
 pandas/io/sas/sas7bdat.py      |  5 ++---
 pandas/io/sas/sas_xport.py     |  5 ++---
 pandas/io/sas/sasreader.py     | 13 +++++++------
 pandas/tests/api/test_api.py   |  1 +
 6 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 8618d7d525771..c817e09b3b360 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -29,6 +29,7 @@ enhancement2
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
+- :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
 - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
 - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
 - :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`)
diff --git a/pandas/api/typing/__init__.py b/pandas/api/typing/__init__.py
index df6392bf692a2..c58fa0f085266 100644
--- a/pandas/api/typing/__init__.py
+++ b/pandas/api/typing/__init__.py
@@ -30,6 +30,7 @@
 # TODO: Can't import Styler without importing jinja2
 # from pandas.io.formats.style import Styler
 from pandas.io.json._json import JsonReader
+from pandas.io.sas.sasreader import SASReader
 from pandas.io.stata import StataReader
 
 __all__ = [
@@ -49,6 +50,7 @@
     "RollingGroupby",
     "SeriesGroupBy",
     "StataReader",
+    "SASReader",
     # See TODO above
     # "Styler",
     "TimedeltaIndexResamplerGroupby",
diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 6a392a0f02caf..25257d5fcc192 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -16,7 +16,6 @@
 
 from __future__ import annotations
 
-from collections import abc
 from datetime import datetime
 import sys
 from typing import TYPE_CHECKING
@@ -45,7 +44,7 @@
 
 from pandas.io.common import get_handle
 import pandas.io.sas.sas_constants as const
-from pandas.io.sas.sasreader import ReaderBase
+from pandas.io.sas.sasreader import SASReader
 
 if TYPE_CHECKING:
     from pandas._typing import (
@@ -116,7 +115,7 @@ def __init__(
 
 
 # SAS7BDAT represents a SAS data file in SAS7BDAT format.
-class SAS7BDATReader(ReaderBase, abc.Iterator):
+class SAS7BDATReader(SASReader):
     """
     Read SAS files in SAS7BDAT format.
 
diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py
index adba9bf117a8e..89dbdab64c23c 100644
--- a/pandas/io/sas/sas_xport.py
+++ b/pandas/io/sas/sas_xport.py
@@ -10,7 +10,6 @@
 
 from __future__ import annotations
 
-from collections import abc
 from datetime import datetime
 import struct
 from typing import TYPE_CHECKING
@@ -24,7 +23,7 @@
 import pandas as pd
 
 from pandas.io.common import get_handle
-from pandas.io.sas.sasreader import ReaderBase
+from pandas.io.sas.sasreader import SASReader
 
 if TYPE_CHECKING:
     from pandas._typing import (
@@ -252,7 +251,7 @@ def _parse_float_vec(vec):
     return ieee
 
 
-class XportReader(ReaderBase, abc.Iterator):
+class XportReader(SASReader):
     __doc__ = _xport_reader_doc
 
     def __init__(
diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py
index 69d911863338f..12d698a4f76a8 100644
--- a/pandas/io/sas/sasreader.py
+++ b/pandas/io/sas/sasreader.py
@@ -8,6 +8,7 @@
     ABC,
     abstractmethod,
 )
+from collections.abc import Iterator
 from typing import (
     TYPE_CHECKING,
     overload,
@@ -33,9 +34,9 @@
     from pandas import DataFrame
 
 
-class ReaderBase(ABC):
+class SASReader(Iterator["DataFrame"], ABC):
     """
-    Protocol for XportReader and SAS7BDATReader classes.
+    Abstract class for XportReader and SAS7BDATReader.
     """
 
     @abstractmethod
@@ -66,7 +67,7 @@ def read_sas(
     chunksize: int = ...,
     iterator: bool = ...,
     compression: CompressionOptions = ...,
-) -> ReaderBase: ...
+) -> SASReader: ...
 
 
 @overload
@@ -79,7 +80,7 @@ def read_sas(
     chunksize: None = ...,
     iterator: bool = ...,
     compression: CompressionOptions = ...,
-) -> DataFrame | ReaderBase: ...
+) -> DataFrame | SASReader: ...
 
 
 @doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer")
@@ -92,7 +93,7 @@ def read_sas(
     chunksize: int | None = None,
     iterator: bool = False,
     compression: CompressionOptions = "infer",
-) -> DataFrame | ReaderBase:
+) -> DataFrame | SASReader:
     """
     Read SAS files stored as either XPORT or SAS7BDAT format files.
 
@@ -145,7 +146,7 @@ def read_sas(
                 f"unable to infer format of SAS file from filename: {fname!r}"
             )
 
-    reader: ReaderBase
+    reader: SASReader
     if format.lower() == "xport":
         from pandas.io.sas.sas_xport import XportReader
 
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
index 0f2a641d13b11..b23876d9280f7 100644
--- a/pandas/tests/api/test_api.py
+++ b/pandas/tests/api/test_api.py
@@ -267,6 +267,7 @@ class TestApi(Base):
         "RollingGroupby",
         "SeriesGroupBy",
         "StataReader",
+        "SASReader",
         "TimedeltaIndexResamplerGroupby",
         "TimeGrouper",
         "Window",

From b111ac671e9eb8119e53ca57be54d24c47f672f8 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Mon, 22 Apr 2024 00:14:30 +0530
Subject: [PATCH 003/100] DOC: Enforce Numpy Docstring Validation for
 pandas.HDFStore.groups (#58357)

* DOC: added SA01 to HDFStore.groups

* DOC: removed HDFStore.groups
---
 ci/code_checks.sh     | 1 -
 pandas/io/pytables.py | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index ad12458ad6b0d..cabc25b5e0ba5 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -131,7 +131,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeTZDtype.tz SA01" \
         -i "pandas.DatetimeTZDtype.unit SA01" \
         -i "pandas.Grouper PR02" \
-        -i "pandas.HDFStore.groups SA01" \
         -i "pandas.HDFStore.info RT03,SA01" \
         -i "pandas.HDFStore.keys SA01" \
         -i "pandas.HDFStore.put PR01,SA01" \
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 25808f5b4a132..d7fc71d037f2d 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1504,6 +1504,10 @@ def groups(self) -> list:
         list
             List of objects.
 
+        See Also
+        --------
+        HDFStore.get_node : Returns the node with the key.
+
         Examples
         --------
         >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

From 3b0824e92e9932588f5d0e58e1b0aa59df2e76fa Mon Sep 17 00:00:00 2001
From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com>
Date: Mon, 22 Apr 2024 12:29:47 -0400
Subject: [PATCH 004/100] DOC: Fix SA01 errors for Index.hasnans, Index.map,
 Index.nbytes (#58343)

* Shorten sentence length

* Remove Series.nbytes from ci/code_checks.sh

* Update see also method names

* Update see also method names

* Update see also methods for Index.map

* Update method descriptions
---
 ci/code_checks.sh           |  4 ----
 pandas/core/base.py         |  5 +++++
 pandas/core/indexes/base.py | 10 ++++++++++
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index cabc25b5e0ba5..4debc2eb91449 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -156,18 +156,15 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.get_indexer_non_unique PR07,SA01" \
         -i "pandas.Index.get_loc PR07,RT03,SA01" \
         -i "pandas.Index.get_slice_bound PR07" \
-        -i "pandas.Index.hasnans SA01" \
         -i "pandas.Index.identical PR01,SA01" \
         -i "pandas.Index.inferred_type SA01" \
         -i "pandas.Index.insert PR07,RT03,SA01" \
         -i "pandas.Index.intersection PR07,RT03,SA01" \
         -i "pandas.Index.item SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
-        -i "pandas.Index.map SA01" \
         -i "pandas.Index.memory_usage RT03" \
         -i "pandas.Index.name SA01" \
         -i "pandas.Index.names GL08" \
-        -i "pandas.Index.nbytes SA01" \
         -i "pandas.Index.nunique RT03" \
         -i "pandas.Index.putmask PR01,RT03" \
         -i "pandas.Index.ravel PR01,RT03" \
@@ -344,7 +341,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.mod PR07" \
         -i "pandas.Series.mode SA01" \
         -i "pandas.Series.mul PR07" \
-        -i "pandas.Series.nbytes SA01" \
         -i "pandas.Series.ne PR07,SA01" \
         -i "pandas.Series.nunique RT03" \
         -i "pandas.Series.pad PR01,SA01" \
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 9b1251a4ef5d8..424f0609dd485 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -419,6 +419,11 @@ def nbytes(self) -> int:
         """
         Return the number of bytes in the underlying data.
 
+        See Also
+        --------
+        Series.ndim : Number of dimensions of the underlying data.
+        Series.size : Return the number of elements in the underlying data.
+
         Examples
         --------
         For Series:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 8ede401f37184..d1d1c5ea3171f 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2423,6 +2423,12 @@ def hasnans(self) -> bool:
         -------
         bool
 
+        See Also
+        --------
+        Index.isna : Detect missing values.
+        Index.dropna : Return Index without NA/NaN values.
+        Index.fillna : Fill NA/NaN values with the specified value.
+
         Examples
         --------
         >>> s = pd.Series([1, 2, 3], index=["a", "b", None])
@@ -6067,6 +6073,10 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
             If the function returns a tuple with more than one element
             a MultiIndex will be returned.
 
+        See Also
+        --------
+        Index.where : Replace values where the condition is False.
+
         Examples
         --------
         >>> idx = pd.Index([1, 2, 3])

From 2768a22d3b6bb70029f406968bc366faf2c7267f Mon Sep 17 00:00:00 2001
From: gboeker <68177766+gboeker@users.noreply.github.com>
Date: Mon, 22 Apr 2024 12:31:05 -0400
Subject: [PATCH 005/100] DOC: Enforce Numpy Docstring Validation for
 DatetimeIndex (#58353)

* fix line too long

* add return for snap

* undo return

* fix docstring issues for DatetimeIndex.quarter

* remove pandas.Series.dt.quarter from codechecks

* fix docstring issues for DatetimeIndex.round

* fix docstring issues for DatetimeIndex.time

* fix docstring issues for DatetimeIndex.timetz

* add see also for timetz

* fix code check errors

* delete round from code_checks

* fix code check errors
---
 ci/code_checks.sh                  | 20 ++++----------------
 pandas/core/arrays/datetimelike.py |  5 +++++
 pandas/core/arrays/datetimes.py    | 20 ++++++++++++++++++++
 pandas/core/indexes/datetimes.py   |  7 +++++++
 4 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 4debc2eb91449..443fa4b4005d3 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -107,22 +107,16 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.to_markdown SA01" \
         -i "pandas.DataFrame.to_parquet RT03" \
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
-        -i "pandas.DatetimeIndex.ceil SA01" \
         -i "pandas.DatetimeIndex.date SA01" \
         -i "pandas.DatetimeIndex.day_of_year SA01" \
         -i "pandas.DatetimeIndex.dayofyear SA01" \
-        -i "pandas.DatetimeIndex.floor SA01" \
         -i "pandas.DatetimeIndex.freqstr SA01" \
         -i "pandas.DatetimeIndex.indexer_at_time PR01,RT03" \
         -i "pandas.DatetimeIndex.indexer_between_time RT03" \
         -i "pandas.DatetimeIndex.inferred_freq SA01" \
         -i "pandas.DatetimeIndex.is_leap_year SA01" \
-        -i "pandas.DatetimeIndex.quarter SA01" \
-        -i "pandas.DatetimeIndex.round SA01" \
-        -i "pandas.DatetimeIndex.snap PR01,RT03,SA01" \
+        -i "pandas.DatetimeIndex.snap PR01,RT03" \
         -i "pandas.DatetimeIndex.std PR01,RT03" \
-        -i "pandas.DatetimeIndex.time SA01" \
-        -i "pandas.DatetimeIndex.timetz SA01" \
         -i "pandas.DatetimeIndex.to_period RT03" \
         -i "pandas.DatetimeIndex.to_pydatetime RT03,SA01" \
         -i "pandas.DatetimeIndex.tz SA01" \
@@ -286,7 +280,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.div PR07" \
         -i "pandas.Series.droplevel SA01" \
         -i "pandas.Series.dt.as_unit PR01,PR02" \
-        -i "pandas.Series.dt.ceil PR01,PR02,SA01" \
+        -i "pandas.Series.dt.ceil PR01,PR02" \
         -i "pandas.Series.dt.components SA01" \
         -i "pandas.Series.dt.date SA01" \
         -i "pandas.Series.dt.day_name PR01,PR02" \
@@ -295,20 +289,17 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.dt.days SA01" \
         -i "pandas.Series.dt.days_in_month SA01" \
         -i "pandas.Series.dt.daysinmonth SA01" \
-        -i "pandas.Series.dt.floor PR01,PR02,SA01" \
+        -i "pandas.Series.dt.floor PR01,PR02" \
         -i "pandas.Series.dt.freq GL08" \
         -i "pandas.Series.dt.is_leap_year SA01" \
         -i "pandas.Series.dt.microseconds SA01" \
         -i "pandas.Series.dt.month_name PR01,PR02" \
         -i "pandas.Series.dt.nanoseconds SA01" \
         -i "pandas.Series.dt.normalize PR01" \
-        -i "pandas.Series.dt.quarter SA01" \
         -i "pandas.Series.dt.qyear GL08" \
-        -i "pandas.Series.dt.round PR01,PR02,SA01" \
+        -i "pandas.Series.dt.round PR01,PR02" \
         -i "pandas.Series.dt.seconds SA01" \
         -i "pandas.Series.dt.strftime PR01,PR02" \
-        -i "pandas.Series.dt.time SA01" \
-        -i "pandas.Series.dt.timetz SA01" \
         -i "pandas.Series.dt.to_period PR01,PR02,RT03" \
         -i "pandas.Series.dt.total_seconds PR01" \
         -i "pandas.Series.dt.tz SA01" \
@@ -428,14 +419,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timedelta.total_seconds SA01" \
         -i "pandas.Timedelta.view SA01" \
         -i "pandas.TimedeltaIndex.as_unit RT03,SA01" \
-        -i "pandas.TimedeltaIndex.ceil SA01" \
         -i "pandas.TimedeltaIndex.components SA01" \
         -i "pandas.TimedeltaIndex.days SA01" \
-        -i "pandas.TimedeltaIndex.floor SA01" \
         -i "pandas.TimedeltaIndex.inferred_freq SA01" \
         -i "pandas.TimedeltaIndex.microseconds SA01" \
         -i "pandas.TimedeltaIndex.nanoseconds SA01" \
-        -i "pandas.TimedeltaIndex.round SA01" \
         -i "pandas.TimedeltaIndex.seconds SA01" \
         -i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
         -i "pandas.Timestamp PR07,SA01" \
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 8ada9d88e08bc..974289160b145 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1825,6 +1825,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]:
     ------
     ValueError if the `freq` cannot be converted.
 
+    See Also
+    --------
+    DatetimeIndex.floor : Perform floor operation on the data to the specified `freq`.
+    DatetimeIndex.snap : Snap time stamps to nearest occurring frequency.
+
     Notes
     -----
     If the timestamps have a timezone, {op}ing will take place relative to the
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 7704c99141fc2..fb9f047d432a1 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -1391,6 +1391,14 @@ def time(self) -> npt.NDArray[np.object_]:
 
         The time part of the Timestamps.
 
+        See Also
+        --------
+        DatetimeIndex.timetz : Returns numpy array of :class:`datetime.time`
+            objects with timezones. The time part of the Timestamps.
+        DatetimeIndex.date : Returns numpy array of python :class:`datetime.date`
+            objects. Namely, the date part of Timestamps without time and timezone
+            information.
+
         Examples
         --------
         For Series:
@@ -1428,6 +1436,12 @@ def timetz(self) -> npt.NDArray[np.object_]:
 
         The time part of the Timestamps.
 
+        See Also
+        --------
+        DatetimeIndex.time : Returns numpy array of :class:`datetime.time` objects.
+            The time part of the Timestamps.
+        DatetimeIndex.tz : Return the timezone.
+
         Examples
         --------
         For Series:
@@ -1836,6 +1850,12 @@ def isocalendar(self) -> DataFrame:
         """
         The quarter of the date.
 
+        See Also
+        --------
+        DatetimeIndex.snap : Snap time stamps to nearest occurring frequency.
+        DatetimeIndex.time : Returns numpy array of datetime.time objects.
+            The time part of the Timestamps.
+
         Examples
         --------
         For Series:
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index cefdc14145d1f..7122de745e13b 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -455,6 +455,13 @@ def snap(self, freq: Frequency = "S") -> DatetimeIndex:
         -------
         DatetimeIndex
 
+        See Also
+        --------
+        DatetimeIndex.round : Perform round operation on the data to the
+            specified `freq`.
+        DatetimeIndex.floor : Perform floor operation on the data to the
+            specified `freq`.
+
         Examples
         --------
         >>> idx = pd.DatetimeIndex(

From 09c7201d6db4be265aafda8feb7577c02145f2eb Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Mon, 22 Apr 2024 22:34:44 +0530
Subject: [PATCH 006/100] DOC: Enforce Numpy Docstring Validation for
 pandas.HDFStore.info (#58368)

* DOC: add return description and see also section to pandas.HDFStore.info

* DOC: add 2 df in the examples for pandas.HDFStore.info

* DOC: remove pandas.HDFStore.info
---
 ci/code_checks.sh     |  1 -
 pandas/io/pytables.py | 15 ++++++++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 443fa4b4005d3..fdcbcbe31c47f 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -125,7 +125,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeTZDtype.tz SA01" \
         -i "pandas.DatetimeTZDtype.unit SA01" \
         -i "pandas.Grouper PR02" \
-        -i "pandas.HDFStore.info RT03,SA01" \
         -i "pandas.HDFStore.keys SA01" \
         -i "pandas.HDFStore.put PR01,SA01" \
         -i "pandas.HDFStore.select SA01" \
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index d7fc71d037f2d..89c6ac9a58382 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1688,17 +1688,26 @@ def info(self) -> str:
         Returns
         -------
         str
+            A string containing the Python pandas class name, filepath to the HDF5
+            file and all the object keys along with their respective dataframe shapes.
+
+        See Also
+        --------
+        HDFStore.get_storer : Returns the storer object for a key.
 
         Examples
         --------
-        >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
+        >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
+        >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=["C", "D"])
         >>> store = pd.HDFStore("store.h5", "w")  # doctest: +SKIP
-        >>> store.put("data", df)  # doctest: +SKIP
+        >>> store.put("data1", df1)  # doctest: +SKIP
+        >>> store.put("data2", df2)  # doctest: +SKIP
         >>> print(store.info())  # doctest: +SKIP
         >>> store.close()  # doctest: +SKIP
         <class 'pandas.io.pytables.HDFStore'>
         File path: store.h5
-        /data    frame    (shape->[2,2])
+        /data1            frame        (shape->[2,2])
+        /data2            frame        (shape->[2,2])
         """
         path = pprint_thing(self._path)
         output = f"{type(self)}\nFile path: {path}\n"

From 5db3196e8a5779a2548ba5f48ed8f4ebfb2cf31b Mon Sep 17 00:00:00 2001
From: gboeker <68177766+gboeker@users.noreply.github.com>
Date: Mon, 22 Apr 2024 13:16:42 -0400
Subject: [PATCH 007/100] DOC: Fix SA01 Docstring Errors for DataFrame (#58364)

* DataFrame.__iter__ fix SA01

* add See Also for DataFrame.columns

* DataFrame.droplevel SA01 fixed

* remove pandas.Series.droplevel from code_checks.sh
---
 ci/code_checks.sh      |  4 ----
 pandas/core/frame.py   |  4 ++++
 pandas/core/generic.py | 11 +++++++++++
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index fdcbcbe31c47f..d2ba06902096e 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -80,10 +80,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.CategoricalIndex.codes SA01" \
         -i "pandas.CategoricalIndex.ordered SA01" \
         -i "pandas.DataFrame.__dataframe__ SA01" \
-        -i "pandas.DataFrame.__iter__ SA01" \
         -i "pandas.DataFrame.at_time PR01" \
-        -i "pandas.DataFrame.columns SA01" \
-        -i "pandas.DataFrame.droplevel SA01" \
         -i "pandas.DataFrame.hist RT03" \
         -i "pandas.DataFrame.infer_objects RT03" \
         -i "pandas.DataFrame.kurt RT03,SA01" \
@@ -277,7 +274,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.cat.reorder_categories PR01,PR02" \
         -i "pandas.Series.cat.set_categories PR01,PR02" \
         -i "pandas.Series.div PR07" \
-        -i "pandas.Series.droplevel SA01" \
         -i "pandas.Series.dt.as_unit PR01,PR02" \
         -i "pandas.Series.dt.ceil PR01,PR02" \
         -i "pandas.Series.dt.components SA01" \
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0185ca8241617..50dc514e7181f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -12893,6 +12893,10 @@ def isin_(x):
             """
                 The column labels of the DataFrame.
 
+                See Also
+                --------
+                DataFrame.index: The index (row labels) of the DataFrame.
+
                 Examples
                 --------
                 >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index dbe2006642484..a7f155ec93524 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -783,6 +783,12 @@ def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self:
         {klass}
             {klass} with requested index / column level(s) removed.
 
+        See Also
+        --------
+        DataFrame.replace : Replace values given in `to_replace` with `value`.
+        DataFrame.pivot : Return reshaped DataFrame organized by given
+            index / column values.
+
         Examples
         --------
         >>> df = (
@@ -1862,6 +1868,11 @@ def __iter__(self) -> Iterator:
         iterator
             Info axis as iterator.
 
+        See Also
+        --------
+        DataFrame.items : Iterate over (column name, Series) pairs.
+        DataFrame.itertuples : Iterate over DataFrame rows as namedtuples.
+
         Examples
         --------
         >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

From f1297fae4561c1cdf1c0eab1ec6fa2247ef73f07 Mon Sep 17 00:00:00 2001
From: William Andrea <22385371+wjandrea@users.noreply.github.com>
Date: Mon, 22 Apr 2024 13:30:27 -0400
Subject: [PATCH 008/100] More idiomatic example code in BaseIndexer (#58356)

No need to loop when NumPy supports range and array addition
---
 pandas/core/indexers/objects.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py
index 2e6bcda520aba..d108f840a1b4f 100644
--- a/pandas/core/indexers/objects.py
+++ b/pandas/core/indexers/objects.py
@@ -53,11 +53,8 @@ class BaseIndexer:
     >>> from pandas.api.indexers import BaseIndexer
     >>> class CustomIndexer(BaseIndexer):
     ...     def get_window_bounds(self, num_values, min_periods, center, closed, step):
-    ...         start = np.empty(num_values, dtype=np.int64)
-    ...         end = np.empty(num_values, dtype=np.int64)
-    ...         for i in range(num_values):
-    ...             start[i] = i
-    ...             end[i] = i + self.window_size
+    ...         start = np.arange(num_values, dtype=np.int64)
+    ...         end = np.arange(num_values, dtype=np.int64) + self.window_size
     ...         return start, end
     >>> df = pd.DataFrame({"values": range(5)})
     >>> indexer = CustomIndexer(window_size=2)

From e714aca6f2ed594c95a9681dac3d4858f23552a2 Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Mon, 22 Apr 2024 13:43:48 -0400
Subject: [PATCH 009/100] DOC: fixing SA01 errors for Index: name, dtype, and
 equals (#58355)

* DOC: fixing SA01 errors for Index: name, dtype, and equals

* fixing Blank line contains whitespace error
---
 ci/code_checks.sh           |  3 ---
 pandas/core/indexes/base.py | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index d2ba06902096e..d595162fd84e9 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -136,10 +136,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.drop_duplicates RT03" \
         -i "pandas.Index.droplevel RT03,SA01" \
         -i "pandas.Index.dropna RT03,SA01" \
-        -i "pandas.Index.dtype SA01" \
         -i "pandas.Index.duplicated RT03" \
         -i "pandas.Index.empty GL08" \
-        -i "pandas.Index.equals SA01" \
         -i "pandas.Index.fillna RT03" \
         -i "pandas.Index.get_indexer PR07,SA01" \
         -i "pandas.Index.get_indexer_for PR01,SA01" \
@@ -153,7 +151,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.item SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
         -i "pandas.Index.memory_usage RT03" \
-        -i "pandas.Index.name SA01" \
         -i "pandas.Index.names GL08" \
         -i "pandas.Index.nunique RT03" \
         -i "pandas.Index.putmask PR01,RT03" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index d1d1c5ea3171f..424126132656c 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -976,6 +976,10 @@ def dtype(self) -> DtypeObj:
         """
         Return the dtype object of the underlying data.
 
+        See Also
+        --------
+        Index.inferred_type: Return a string of the type inferred from the values.
+
         Examples
         --------
         >>> idx = pd.Index([1, 2, 3])
@@ -1638,6 +1642,11 @@ def name(self) -> Hashable:
         """
         Return Index or MultiIndex name.
 
+        See Also
+        --------
+        Index.set_names: Able to set new names partially and by level.
+        Index.rename: Able to set new names without level.
+
         Examples
         --------
         >>> idx = pd.Index([1, 2, 3], name="x")
@@ -5181,6 +5190,12 @@ def equals(self, other: Any) -> bool:
             True if "other" is an Index and it has the same elements and order
             as the calling index; False otherwise.
 
+        See Also
+        --------
+        Index.identical: Checks that object attributes and types are also equal.
+        Index.has_duplicates: Check if the Index has duplicate values.
+        Index.is_unique: Return if the index has unique values.
+
         Examples
         --------
         >>> idx1 = pd.Index([1, 2, 3])

From 22e524799de6189e93e5d4f1907f3e6ea282a28a Mon Sep 17 00:00:00 2001
From: Nrezhang <102526155+Nrezhang@users.noreply.github.com>
Date: Mon, 22 Apr 2024 13:45:18 -0400
Subject: [PATCH 010/100] DOC: Fix SA01 errors for pandas.Index.astype (#58352)

* pandas.Index.astype

* check fixes

* series to index
---
 ci/code_checks.sh           | 1 -
 pandas/core/indexes/base.py | 6 ++++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index d595162fd84e9..f03ea65866031 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -129,7 +129,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index PR07" \
         -i "pandas.Index.T SA01" \
         -i "pandas.Index.append PR07,RT03,SA01" \
-        -i "pandas.Index.astype SA01" \
         -i "pandas.Index.copy PR07,SA01" \
         -i "pandas.Index.difference PR07,RT03,SA01" \
         -i "pandas.Index.drop PR07,SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 424126132656c..63facb61ed498 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1060,6 +1060,12 @@ def astype(self, dtype, copy: bool = True):
         Index
             Index with values cast to specified dtype.
 
+        See Also
+        --------
+        Index.dtype: Return the dtype object of the underlying data.
+        Index.dtypes: Return the dtype object of the underlying data.
+        Index.convert_dtypes: Convert columns to the best possible dtypes.
+
         Examples
         --------
         >>> idx = pd.Index([1, 2, 3])

From 3461db5656b2ea2b90368f521c63fbcccb48d68d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 22 Apr 2024 07:56:29 -1000
Subject: [PATCH 011/100] CLN: Use more memoryviews (#58330)

* Add memoryviews in reshape.pyx

* Use more const memoryviews
---
 pandas/_libs/lib.pyx     | 6 +++---
 pandas/_libs/reshape.pyx | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 7aa1cb715521e..24afbe3a07bf1 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -477,7 +477,7 @@ def has_infs(const floating[:] arr) -> bool:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def has_only_ints_or_nan(floating[:] arr) -> bool:
+def has_only_ints_or_nan(const floating[:] arr) -> bool:
     cdef:
         floating val
         intp_t i
@@ -631,7 +631,7 @@ ctypedef fused int6432_t:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool:
+def is_range_indexer(const int6432_t[:] left, Py_ssize_t n) -> bool:
     """
     Perform an element by element comparison on 1-d integer arrays, meant for indexer
     comparisons
@@ -652,7 +652,7 @@ def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def is_sequence_range(ndarray[int6432_t, ndim=1] sequence, int64_t step) -> bool:
+def is_sequence_range(const int6432_t[:] sequence, int64_t step) -> bool:
     """
     Check if sequence is equivalent to a range with the specified step.
     """
diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
index 21d1405328da6..28ea06739e0c8 100644
--- a/pandas/_libs/reshape.pyx
+++ b/pandas/_libs/reshape.pyx
@@ -19,7 +19,7 @@ from pandas._libs.lib cimport c_is_list_like
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def unstack(numeric_object_t[:, :] values, const uint8_t[:] mask,
+def unstack(const numeric_object_t[:, :] values, const uint8_t[:] mask,
             Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width,
             numeric_object_t[:, :] new_values, uint8_t[:, :] new_mask) -> None:
     """
@@ -80,7 +80,7 @@ def unstack(numeric_object_t[:, :] values, const uint8_t[:] mask,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def explode(ndarray[object] values):
+def explode(object[:] values):
     """
     transform array list-likes to long form
     preserve non-list entries

From 454e2e1d9d7b118953ecfb4edc6f9fe7f5cb07b8 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 22 Apr 2024 08:01:24 -1000
Subject: [PATCH 012/100] CLN: Use generators when objects are re-iterated over
 in core/internals (#58319)

* Make _split generator

* More iterators

* Remove typing
---
 pandas/core/frame.py                     |  4 +--
 pandas/core/internals/blocks.py          | 23 ++++++-------
 pandas/core/internals/managers.py        | 41 +++++++++---------------
 pandas/tests/internals/test_internals.py |  2 +-
 4 files changed, 29 insertions(+), 41 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 50dc514e7181f..567fcb1ef7c05 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -12925,12 +12925,12 @@ def _to_dict_of_blocks(self):
         Return a dict of dtype -> Constructor Types that
         each is a homogeneous dtype.
 
-        Internal ONLY - only works for BlockManager
+        Internal ONLY.
         """
         mgr = self._mgr
         return {
             k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self)
-            for k, v in mgr.to_dict().items()
+            for k, v in mgr.to_iter_dict()
         }
 
     @property
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 7be1d5d95ffdf..1b72c164f7945 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -118,6 +118,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import (
+        Generator,
         Iterable,
         Sequence,
     )
@@ -385,20 +386,18 @@ def _split_op_result(self, result: ArrayLike) -> list[Block]:
         return [nb]
 
     @final
-    def _split(self) -> list[Block]:
+    def _split(self) -> Generator[Block, None, None]:
         """
         Split a block into a list of single-column blocks.
         """
         assert self.ndim == 2
 
-        new_blocks = []
         for i, ref_loc in enumerate(self._mgr_locs):
             vals = self.values[slice(i, i + 1)]
 
             bp = BlockPlacement(ref_loc)
             nb = type(self)(vals, placement=bp, ndim=2, refs=self.refs)
-            new_blocks.append(nb)
-        return new_blocks
+            yield nb
 
     @final
     def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
@@ -537,7 +536,9 @@ def convert_dtypes(
         rbs = []
         for blk in blks:
             # Determine dtype column by column
-            sub_blks = [blk] if blk.ndim == 1 or self.shape[0] == 1 else blk._split()
+            sub_blks = (
+                [blk] if blk.ndim == 1 or self.shape[0] == 1 else list(blk._split())
+            )
             dtypes = [
                 convert_dtypes(
                     b.values,
@@ -1190,8 +1191,7 @@ def putmask(self, mask, new) -> list[Block]:
                 is_array = isinstance(new, np.ndarray)
 
                 res_blocks = []
-                nbs = self._split()
-                for i, nb in enumerate(nbs):
+                for i, nb in enumerate(self._split()):
                     n = new
                     if is_array:
                         # we have a different value per-column
@@ -1255,8 +1255,7 @@ def where(self, other, cond) -> list[Block]:
                 is_array = isinstance(other, (np.ndarray, ExtensionArray))
 
                 res_blocks = []
-                nbs = self._split()
-                for i, nb in enumerate(nbs):
+                for i, nb in enumerate(self._split()):
                     oth = other
                     if is_array:
                         # we have a different value per-column
@@ -1698,8 +1697,7 @@ def where(self, other, cond) -> list[Block]:
                 is_array = isinstance(orig_other, (np.ndarray, ExtensionArray))
 
                 res_blocks = []
-                nbs = self._split()
-                for i, nb in enumerate(nbs):
+                for i, nb in enumerate(self._split()):
                     n = orig_other
                     if is_array:
                         # we have a different value per-column
@@ -1760,8 +1758,7 @@ def putmask(self, mask, new) -> list[Block]:
                 is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))
 
                 res_blocks = []
-                nbs = self._split()
-                for i, nb in enumerate(nbs):
+                for i, nb in enumerate(self._split()):
                     n = orig_new
                     if is_array:
                         # we have a different value per-column
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 8fda9cd23b508..7c1bcbec1d3f2 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -92,6 +92,8 @@
 )
 
 if TYPE_CHECKING:
+    from collections.abc import Generator
+
     from pandas._typing import (
         ArrayLike,
         AxisInt,
@@ -645,8 +647,7 @@ def get_bool_data(self) -> Self:
                 new_blocks.append(blk)
 
             elif blk.is_object:
-                nbs = blk._split()
-                new_blocks.extend(nb for nb in nbs if nb.is_bool)
+                new_blocks.extend(nb for nb in blk._split() if nb.is_bool)
 
         return self._combine(new_blocks)
 
@@ -1525,7 +1526,9 @@ def _insert_update_mgr_locs(self, loc) -> None:
         When inserting a new Block at location 'loc', we increment
         all of the mgr_locs of blocks above that by one.
         """
-        for blkno, count in _fast_count_smallints(self.blknos[loc:]):
+        # Faster version of set(arr) for sequences of small numbers
+        blknos = np.bincount(self.blknos[loc:]).nonzero()[0]
+        for blkno in blknos:
             # .620 this way, .326 of which is in increment_above
             blk = self.blocks[blkno]
             blk._mgr_locs = blk._mgr_locs.increment_above(loc)
@@ -1597,7 +1600,7 @@ def grouped_reduce(self, func: Callable) -> Self:
             nrows = 0
         else:
             nrows = result_blocks[0].values.shape[-1]
-        index = Index(range(nrows))
+        index = default_index(nrows)
 
         return type(self).from_blocks(result_blocks, [self.axes[0], index])
 
@@ -1735,21 +1738,18 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
         bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False)
         return bm
 
-    def to_dict(self) -> dict[str, Self]:
+    def to_iter_dict(self) -> Generator[tuple[str, Self], None, None]:
         """
-        Return a dict of str(dtype) -> BlockManager
+        Yield a tuple of (str(dtype), BlockManager)
 
         Returns
         -------
-        values : a dict of dtype -> BlockManager
+        values : a tuple of (str(dtype), BlockManager)
         """
-
-        bd: dict[str, list[Block]] = {}
-        for b in self.blocks:
-            bd.setdefault(str(b.dtype), []).append(b)
-
-        # TODO(EA2D): the combine will be unnecessary with 2D EAs
-        return {dtype: self._combine(blocks) for dtype, blocks in bd.items()}
+        key = lambda block: str(block.dtype)
+        for dtype, blocks in itertools.groupby(sorted(self.blocks, key=key), key=key):
+            # TODO(EA2D): the combine will be unnecessary with 2D EAs
+            yield dtype, self._combine(list(blocks))
 
     def as_array(
         self,
@@ -2330,7 +2330,7 @@ def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, DtypeObj]:
 
 
 def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list[Block]:
-    tuples = list(enumerate(arrays))
+    tuples = enumerate(arrays)
 
     if not consolidate:
         return _tuples_to_blocks_no_consolidate(tuples, refs)
@@ -2351,7 +2351,7 @@ def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list
             if issubclass(dtype.type, (str, bytes)):
                 dtype = np.dtype(object)
 
-            values, placement = _stack_arrays(list(tup_block), dtype)
+            values, placement = _stack_arrays(tup_block, dtype)
             if is_dtlike:
                 values = ensure_wrapped_if_datetimelike(values)
             blk = block_type(values, placement=BlockPlacement(placement), ndim=2)
@@ -2450,15 +2450,6 @@ def _merge_blocks(
     return blocks, False
 
 
-def _fast_count_smallints(arr: npt.NDArray[np.intp]):
-    """Faster version of set(arr) for sequences of small numbers."""
-    counts = np.bincount(arr)
-    nz = counts.nonzero()[0]
-    # Note: list(zip(...) outperforms list(np.c_[nz, counts[nz]]) here,
-    #  in one benchmark by a factor of 11
-    return zip(nz, counts[nz])
-
-
 def _preprocess_slice_or_indexer(
     slice_or_indexer: slice | np.ndarray, length: int, allow_fill: bool
 ):
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 92addeb29252a..43bcf84f901b1 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -347,7 +347,7 @@ def test_split(self):
         # GH#37799
         values = np.random.default_rng(2).standard_normal((3, 4))
         blk = new_block(values, placement=BlockPlacement([3, 1, 6]), ndim=2)
-        result = blk._split()
+        result = list(blk._split())
 
         # check that we get views, not copies
         values[:] = -9999

From cf953dac795e49a530df33d1f1c012bd7346a555 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dominik=20Smr=C5=BE?= <dom.smrz@gmail.com>
Date: Mon, 22 Apr 2024 20:55:37 +0200
Subject: [PATCH 013/100] Allow `tan` to be used in `df.eval`. (#58334)

* Allow `tan` to be used in `df.eval`.

* Whatsnew: Link issue for fixing `tan` in `eval`.
---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 pandas/core/computation/ops.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index c817e09b3b360..7823f74b7a153 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -458,6 +458,7 @@ Other
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
+- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
 - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py
index 7d8e23abf43b6..b7a1cb173f659 100644
--- a/pandas/core/computation/ops.py
+++ b/pandas/core/computation/ops.py
@@ -45,6 +45,7 @@
 _unary_math_ops = (
     "sin",
     "cos",
+    "tan",
     "exp",
     "log",
     "expm1",

From 281d4a8d62b2397225822b3a4f0ba4c4df6cff07 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Tue, 23 Apr 2024 00:26:22 +0530
Subject: [PATCH 014/100] DOC: Enforce Numpy Docstring Validation for
 pandas.HDFStore.keys (#58371)

* DOC: add SA01 to HDFStore.keys

* DOC: remove HDFStore.keys

* DOC: fix typo in See Also for HDFStore.keys
---
 ci/code_checks.sh     | 1 -
 pandas/io/pytables.py | 6 ++++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index f03ea65866031..17316c80f86ba 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -122,7 +122,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeTZDtype.tz SA01" \
         -i "pandas.DatetimeTZDtype.unit SA01" \
         -i "pandas.Grouper PR02" \
-        -i "pandas.HDFStore.keys SA01" \
         -i "pandas.HDFStore.put PR01,SA01" \
         -i "pandas.HDFStore.select SA01" \
         -i "pandas.HDFStore.walk SA01" \
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 89c6ac9a58382..5c04342b9eb55 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -656,6 +656,12 @@ def keys(self, include: str = "pandas") -> list[str]:
         ------
         raises ValueError if kind has an illegal value
 
+        See Also
+        --------
+        HDFStore.info : Prints detailed information on the store.
+        HDFStore.get_node : Returns the node with the key.
+        HDFStore.get_storer : Returns the storer object for a key.
+
         Examples
         --------
         >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

From 19c4769f7d793d715b008675a4f94b2e5570b025 Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Mon, 22 Apr 2024 14:56:51 -0400
Subject: [PATCH 015/100] Doc: Fixing SA01 error for DataFrame: pop and columns
 (#58359)

Doc: Fixing SA01 error for DataFrame: pop and columns
---
 ci/code_checks.sh    | 1 -
 pandas/core/frame.py | 6 ++++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 17316c80f86ba..a7a4bcf165f2a 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -90,7 +90,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.median RT03,SA01" \
         -i "pandas.DataFrame.min RT03" \
         -i "pandas.DataFrame.plot PR02,SA01" \
-        -i "pandas.DataFrame.pop SA01" \
         -i "pandas.DataFrame.prod RT03" \
         -i "pandas.DataFrame.product RT03" \
         -i "pandas.DataFrame.reorder_levels SA01" \
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 567fcb1ef7c05..3bcf41893b6c8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5535,6 +5535,11 @@ def pop(self, item: Hashable) -> Series:
         Series
             Series representing the item that is dropped.
 
+        See Also
+        --------
+        DataFrame.drop: Drop specified labels from rows or columns.
+        DataFrame.drop_duplicates: Return DataFrame with duplicate rows removed.
+
         Examples
         --------
         >>> df = pd.DataFrame(
@@ -12896,6 +12901,7 @@ def isin_(x):
                 See Also
                 --------
                 DataFrame.index: The index (row labels) of the DataFrame.
+                DataFrame.axes: Return a list representing the axes of the DataFrame.
 
                 Examples
                 --------

From 963ce7a594b4346d70a2b39a6fc81af0bb463809 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Tue, 23 Apr 2024 01:52:57 +0530
Subject: [PATCH 016/100] DOC: Enforce Numpy Docstring Validation for
 pandas.HDFStore.select (#58374)

* DOC: add SA01 to HDFStore.select

* DOC: remove HDFStore.select
---
 ci/code_checks.sh     | 1 -
 pandas/io/pytables.py | 6 ++++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index a7a4bcf165f2a..599d4d65b9101 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -122,7 +122,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeTZDtype.unit SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.HDFStore.put PR01,SA01" \
-        -i "pandas.HDFStore.select SA01" \
         -i "pandas.HDFStore.walk SA01" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.T SA01" \
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 5c04342b9eb55..0af5c753977bd 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -859,6 +859,12 @@ def select(
         object
             Retrieved object from file.
 
+        See Also
+        --------
+        HDFStore.select_as_coordinates : Returns the selection as an index.
+        HDFStore.select_column : Returns a single column from the table.
+        HDFStore.select_as_multiple : Retrieves pandas objects from multiple tables.
+
         Examples
         --------
         >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

From 0cafd1007640b9c6f3542eddd10ffffbaee49c88 Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Mon, 22 Apr 2024 18:11:44 -0400
Subject: [PATCH 017/100] DOC: Fixing SA01 issues for DatetimeIndex: date and
 tz (#58377)

* DOC: Fixing SA01 issues for DatetimeIndex: date and tz

* fixing: EXPECTED TO FAIL, BUT NOT FAILING error
---
 ci/code_checks.sh               |  4 ----
 pandas/core/arrays/datetimes.py | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 599d4d65b9101..801fe7eccd1ed 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -103,7 +103,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.to_markdown SA01" \
         -i "pandas.DataFrame.to_parquet RT03" \
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
-        -i "pandas.DatetimeIndex.date SA01" \
         -i "pandas.DatetimeIndex.day_of_year SA01" \
         -i "pandas.DatetimeIndex.dayofyear SA01" \
         -i "pandas.DatetimeIndex.freqstr SA01" \
@@ -115,7 +114,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeIndex.std PR01,RT03" \
         -i "pandas.DatetimeIndex.to_period RT03" \
         -i "pandas.DatetimeIndex.to_pydatetime RT03,SA01" \
-        -i "pandas.DatetimeIndex.tz SA01" \
         -i "pandas.DatetimeIndex.tz_convert RT03" \
         -i "pandas.DatetimeTZDtype SA01" \
         -i "pandas.DatetimeTZDtype.tz SA01" \
@@ -270,7 +268,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.dt.as_unit PR01,PR02" \
         -i "pandas.Series.dt.ceil PR01,PR02" \
         -i "pandas.Series.dt.components SA01" \
-        -i "pandas.Series.dt.date SA01" \
         -i "pandas.Series.dt.day_name PR01,PR02" \
         -i "pandas.Series.dt.day_of_year SA01" \
         -i "pandas.Series.dt.dayofyear SA01" \
@@ -290,7 +287,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.dt.strftime PR01,PR02" \
         -i "pandas.Series.dt.to_period PR01,PR02,RT03" \
         -i "pandas.Series.dt.total_seconds PR01" \
-        -i "pandas.Series.dt.tz SA01" \
         -i "pandas.Series.dt.tz_convert PR01,PR02,RT03" \
         -i "pandas.Series.dt.tz_localize PR01,PR02" \
         -i "pandas.Series.dt.unit GL08" \
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index fb9f047d432a1..203308b4f0dee 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -593,6 +593,13 @@ def tz(self) -> tzinfo | None:
         datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
             Returns None when the array is tz-naive.
 
+        See Also
+        --------
+        DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
+            given time zone, or remove timezone from a tz-aware DatetimeIndex.
+        DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
+            one time zone to another.
+
         Examples
         --------
         For Series:
@@ -1476,6 +1483,14 @@ def date(self) -> npt.NDArray[np.object_]:
         Namely, the date part of Timestamps without time and
         timezone information.
 
+        See Also
+        --------
+        DatetimeIndex.time : Returns numpy array of :class:`datetime.time` objects.
+            The time part of the Timestamps.
+        DatetimeIndex.year : The year of the datetime.
+        DatetimeIndex.month : The month as January=1, December=12.
+        DatetimeIndex.day : The day of the datetime.
+
         Examples
         --------
         For Series:

From bfe5be01fef4eaecf4ab033e74139b0a3cac4a39 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 22 Apr 2024 15:32:37 -1000
Subject: [PATCH 018/100] REF: Defer creating Index._engine until needed
 (#58370)

---
 pandas/core/frame.py        | 3 +--
 pandas/core/indexes/base.py | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 3bcf41893b6c8..4d89272013a52 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4012,7 +4012,6 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:
             return series._values[index]
 
         series = self._get_item(col)
-        engine = self.index._engine
 
         if not isinstance(self.index, MultiIndex):
             # CategoricalIndex: Trying to use the engine fastpath may give incorrect
@@ -4023,7 +4022,7 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar:
 
         # For MultiIndex going through engine effectively restricts us to
         #  same-length tuples; see test_get_set_value_no_partial_indexing
-        loc = engine.get_loc(index)
+        loc = self.index._engine.get_loc(index)
         return series._values[loc]
 
     def isetitem(self, loc, value) -> None:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 63facb61ed498..d2129c54fabc4 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -832,7 +832,8 @@ def _reset_identity(self) -> None:
 
     @final
     def _cleanup(self) -> None:
-        self._engine.clear_mapping()
+        if "_engine" in self._cache:
+            self._engine.clear_mapping()
 
     @cache_readonly
     def _engine(

From ec1dff9ff3289ab2a456d293e232cffcd4abb90d Mon Sep 17 00:00:00 2001
From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com>
Date: Tue, 23 Apr 2024 12:51:09 -0400
Subject: [PATCH 019/100] Add mailing list link (#58358)

* Add mailing list link

* Update mailing list link
---
 doc/source/development/community.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/development/community.rst b/doc/source/development/community.rst
index ccf7be8e47748..ab8294b8f135a 100644
--- a/doc/source/development/community.rst
+++ b/doc/source/development/community.rst
@@ -100,6 +100,8 @@ The pandas mailing list `pandas-dev@python.org <mailto://pandas-dev@python
 conversations and to engage people in the wider community who might not
 be active on the issue tracker but we would like to include in discussions.
 
+Join the mailing list and view the archives `here <https://mail.python.org/mailman/listinfo/pandas-dev>`_.
+
 .. _community.slack:
 
 Community slack

From 903cd53911a3e1dd79b51c28db9cfbed95fb4fc1 Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Tue, 23 Apr 2024 12:58:49 -0400
Subject: [PATCH 020/100] DOC: fixing SA01 issue for DataFrame.to_feather
 (#58378)

---
 ci/code_checks.sh    |  1 -
 pandas/core/frame.py | 10 ++++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 801fe7eccd1ed..cf21ae92496ac 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -99,7 +99,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.std PR01,RT03,SA01" \
         -i "pandas.DataFrame.sum RT03" \
         -i "pandas.DataFrame.swaplevel SA01" \
-        -i "pandas.DataFrame.to_feather SA01" \
         -i "pandas.DataFrame.to_markdown SA01" \
         -i "pandas.DataFrame.to_parquet RT03" \
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4d89272013a52..e8a0e37b70145 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2685,6 +2685,16 @@ def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None:
             This includes the `compression`, `compression_level`, `chunksize`
             and `version` keywords.
 
+        See Also
+        --------
+        DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
+        DataFrame.to_excel : Write object to an Excel sheet.
+        DataFrame.to_sql : Write to a sql table.
+        DataFrame.to_csv : Write a csv file.
+        DataFrame.to_json : Convert the object to a JSON string.
+        DataFrame.to_html : Render a DataFrame as an HTML table.
+        DataFrame.to_string : Convert DataFrame to a string.
+
         Notes
         -----
         This function writes the dataframe as a `feather file

From ff2727147d367b5b81659931e9804733711e8f6c Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 23 Apr 2024 10:04:00 -0700
Subject: [PATCH 021/100] BUG: setitem with mixed-resolution dt64s (#56419)

* BUG: setitem with mixed-resolution dt64s

* Move whatsnew to 3.0

* de-xfail

* improve exception message
---
 doc/source/whatsnew/v3.0.0.rst               |  1 +
 pandas/core/arrays/datetimes.py              |  2 +-
 pandas/core/arrays/timedeltas.py             |  2 +-
 pandas/core/indexes/datetimes.py             |  2 ++
 pandas/core/internals/blocks.py              | 17 ++++++++--
 pandas/tests/series/indexing/test_setitem.py | 33 ++++++++++++++++++++
 pandas/tests/series/methods/test_clip.py     | 28 ++++++++++++++---
 pandas/tests/series/methods/test_fillna.py   | 14 ++-------
 8 files changed, 79 insertions(+), 20 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 7823f74b7a153..4213cc8e6cfcf 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -360,6 +360,7 @@ Datetimelike
 - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
 - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
 - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
+- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
 
 Timedelta
 ^^^^^^^^^
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 203308b4f0dee..be087e19ce7b6 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -539,7 +539,7 @@ def _unbox_scalar(self, value) -> np.datetime64:
         if value is NaT:
             return np.datetime64(value._value, self.unit)
         else:
-            return value.as_unit(self.unit).asm8
+            return value.as_unit(self.unit, round_ok=False).asm8
 
     def _scalar_from_string(self, value) -> Timestamp | NaTType:
         return Timestamp(value, tz=self.tz)
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 6eb4d234b349d..ff43f97161136 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -322,7 +322,7 @@ def _unbox_scalar(self, value) -> np.timedelta64:
         if value is NaT:
             return np.timedelta64(value._value, self.unit)
         else:
-            return value.as_unit(self.unit).asm8
+            return value.as_unit(self.unit, round_ok=False).asm8
 
     def _scalar_from_string(self, value) -> Timedelta | NaTType:
         return Timedelta(value)
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 7122de745e13b..6d5f32774f485 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -515,6 +515,8 @@ def _parsed_string_to_bounds(
         freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
         per = Period(parsed, freq=freq)
         start, end = per.start_time, per.end_time
+        start = start.as_unit(self.unit)
+        end = end.as_unit(self.unit)
 
         # GH 24076
         # If an incoming date string contained a UTC offset, need to localize
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 1b72c164f7945..28d3292a1c65b 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -38,7 +38,10 @@
     Shape,
     npt,
 )
-from pandas.errors import AbstractMethodError
+from pandas.errors import (
+    AbstractMethodError,
+    OutOfBoundsDatetime,
+)
 from pandas.util._decorators import cache_readonly
 from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import validate_bool_kwarg
@@ -478,7 +481,17 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
                 f"{self.values.dtype}. Please report a bug at "
                 "https://github.com/pandas-dev/pandas/issues."
             )
-        return self.astype(new_dtype)
+        try:
+            return self.astype(new_dtype)
+        except OutOfBoundsDatetime as err:
+            # e.g. GH#56419 if self.dtype is a low-resolution dt64 and we try to
+            #  upcast to a higher-resolution dt64, we may have entries that are
+            #  out of bounds for the higher resolution.
+            #  Re-raise with a more informative message.
+            raise OutOfBoundsDatetime(
+                f"Incompatible (high-resolution) value for dtype='{self.dtype}'. "
+                "Explicitly cast before operating."
+            ) from err
 
     @final
     def convert(self) -> list[Block]:
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
index 99535f273075c..7a2a4892f61fb 100644
--- a/pandas/tests/series/indexing/test_setitem.py
+++ b/pandas/tests/series/indexing/test_setitem.py
@@ -1467,6 +1467,39 @@ def test_slice_key(self, obj, key, expected, warn, val, indexer_sli, is_inplace)
             raise AssertionError("xfail not relevant for this test.")
 
 
+@pytest.mark.parametrize(
+    "exp_dtype",
+    [
+        "M8[ms]",
+        "M8[ms, UTC]",
+        "m8[ms]",
+    ],
+)
+class TestCoercionDatetime64HigherReso(CoercionTest):
+    @pytest.fixture
+    def obj(self, exp_dtype):
+        idx = date_range("2011-01-01", freq="D", periods=4, unit="s")
+        if exp_dtype == "m8[ms]":
+            idx = idx - Timestamp("1970-01-01")
+            assert idx.dtype == "m8[s]"
+        elif exp_dtype == "M8[ms, UTC]":
+            idx = idx.tz_localize("UTC")
+        return Series(idx)
+
+    @pytest.fixture
+    def val(self, exp_dtype):
+        ts = Timestamp("2011-01-02 03:04:05.678").as_unit("ms")
+        if exp_dtype == "m8[ms]":
+            return ts - Timestamp("1970-01-01")
+        elif exp_dtype == "M8[ms, UTC]":
+            return ts.tz_localize("UTC")
+        return ts
+
+    @pytest.fixture
+    def warn(self):
+        return FutureWarning
+
+
 @pytest.mark.parametrize(
     "val,exp_dtype,warn",
     [
diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py
index 75b4050c18afe..8ed422fc118dc 100644
--- a/pandas/tests/series/methods/test_clip.py
+++ b/pandas/tests/series/methods/test_clip.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas.errors import OutOfBoundsDatetime
+
 import pandas as pd
 from pandas import (
     Series,
@@ -131,12 +133,30 @@ def test_clip_with_datetimes(self):
         )
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.parametrize("dtype", [object, "M8[us]"])
-    def test_clip_with_timestamps_and_oob_datetimes(self, dtype):
+    def test_clip_with_timestamps_and_oob_datetimes_object(self):
         # GH-42794
-        ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype)
+        ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=object)
 
         result = ser.clip(lower=Timestamp.min, upper=Timestamp.max)
-        expected = Series([Timestamp.min, Timestamp.max], dtype=dtype)
+        expected = Series([Timestamp.min, Timestamp.max], dtype=object)
+
+        tm.assert_series_equal(result, expected)
+
+    def test_clip_with_timestamps_and_oob_datetimes_non_nano(self):
+        # GH#56410
+        dtype = "M8[us]"
+        ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype)
+
+        msg = (
+            r"Incompatible \(high-resolution\) value for dtype='datetime64\[us\]'. "
+            "Explicitly cast before operating"
+        )
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            ser.clip(lower=Timestamp.min, upper=Timestamp.max)
+
+        lower = Timestamp.min.as_unit("us")
+        upper = Timestamp.max.as_unit("us")
+        result = ser.clip(lower=lower, upper=upper)
+        expected = Series([lower, upper], dtype=dtype)
 
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py
index 0965d36e4827d..592dba253532d 100644
--- a/pandas/tests/series/methods/test_fillna.py
+++ b/pandas/tests/series/methods/test_fillna.py
@@ -308,12 +308,7 @@ def test_datetime64_fillna(self):
         "scalar",
         [
             False,
-            pytest.param(
-                True,
-                marks=pytest.mark.xfail(
-                    reason="GH#56410 scalar case not yet addressed"
-                ),
-            ),
+            True,
         ],
     )
     @pytest.mark.parametrize("tz", [None, "UTC"])
@@ -342,12 +337,7 @@ def test_datetime64_fillna_mismatched_reso_no_rounding(self, tz, scalar):
         "scalar",
         [
             False,
-            pytest.param(
-                True,
-                marks=pytest.mark.xfail(
-                    reason="GH#56410 scalar case not yet addressed"
-                ),
-            ),
+            True,
         ],
     )
     def test_timedelta64_fillna_mismatched_reso_no_rounding(self, scalar):

From 191a56c32578be7ae7d231108abbe4ce1c4378e9 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Tue, 23 Apr 2024 22:50:15 +0530
Subject: [PATCH 022/100] DOC: Enforce Numpy Docstring Validation for
 pandas.HDFStore.put (#58384)

* DOC: add SA01 and PR01 to HDFStore.put

* DOC: remove SA01 and PR01 of HDFStore.put
---
 ci/code_checks.sh     |  1 -
 pandas/io/pytables.py | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index cf21ae92496ac..5993fabfc9d6c 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -118,7 +118,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeTZDtype.tz SA01" \
         -i "pandas.DatetimeTZDtype.unit SA01" \
         -i "pandas.Grouper PR02" \
-        -i "pandas.HDFStore.put PR01,SA01" \
         -i "pandas.HDFStore.walk SA01" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.T SA01" \
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 0af5c753977bd..75e9b779e5094 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1144,12 +1144,27 @@ def put(
             Write DataFrame index as a column.
         append : bool, default False
             This will force Table format, append the input data to the existing.
+        complib : default None
+            This parameter is currently not accepted.
+        complevel : int, 0-9, default None
+            Specifies a compression level for data.
+            A value of 0 or None disables compression.
+        min_itemsize : int, dict, or None
+            Dict of columns that specify minimum str sizes.
+        nan_rep : str
+            Str to use as str nan representation.
         data_columns : list of columns or True, default None
             List of columns to create as data columns, or True to use all columns.
             See `here
             <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__.
         encoding : str, default None
             Provide an encoding for strings.
+        errors : str, default 'strict'
+            The error handling scheme to use for encoding errors.
+            The default is 'strict' meaning that encoding errors raise a
+            UnicodeEncodeError.  Other possible values are 'ignore', 'replace' and
+            'xmlcharrefreplace' as well as any other name registered with
+            codecs.register_error that can handle UnicodeEncodeErrors.
         track_times : bool, default True
             Parameter is propagated to 'create_table' method of 'PyTables'.
             If set to False it enables to have the same h5 files (same hashes)
@@ -1157,6 +1172,11 @@ def put(
         dropna : bool, default False, optional
             Remove missing values.
 
+        See Also
+        --------
+        HDFStore.info : Prints detailed information on the store.
+        HDFStore.get_storer : Returns the storer object for a key.
+
         Examples
         --------
         >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

From bd9c09b4331f890fc9fb4698deaf2d168060941b Mon Sep 17 00:00:00 2001
From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
Date: Tue, 23 Apr 2024 20:21:58 +0300
Subject: [PATCH 023/100] DEPR: to_pytimedelta return Index[object] (#58383)

* DEPR: to_pytimedelta return Index[object]

* ignore doctest warning

---------

Co-authored-by: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst                |  1 +
 pandas/conftest.py                            |  1 +
 pandas/core/indexes/accessors.py              | 20 +++++++++++++++++++
 pandas/tests/extension/test_arrow.py          |  8 ++++++--
 .../series/accessors/test_cat_accessor.py     |  3 +++
 .../series/accessors/test_dt_accessor.py      |  4 +++-
 6 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 4213cc8e6cfcf..02e4aba667408 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -199,6 +199,7 @@ Other Deprecations
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`,  :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`)
 - Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`)
 - Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`)
+- Deprecated the behavior of :meth:`Series.dt.to_pytimedelta`. In a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`)
 - Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
 -
 
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 34489bb70575a..21100178262c8 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -157,6 +157,7 @@ def pytest_collection_modifyitems(items, config) -> None:
         ("SeriesGroupBy.fillna", "SeriesGroupBy.fillna is deprecated"),
         ("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
         ("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"),
+        ("to_pytimedelta", "The behavior of TimedeltaProperties.to_pytimedelta"),
         # Docstring divides by zero to show behavior difference
         ("missing.mask_zero_div_zero", "divide by zero encountered"),
         (
diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py
index 2bb234e174563..3dcd1fedc8d64 100644
--- a/pandas/core/indexes/accessors.py
+++ b/pandas/core/indexes/accessors.py
@@ -9,10 +9,12 @@
     NoReturn,
     cast,
 )
+import warnings
 
 import numpy as np
 
 from pandas._libs import lib
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
     is_integer_dtype,
@@ -210,6 +212,15 @@ def _delegate_method(self, name: str, *args, **kwargs):
         return result
 
     def to_pytimedelta(self):
+        # GH 57463
+        warnings.warn(
+            f"The behavior of {type(self).__name__}.to_pytimedelta is deprecated, "
+            "in a future version this will return a Series containing python "
+            "datetime.timedelta objects instead of an ndarray. To retain the "
+            "old behavior, call `np.array` on the result",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
         return cast(ArrowExtensionArray, self._parent.array)._dt_to_pytimedelta()
 
     def to_pydatetime(self) -> Series:
@@ -462,6 +473,15 @@ def to_pytimedelta(self) -> np.ndarray:
         datetime.timedelta(days=2), datetime.timedelta(days=3),
         datetime.timedelta(days=4)], dtype=object)
         """
+        # GH 57463
+        warnings.warn(
+            f"The behavior of {type(self).__name__}.to_pytimedelta is deprecated, "
+            "in a future version this will return a Series containing python "
+            "datetime.timedelta objects instead of an ndarray. To retain the "
+            "old behavior, call `np.array` on the result",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
         return self._get_values().to_pytimedelta()
 
     @property
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 9b2251d0b7d4a..79440b55dd5dd 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2861,12 +2861,16 @@ def test_dt_to_pytimedelta():
     data = [timedelta(1, 2, 3), timedelta(1, 2, 4)]
     ser = pd.Series(data, dtype=ArrowDtype(pa.duration("ns")))
 
-    result = ser.dt.to_pytimedelta()
+    msg = "The behavior of ArrowTemporalProperties.to_pytimedelta is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = ser.dt.to_pytimedelta()
     expected = np.array(data, dtype=object)
     tm.assert_numpy_array_equal(result, expected)
     assert all(type(res) is timedelta for res in result)
 
-    expected = ser.astype("timedelta64[ns]").dt.to_pytimedelta()
+    msg = "The behavior of TimedeltaProperties.to_pytimedelta is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        expected = ser.astype("timedelta64[ns]").dt.to_pytimedelta()
     tm.assert_numpy_array_equal(result, expected)
 
 
diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py
index ca2768efd5c68..ce8ea27ea1fa2 100644
--- a/pandas/tests/series/accessors/test_cat_accessor.py
+++ b/pandas/tests/series/accessors/test_cat_accessor.py
@@ -200,6 +200,9 @@ def test_dt_accessor_api_for_categorical(self, idx):
             if func == "to_period" and getattr(idx, "tz", None) is not None:
                 # dropping TZ
                 warn_cls.append(UserWarning)
+            elif func == "to_pytimedelta":
+                # GH 57463
+                warn_cls.append(FutureWarning)
             if warn_cls:
                 warn_cls = tuple(warn_cls)
             else:
diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py
index 5f0057ac50b47..8c60f7beb317d 100644
--- a/pandas/tests/series/accessors/test_dt_accessor.py
+++ b/pandas/tests/series/accessors/test_dt_accessor.py
@@ -192,7 +192,9 @@ def test_dt_namespace_accessor_timedelta(self):
             assert isinstance(result, DataFrame)
             tm.assert_index_equal(result.index, ser.index)
 
-            result = ser.dt.to_pytimedelta()
+            msg = "The behavior of TimedeltaProperties.to_pytimedelta is deprecated"
+            with tm.assert_produces_warning(FutureWarning, match=msg):
+                result = ser.dt.to_pytimedelta()
             assert isinstance(result, np.ndarray)
             assert result.dtype == object
 

From 9b7d09d69e252e6afff4d991728713a541e03045 Mon Sep 17 00:00:00 2001
From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
Date: Tue, 23 Apr 2024 20:23:16 +0300
Subject: [PATCH 024/100] TST: No longer produce test_stata.dta file after
 running test suite (#58381)

Use tmp_path fixture

Co-authored-by: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com>
---
 pandas/tests/io/test_stata.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 43c62237c6786..2650f351e2203 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -1962,7 +1962,7 @@ def test_writer_118_exceptions(self, temp_file):
         "dtype_backend",
         ["numpy_nullable", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))],
     )
-    def test_read_write_ea_dtypes(self, dtype_backend, temp_file):
+    def test_read_write_ea_dtypes(self, dtype_backend, temp_file, tmp_path):
         df = DataFrame(
             {
                 "a": [1, 2, None],
@@ -1974,7 +1974,8 @@ def test_read_write_ea_dtypes(self, dtype_backend, temp_file):
             index=pd.Index([0, 1, 2], name="index"),
         )
         df = df.convert_dtypes(dtype_backend=dtype_backend)
-        df.to_stata("test_stata.dta", version=118)
+        stata_path = tmp_path / "test_stata.dta"
+        df.to_stata(stata_path, version=118)
 
         df.to_stata(temp_file)
         written_and_read_again = self.read_dta(temp_file)

From 23dd1f12aea8bfd503ea86ce1850de817cf0fe43 Mon Sep 17 00:00:00 2001
From: Nrezhang <102526155+Nrezhang@users.noreply.github.com>
Date: Tue, 23 Apr 2024 13:25:32 -0400
Subject: [PATCH 025/100] DOC: Fix ecosystem link in style user guide (#58324) (#58379)

---
 doc/source/user_guide/style.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb
index f831723f44931..43da43a983429 100644
--- a/doc/source/user_guide/style.ipynb
+++ b/doc/source/user_guide/style.ipynb
@@ -1908,7 +1908,7 @@
     "- Provide an API that is pleasing to use interactively and is \"good enough\" for many tasks\n",
     "- Provide the foundations for dedicated libraries to build on\n",
     "\n",
-    "If you build a great library on top of this, let us know and we'll [link](https://pandas.pydata.org/pandas-docs/stable/ecosystem.html) to it.\n",
+    "If you build a great library on top of this, let us know and we'll [link](https://pandas.pydata.org/community/ecosystem.html) to it.\n",
     "\n",
     "### Subclassing\n",
     "\n",

From ffca68426fe32c61428aaec02e2283063148ed47 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Wed, 24 Apr 2024 00:04:15 +0530
Subject: [PATCH 026/100] DOC: Enforce Numpy Docstring Validation for
 pandas.HDFStore.walk (#58386)

* DOC: add SA01 to HDFStore.walk

* DOC: remove SA01 of HDFStore.walk
---
 ci/code_checks.sh     | 1 -
 pandas/io/pytables.py | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 5993fabfc9d6c..24dacd6b48a42 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -118,7 +118,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeTZDtype.tz SA01" \
         -i "pandas.DatetimeTZDtype.unit SA01" \
         -i "pandas.Grouper PR02" \
-        -i "pandas.HDFStore.walk SA01" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.T SA01" \
         -i "pandas.Index.append PR07,RT03,SA01" \
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 75e9b779e5094..d585c59dd5581 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1595,6 +1595,10 @@ def walk(self, where: str = "/") -> Iterator[tuple[str, list[str], list[str]]]:
         leaves : list
             Names (strings) of the pandas objects contained in `path`.
 
+        See Also
+        --------
+        HDFStore.info : Prints detailed information on the store.
+
         Examples
         --------
         >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

From 9d5c88e52ac1a652e8392003a8aa4cdb52bc29f6 Mon Sep 17 00:00:00 2001
From: bdwzhangumich <112042021+bdwzhangumich@users.noreply.github.com>
Date: Tue, 23 Apr 2024 14:07:42 -0600
Subject: [PATCH 027/100] ENH: Implement cummax and cummin in _accumulate() for
 ordered Categorical arrays (#58360)

* Added tests with and without np.nan

* Added tests for cummin and cummax

* Fixed series tests expected series, rewrote categorical arrays to use pd.Categorical

* Fixed cat not defined error and misspelling

* Implement _accumulate for Categorical

* fixed misspellings in tests

* fixed expected categories on tests

* Updated whatsnew

* Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Removed testing for _accumulate.

* Moved categorical_accumulations.py logic to categorical.py

* Assigned expected results to expected variable; Added pytest.mark.parametrize to test_cummax_cummin_ordered_categorical_nan with skipna and expected data

---------

Co-authored-by: Christopher Xiang <xiangc@umich.edu>
Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Co-authored-by: Chris Xiang <124408670+xiangchris@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst         |  1 +
 pandas/core/arrays/categorical.py      | 23 ++++++++++++
 pandas/tests/series/test_cumulative.py | 52 ++++++++++++++++++++++++++
 3 files changed, 76 insertions(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 02e4aba667408..9a432e03e9cf4 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -41,6 +41,7 @@ Other enhancements
 - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
 - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
 - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
+- :meth:`Series.cummin` and :meth:`Series.cummax` now support ordered :class:`CategoricalDtype` (:issue:`52335`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 8d6880fc2acb3..6a3cf4590568c 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -6,6 +6,7 @@
 from shutil import get_terminal_size
 from typing import (
     TYPE_CHECKING,
+    Callable,
     Literal,
     cast,
     overload,
@@ -2508,6 +2509,28 @@ def equals(self, other: object) -> bool:
             return np.array_equal(self._codes, other._codes)
         return False
 
+    def _accumulate(self, name: str, skipna: bool = True, **kwargs) -> Self:
+        func: Callable
+        if name == "cummin":
+            func = np.minimum.accumulate
+        elif name == "cummax":
+            func = np.maximum.accumulate
+        else:
+            raise TypeError(f"Accumulation {name} not supported for {type(self)}")
+        self.check_for_ordered(name)
+
+        codes = self.codes.copy()
+        mask = self.isna()
+        if func == np.minimum.accumulate:
+            codes[mask] = np.iinfo(codes.dtype.type).max
+        # no need to change codes for maximum because codes[mask] is already -1
+        if not skipna:
+            mask = np.maximum.accumulate(mask)
+
+        codes = func(codes)
+        codes[mask] = -1
+        return self._simple_new(codes, dtype=self._dtype)
+
     @classmethod
     def _concat_same_type(cls, to_concat: Sequence[Self], axis: AxisInt = 0) -> Self:
         from pandas.core.dtypes.concat import union_categoricals
diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py
index 9b7b08127a550..a9d5486139b46 100644
--- a/pandas/tests/series/test_cumulative.py
+++ b/pandas/tests/series/test_cumulative.py
@@ -170,6 +170,58 @@ def test_cummethods_bool_in_object_dtype(self, method, expected):
         result = getattr(ser, method)()
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "method, order",
+        [
+            ["cummax", "abc"],
+            ["cummin", "cba"],
+        ],
+    )
+    def test_cummax_cummin_on_ordered_categorical(self, method, order):
+        # GH#52335
+        cat = pd.CategoricalDtype(list(order), ordered=True)
+        ser = pd.Series(
+            list("ababcab"),
+            dtype=cat,
+        )
+        result = getattr(ser, method)()
+        expected = pd.Series(
+            list("abbbccc"),
+            dtype=cat,
+        )
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "skip, exp",
+        [
+            [True, ["a", np.nan, "b", "b", "c"]],
+            [False, ["a", np.nan, np.nan, np.nan, np.nan]],
+        ],
+    )
+    @pytest.mark.parametrize(
+        "method, order",
+        [
+            ["cummax", "abc"],
+            ["cummin", "cba"],
+        ],
+    )
+    def test_cummax_cummin_ordered_categorical_nan(self, skip, exp, method, order):
+        # GH#52335
+        cat = pd.CategoricalDtype(list(order), ordered=True)
+        ser = pd.Series(
+            ["a", np.nan, "b", "a", "c"],
+            dtype=cat,
+        )
+        result = getattr(ser, method)(skipna=skip)
+        expected = pd.Series(
+            exp,
+            dtype=cat,
+        )
+        tm.assert_series_equal(
+            result,
+            expected,
+        )
+
     def test_cumprod_timedelta(self):
         # GH#48111
         ser = pd.Series([pd.Timedelta(days=1), pd.Timedelta(days=3)])

From 8aa4f0eb5a7a456f9476ff4b1bd6743ca25c949b Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Wed, 24 Apr 2024 01:38:30 +0530
Subject: [PATCH 028/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeTZDtype.unit (#58387)

* DOC: add SA01 to DatetimeTZDtype.unit

* DOC: remove SA01 of DatetimeTZDtype.unit
---
 ci/code_checks.sh            | 1 -
 pandas/core/dtypes/dtypes.py | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 24dacd6b48a42..066c7176fcc34 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -116,7 +116,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeIndex.tz_convert RT03" \
         -i "pandas.DatetimeTZDtype SA01" \
         -i "pandas.DatetimeTZDtype.tz SA01" \
-        -i "pandas.DatetimeTZDtype.unit SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.T SA01" \
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 98e689528744e..0a97a0d03c22a 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -793,6 +793,10 @@ def unit(self) -> str_type:
         """
         The precision of the datetime data.
 
+        See Also
+        --------
+        DatetimeTZDtype.tz : Retrieves the timezone.
+
         Examples
         --------
         >>> from zoneinfo import ZoneInfo

From e9b0a3c914088ce1f89cde16c61f61807ccc6730 Mon Sep 17 00:00:00 2001
From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
Date: Wed, 24 Apr 2024 02:30:51 +0300
Subject: [PATCH 029/100] CLN: Enforce empty bool indexer deprecation (#58390)

* CLN: Enforce empty bool indexer deprecation

* Add whatsnew entry
---
 doc/source/whatsnew/v3.0.0.rst    | 1 +
 pandas/core/indexes/base.py       | 9 +++------
 pandas/tests/indexes/test_base.py | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 9a432e03e9cf4..781b3b2282a87 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -222,6 +222,7 @@ Removal of prior version deprecations/changes
 - Disallow automatic casting to object in :class:`Series` logical operations (``&``, ``^``, ``||``) between series with mismatched indexes and dtypes other than ``object`` or ``bool`` (:issue:`52538`)
 - Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`)
 - Disallow constructing a :class:`arrays.SparseArray` with scalar data (:issue:`53039`)
+- Disallow indexing an :class:`Index` with a boolean indexer of length zero; it now raises ``ValueError`` (:issue:`55820`)
 - Disallow non-standard (``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`)
 - Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`)
 - Disallow units other than "s", "ms", "us", "ns" for datetime64 and timedelta64 dtypes in :func:`array` (:issue:`53817`)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index d2129c54fabc4..5654111132b5e 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5033,12 +5033,9 @@ def __getitem__(self, key):
 
             if not isinstance(self.dtype, ExtensionDtype):
                 if len(key) == 0 and len(key) != len(self):
-                    warnings.warn(
-                        "Using a boolean indexer with length 0 on an Index with "
-                        "length greater than 0 is deprecated and will raise in a "
-                        "future version.",
-                        FutureWarning,
-                        stacklevel=find_stack_level(),
+                    raise ValueError(
+                        "The length of the boolean indexer cannot be 0 "
+                        "when the Index has length greater than 0."
                     )
 
         result = getitem(key)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 3a2d04d3ffdc2..301c4794be4ef 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -481,7 +481,7 @@ def test_empty_fancy(self, index, dtype, request, using_infer_string):
 
         assert index[[]].identical(empty_index)
         if dtype == np.bool_:
-            with tm.assert_produces_warning(FutureWarning, match="is deprecated"):
+            with pytest.raises(ValueError, match="length of the boolean indexer"):
                 assert index[empty_arr].identical(empty_index)
         else:
             assert index[empty_arr].identical(empty_index)

From b6c15ea2cb8b50035be5b111cd656d6983d00788 Mon Sep 17 00:00:00 2001
From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
Date: Wed, 24 Apr 2024 19:18:02 +0300
Subject: [PATCH 030/100] BUG: Let check_exact_index default to True for
 integers (#58189)

* Default check_exact_index to True for integers

* Fix pyright issue

* fix logic for multiindex

* Pre-commit stuff

* Address review comments

---------

Co-authored-by: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst                |  1 +
 pandas/_testing/asserters.py                  | 22 +++++++++-
 pandas/tests/util/test_assert_series_equal.py | 41 +++++++++++++++++--
 3 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 781b3b2282a87..027c692c6c89e 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -159,6 +159,7 @@ Other API changes
 - Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`)
 - pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
 - pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`)
+- when comparing the indexes in :func:`testing.assert_series_equal`, check_exact defaults to True if an :class:`Index` is of integer dtypes. (:issue:`57386`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.deprecations:
diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
index 3aacd3099c334..543d7944e4c5d 100644
--- a/pandas/_testing/asserters.py
+++ b/pandas/_testing/asserters.py
@@ -861,12 +861,19 @@ def assert_series_equal(
     check_names : bool, default True
         Whether to check the Series and Index names attribute.
     check_exact : bool, default False
-        Whether to compare number exactly.
+        Whether to compare number exactly. This also applies when checking
+        Index equivalence.
 
         .. versionchanged:: 2.2.0
 
             Defaults to True for integer dtypes if none of
             ``check_exact``, ``rtol`` and ``atol`` are specified.
+
+        .. versionchanged:: 3.0.0
+
+            check_exact for comparing the Indexes defaults to True by
+            checking if an Index is of integer dtypes.
+
     check_datetimelike_compat : bool, default False
         Compare datetime-like which is comparable ignoring dtype.
     check_categorical : bool, default True
@@ -902,7 +909,6 @@ def assert_series_equal(
     >>> tm.assert_series_equal(a, b)
     """
     __tracebackhide__ = True
-    check_exact_index = False if check_exact is lib.no_default else check_exact
     if (
         check_exact is lib.no_default
         and rtol is lib.no_default
@@ -914,8 +920,20 @@ def assert_series_equal(
             or is_numeric_dtype(right.dtype)
             and not is_float_dtype(right.dtype)
         )
+        left_index_dtypes = (
+            [left.index.dtype] if left.index.nlevels == 1 else left.index.dtypes
+        )
+        right_index_dtypes = (
+            [right.index.dtype] if right.index.nlevels == 1 else right.index.dtypes
+        )
+        check_exact_index = all(
+            dtype.kind in "iu" for dtype in left_index_dtypes
+        ) or all(dtype.kind in "iu" for dtype in right_index_dtypes)
     elif check_exact is lib.no_default:
         check_exact = False
+        check_exact_index = False
+    else:
+        check_exact_index = check_exact
 
     rtol = rtol if rtol is not lib.no_default else 1.0e-5
     atol = atol if atol is not lib.no_default else 1.0e-8
diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
index 0b3bc07c17452..f75f48157aad2 100644
--- a/pandas/tests/util/test_assert_series_equal.py
+++ b/pandas/tests/util/test_assert_series_equal.py
@@ -475,9 +475,44 @@ def test_assert_series_equal_int_tol():
     )
 
 
-def test_assert_series_equal_index_exact_default():
+@pytest.mark.parametrize(
+    "left_idx, right_idx",
+    [
+        (
+            pd.Index([0, 0.2, 0.4, 0.6, 0.8, 1]),
+            pd.Index(np.linspace(0, 1, 6)),
+        ),
+        (
+            pd.MultiIndex.from_arrays([[0, 0, 0, 0, 1, 1], [0, 0.2, 0.4, 0.6, 0.8, 1]]),
+            pd.MultiIndex.from_arrays([[0, 0, 0, 0, 1, 1], np.linspace(0, 1, 6)]),
+        ),
+        (
+            pd.MultiIndex.from_arrays(
+                [["a", "a", "a", "b", "b", "b"], [1, 2, 3, 4, 5, 10000000000001]]
+            ),
+            pd.MultiIndex.from_arrays(
+                [["a", "a", "a", "b", "b", "b"], [1, 2, 3, 4, 5, 10000000000002]]
+            ),
+        ),
+        pytest.param(
+            pd.Index([1, 2, 3, 4, 5, 10000000000001]),
+            pd.Index([1, 2, 3, 4, 5, 10000000000002]),
+            marks=pytest.mark.xfail(reason="check_exact_index defaults to True"),
+        ),
+        pytest.param(
+            pd.MultiIndex.from_arrays(
+                [[0, 0, 0, 0, 1, 1], [1, 2, 3, 4, 5, 10000000000001]]
+            ),
+            pd.MultiIndex.from_arrays(
+                [[0, 0, 0, 0, 1, 1], [1, 2, 3, 4, 5, 10000000000002]]
+            ),
+            marks=pytest.mark.xfail(reason="check_exact_index defaults to True"),
+        ),
+    ],
+)
+def test_assert_series_equal_check_exact_index_default(left_idx, right_idx):
     # GH#57067
-    ser1 = Series(np.zeros(6, dtype=int), [0, 0.2, 0.4, 0.6, 0.8, 1])
-    ser2 = Series(np.zeros(6, dtype=int), np.linspace(0, 1, 6))
+    ser1 = Series(np.zeros(6, dtype=int), left_idx)
+    ser2 = Series(np.zeros(6, dtype=int), right_idx)
     tm.assert_series_equal(ser1, ser2)
     tm.assert_frame_equal(ser1.to_frame(), ser2.to_frame())

From c342e9f0be5bae1895f60e6afc9435d0afb087ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dominik=20Smr=C5=BE?= <dom.smrz@gmail.com>
Date: Wed, 24 Apr 2024 18:19:00 +0200
Subject: [PATCH 031/100] Extend eval test of standard functions to cover
 python engine. (#58393)

Extend eval test of ops to cover python engine.
---
 pandas/tests/computation/test_eval.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 8f14c562fa7c3..f7d1fcfa3e469 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1609,22 +1609,20 @@ def eval(self, *args, **kwargs):
         kwargs["level"] = kwargs.pop("level", 0) + 1
         return pd.eval(*args, **kwargs)
 
-    @pytest.mark.skipif(
-        not NUMEXPR_INSTALLED, reason="Unary ops only implemented for numexpr"
-    )
+    @pytest.mark.filterwarnings("ignore::RuntimeWarning")
     @pytest.mark.parametrize("fn", _unary_math_ops)
-    def test_unary_functions(self, fn):
+    def test_unary_functions(self, fn, engine, parser):
         df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
         a = df.a
 
         expr = f"{fn}(a)"
-        got = self.eval(expr)
+        got = self.eval(expr, engine=engine, parser=parser)
         with np.errstate(all="ignore"):
             expect = getattr(np, fn)(a)
         tm.assert_series_equal(got, expect, check_names=False)
 
     @pytest.mark.parametrize("fn", _binary_math_ops)
-    def test_binary_functions(self, fn):
+    def test_binary_functions(self, fn, engine, parser):
         df = DataFrame(
             {
                 "a": np.random.default_rng(2).standard_normal(10),
@@ -1635,7 +1633,7 @@ def test_binary_functions(self, fn):
         b = df.b
 
         expr = f"{fn}(a, b)"
-        got = self.eval(expr)
+        got = self.eval(expr, engine=engine, parser=parser)
         with np.errstate(all="ignore"):
             expect = getattr(np, fn)(a, b)
         tm.assert_almost_equal(got, expect, check_names=False)

From ea2f857be39fe2b6c360178a5d63b8ea7173a5ed Mon Sep 17 00:00:00 2001
From: Gianluca Ficarelli <26835404+GianlucaFicarelli@users.noreply.github.com>
Date: Wed, 24 Apr 2024 18:19:40 +0200
Subject: [PATCH 032/100] PERF: MultiIndex.memory_usage shouldn't trigger the
 index engine (#58385)

* PERF: MultiIndex.memory_usage shouldn't trigger the index engine

Ignore the index engine when it isn't already cached.

* Move test, sort whatsnew
---
 doc/source/whatsnew/v3.0.0.rst        |  1 +
 pandas/core/indexes/base.py           |  5 +++--
 pandas/core/indexes/multi.py          |  5 +++--
 pandas/tests/indexes/test_old_base.py | 24 ++++++++++++++++++++++++
 4 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 027c692c6c89e..ca97e2b6ffb6b 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -333,6 +333,7 @@ Performance improvements
 - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
 - Performance improvement in :meth:`Index.to_frame` returning a :class:`RangeIndex` columns of a :class:`Index` when possible. (:issue:`58018`)
 - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
+- Performance improvement in :meth:`MultiIndex.memory_usage` to ignore the index engine when it isn't already cached. (:issue:`58385`)
 - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
 - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
 - Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 5654111132b5e..e08b585920779 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4863,8 +4863,9 @@ def _from_join_target(self, result: np.ndarray) -> ArrayLike:
     def memory_usage(self, deep: bool = False) -> int:
         result = self._memory_usage(deep=deep)
 
-        # include our engine hashtable
-        result += self._engine.sizeof(deep=deep)
+        # include our engine hashtable, only if it's already cached
+        if "_engine" in self._cache:
+            result += self._engine.sizeof(deep=deep)
         return result
 
     @final
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 21ce9b759f2df..c8e16fad00d5b 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -1391,8 +1391,9 @@ def _nbytes(self, deep: bool = False) -> int:
         names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
         result = level_nbytes + label_nbytes + names_nbytes
 
-        # include our engine hashtable
-        result += self._engine.sizeof(deep=deep)
+        # include our engine hashtable, only if it's already cached
+        if "_engine" in self._cache:
+            result += self._engine.sizeof(deep=deep)
         return result
 
     # --------------------------------------------------------------------
diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py
index 9b4470021cc1d..b929616c814ee 100644
--- a/pandas/tests/indexes/test_old_base.py
+++ b/pandas/tests/indexes/test_old_base.py
@@ -326,6 +326,30 @@ def test_memory_usage(self, index):
         if index.inferred_type == "object":
             assert result3 > result2
 
+    def test_memory_usage_doesnt_trigger_engine(self, index):
+        index._cache.clear()
+        assert "_engine" not in index._cache
+
+        res_without_engine = index.memory_usage()
+        assert "_engine" not in index._cache
+
+        # explicitly load and cache the engine
+        _ = index._engine
+        assert "_engine" in index._cache
+
+        res_with_engine = index.memory_usage()
+
+        # the empty engine doesn't affect the result even when initialized with values,
+        # because engine.sizeof() doesn't consider the content of engine.values
+        assert res_with_engine == res_without_engine
+
+        if len(index) == 0:
+            assert res_without_engine == 0
+            assert res_with_engine == 0
+        else:
+            assert res_without_engine > 0
+            assert res_with_engine > 0
+
     def test_argsort(self, index):
         if isinstance(index, CategoricalIndex):
             pytest.skip(f"{type(self).__name__} separately tested")

From 9e7565ac0e1886f7ae27981ef67561563326ddd6 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Wed, 24 Apr 2024 21:50:25 +0530
Subject: [PATCH 033/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.unique (#58399)

* DOC: add RT03 to pandas.Index.unique

* DOC: remove pandas.Index.unique
---
 ci/code_checks.sh           | 1 -
 pandas/core/indexes/base.py | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 066c7176fcc34..101d650a0e768 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -153,7 +153,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.take PR01,PR07" \
         -i "pandas.Index.to_list RT03" \
         -i "pandas.Index.union PR07,RT03,SA01" \
-        -i "pandas.Index.unique RT03" \
         -i "pandas.Index.view GL08" \
         -i "pandas.Int16Dtype SA01" \
         -i "pandas.Int32Dtype SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index e08b585920779..2bb0aedb8bd84 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2647,6 +2647,7 @@ def unique(self, level: Hashable | None = None) -> Self:
         Returns
         -------
         Index
+            Unique values in the index.
 
         See Also
         --------

From ba60432eda7f7ea0479eb63aae43ac680a2b8678 Mon Sep 17 00:00:00 2001
From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
Date: Wed, 24 Apr 2024 19:25:35 +0300
Subject: [PATCH 034/100] TST: Added match argument for most uses of
 tm.assert_produces_warning (#58396)

* Fix for all FutureWarnings

* Add match for most warnings

* Cleaner code

---------

Co-authored-by: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com>
---
 .../development/contributing_codebase.rst     |  5 +--
 .../tests/arrays/sparse/test_constructors.py  |  6 ++--
 pandas/tests/arrays/test_datetimelike.py      |  2 +-
 pandas/tests/computation/test_eval.py         |  5 +--
 pandas/tests/dtypes/test_common.py            |  2 +-
 pandas/tests/dtypes/test_generic.py           |  2 +-
 pandas/tests/frame/indexing/test_indexing.py  |  2 +-
 pandas/tests/frame/indexing/test_setitem.py   |  5 ++-
 pandas/tests/frame/methods/test_to_dict.py    |  2 +-
 pandas/tests/frame/test_arithmetic.py         |  7 ++--
 pandas/tests/frame/test_reductions.py         |  2 +-
 .../tests/indexes/base_class/test_setops.py   |  4 +--
 .../datetimes/methods/test_to_period.py       | 20 ++++-------
 pandas/tests/indexes/multi/test_setops.py     |  2 +-
 pandas/tests/indexes/test_base.py             |  4 +--
 pandas/tests/indexes/test_index_new.py        | 11 ++----
 pandas/tests/indexes/test_setops.py           |  2 +-
 pandas/tests/internals/test_internals.py      | 12 ++++---
 pandas/tests/io/formats/test_css.py           | 29 +++++++--------
 pandas/tests/io/formats/test_to_excel.py      |  2 +-
 .../tests/io/json/test_json_table_schema.py   |  2 +-
 pandas/tests/io/test_clipboard.py             |  2 +-
 pandas/tests/io/test_common.py                |  2 +-
 pandas/tests/io/test_compression.py           |  2 +-
 pandas/tests/io/test_sql.py                   |  2 +-
 pandas/tests/io/test_stata.py                 | 35 ++++++++++++-------
 pandas/tests/plotting/frame/test_frame.py     | 10 +++---
 .../plotting/frame/test_frame_subplots.py     |  4 +--
 pandas/tests/plotting/test_boxplot_method.py  | 11 +++---
 pandas/tests/reductions/test_reductions.py    |  2 +-
 pandas/tests/reshape/merge/test_merge.py      |  5 +--
 .../timestamp/methods/test_to_pydatetime.py   |  3 +-
 .../tests/scalar/timestamp/test_timestamp.py  |  3 +-
 pandas/tests/series/test_arithmetic.py        |  5 +--
 pandas/tests/test_expressions.py              | 13 +++----
 pandas/tests/test_optional_dependency.py      |  6 ++--
 pandas/tests/tools/test_to_datetime.py        |  5 ++-
 pandas/tests/window/test_expanding.py         |  4 ++-
 pandas/tests/window/test_rolling_quantile.py  |  4 ++-
 39 files changed, 130 insertions(+), 116 deletions(-)

diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst
index 39e279fd5c917..28129440b86d7 100644
--- a/doc/source/development/contributing_codebase.rst
+++ b/doc/source/development/contributing_codebase.rst
@@ -557,11 +557,12 @@ is being raised, using ``pytest.raises`` instead.
 Testing a warning
 ^^^^^^^^^^^^^^^^^
 
-Use ``tm.assert_produces_warning`` as a context manager to check that a block of code raises a warning.
+Use ``tm.assert_produces_warning`` as a context manager to check that a block of code raises a warning
+and specify the warning message using the ``match`` argument.
 
 .. code-block:: python
 
-    with tm.assert_produces_warning(DeprecationWarning):
+    with tm.assert_produces_warning(DeprecationWarning, match="the warning message"):
         pd.deprecated_function()
 
 If a warning should specifically not happen in a block of code, pass ``False`` into the context manager.
diff --git a/pandas/tests/arrays/sparse/test_constructors.py b/pandas/tests/arrays/sparse/test_constructors.py
index 012ff1da0d431..0bf3ab77e9eed 100644
--- a/pandas/tests/arrays/sparse/test_constructors.py
+++ b/pandas/tests/arrays/sparse/test_constructors.py
@@ -90,13 +90,13 @@ def test_constructor_warns_when_losing_timezone(self):
         dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
 
         expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]"))
-
-        with tm.assert_produces_warning(UserWarning):
+        msg = "loses timezone information"
+        with tm.assert_produces_warning(UserWarning, match=msg):
             result = SparseArray(dti)
 
         tm.assert_sp_array_equal(result, expected)
 
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match=msg):
             result = SparseArray(pd.Series(dti))
 
         tm.assert_sp_array_equal(result, expected)
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index cfc04b5c91354..22c63af59a47c 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -778,7 +778,7 @@ def test_to_period_2d(self, arr1d):
         arr2d = arr1d.reshape(1, -1)
 
         warn = None if arr1d.tz is None else UserWarning
-        with tm.assert_produces_warning(warn):
+        with tm.assert_produces_warning(warn, match="will drop timezone information"):
             result = arr2d.to_period("D")
             expected = arr1d.to_period("D").reshape(1, -1)
         tm.assert_period_array_equal(result, expected)
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index f7d1fcfa3e469..ebbb31205e264 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1014,7 +1014,8 @@ def test_performance_warning_for_poor_alignment(
         else:
             seen = False
 
-        with tm.assert_produces_warning(seen):
+        msg = "Alignment difference on axis 1 is larger than an order of magnitude"
+        with tm.assert_produces_warning(seen, match=msg):
             pd.eval("df + s", engine=engine, parser=parser)
 
         s = Series(np.random.default_rng(2).standard_normal(1000))
@@ -1036,7 +1037,7 @@ def test_performance_warning_for_poor_alignment(
         else:
             wrn = False
 
-        with tm.assert_produces_warning(wrn) as w:
+        with tm.assert_produces_warning(wrn, match=msg) as w:
             pd.eval("df + s", engine=engine, parser=parser)
 
             if not is_python_engine and performance_warning:
diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index c34c97b6e4f04..f47815ee059af 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -797,5 +797,5 @@ def test_pandas_dtype_numpy_warning():
 
 def test_pandas_dtype_ea_not_instance():
     # GH 31356 GH 54592
-    with tm.assert_produces_warning(UserWarning):
+    with tm.assert_produces_warning(UserWarning, match="without any arguments"):
         assert pandas_dtype(CategoricalDtype) == CategoricalDtype()
diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py
index 02c827853b29d..261f86bfb0326 100644
--- a/pandas/tests/dtypes/test_generic.py
+++ b/pandas/tests/dtypes/test_generic.py
@@ -124,7 +124,7 @@ def test_setattr_warnings():
         #  this should not raise a warning
         df.two.not_an_index = [1, 2]
 
-    with tm.assert_produces_warning(UserWarning):
+    with tm.assert_produces_warning(UserWarning, match="doesn't allow columns"):
         #  warn when setting column to nonexistent name
         df.four = df.two + 2
         assert df.four.sum() > df.two.sum()
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 5a6fe07aa007b..69e6228d6efde 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -145,7 +145,7 @@ def test_getitem_boolean(self, mixed_float_frame, mixed_int_frame, datetime_fram
         # we are producing a warning that since the passed boolean
         # key is not the same as the given index, we will reindex
         # not sure this is really necessary
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="will be reindexed"):
             indexer_obj = indexer_obj.reindex(datetime_frame.index[::-1])
             subframe_obj = datetime_frame[indexer_obj]
             tm.assert_frame_equal(subframe_obj, subframe)
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index 3f98f49cd1877..ed81e8c8b8129 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -711,7 +711,10 @@ def test_setitem_npmatrix_2d(self):
         df["np-array"] = a
 
         # Instantiation of `np.matrix` gives PendingDeprecationWarning
-        with tm.assert_produces_warning(PendingDeprecationWarning):
+        with tm.assert_produces_warning(
+            PendingDeprecationWarning,
+            match="matrix subclass is not the recommended way to represent matrices",
+        ):
             df["np-matrix"] = np.matrix(a)
 
         tm.assert_frame_equal(df, expected)
diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py
index b8631d95a6399..11adc9f6179ce 100644
--- a/pandas/tests/frame/methods/test_to_dict.py
+++ b/pandas/tests/frame/methods/test_to_dict.py
@@ -166,7 +166,7 @@ def test_to_dict_not_unique_warning(self):
         # GH#16927: When converting to a dict, if a column has a non-unique name
         # it will be dropped, throwing a warning.
         df = DataFrame([[1, 2, 3]], columns=["a", "a", "b"])
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="columns will be omitted"):
             df.to_dict()
 
     @pytest.mark.filterwarnings("ignore::UserWarning")
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index f463b3f94fa55..91b5f905ada22 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -1097,7 +1097,7 @@ def test_binop_other(self, op, value, dtype, switch_numexpr_min_elements):
                     and expr.USE_NUMEXPR
                     and switch_numexpr_min_elements == 0
                 ):
-                    warn = UserWarning  # "evaluating in Python space because ..."
+                    warn = UserWarning
             else:
                 msg = (
                     f"cannot perform __{op.__name__}__ with this "
@@ -1105,17 +1105,16 @@ def test_binop_other(self, op, value, dtype, switch_numexpr_min_elements):
                 )
 
             with pytest.raises(TypeError, match=msg):
-                with tm.assert_produces_warning(warn):
+                with tm.assert_produces_warning(warn, match="evaluating in Python"):
                     op(df, elem.value)
 
         elif (op, dtype) in skip:
             if op in [operator.add, operator.mul]:
                 if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0:
-                    # "evaluating in Python space because ..."
                     warn = UserWarning
                 else:
                     warn = None
-                with tm.assert_produces_warning(warn):
+                with tm.assert_produces_warning(warn, match="evaluating in Python"):
                     op(df, elem.value)
 
             else:
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 8ccd7b2ca83ba..5118561f67338 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -699,7 +699,7 @@ def test_mode_sortwarning(self, using_infer_string):
         expected = DataFrame({"A": ["a", np.nan]})
 
         warning = None if using_infer_string else UserWarning
-        with tm.assert_produces_warning(warning):
+        with tm.assert_produces_warning(warning, match="Unable to sort modes"):
             result = df.mode(dropna=False)
             result = result.sort_values(by="A").reset_index(drop=True)
 
diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py
index 49c6a91236db7..d57df82b2358c 100644
--- a/pandas/tests/indexes/base_class/test_setops.py
+++ b/pandas/tests/indexes/base_class/test_setops.py
@@ -84,13 +84,13 @@ def test_union_sort_other_incomparable(self):
         # https://github.com/pandas-dev/pandas/issues/24959
         idx = Index([1, pd.Timestamp("2000")])
         # default (sort=None)
-        with tm.assert_produces_warning(RuntimeWarning):
+        with tm.assert_produces_warning(RuntimeWarning, match="not supported between"):
             result = idx.union(idx[:1])
 
         tm.assert_index_equal(result, idx)
 
         # sort=None
-        with tm.assert_produces_warning(RuntimeWarning):
+        with tm.assert_produces_warning(RuntimeWarning, match="not supported between"):
             result = idx.union(idx[:1], sort=None)
         tm.assert_index_equal(result, idx)
 
diff --git a/pandas/tests/indexes/datetimes/methods/test_to_period.py b/pandas/tests/indexes/datetimes/methods/test_to_period.py
index 05e9a294d74a6..5b2cc55d6dc56 100644
--- a/pandas/tests/indexes/datetimes/methods/test_to_period.py
+++ b/pandas/tests/indexes/datetimes/methods/test_to_period.py
@@ -117,10 +117,10 @@ def test_to_period_infer(self):
             freq="5min",
         )
 
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="drop timezone info"):
             pi1 = rng.to_period("5min")
 
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="drop timezone info"):
             pi2 = rng.to_period()
 
         tm.assert_index_equal(pi1, pi2)
@@ -143,8 +143,7 @@ def test_to_period_millisecond(self):
             ]
         )
 
-        with tm.assert_produces_warning(UserWarning):
-            # warning that timezone info will be lost
+        with tm.assert_produces_warning(UserWarning, match="drop timezone info"):
             period = index.to_period(freq="ms")
         assert 2 == len(period)
         assert period[0] == Period("2007-01-01 10:11:12.123Z", "ms")
@@ -158,8 +157,7 @@ def test_to_period_microsecond(self):
             ]
         )
 
-        with tm.assert_produces_warning(UserWarning):
-            # warning that timezone info will be lost
+        with tm.assert_produces_warning(UserWarning, match="drop timezone info"):
             period = index.to_period(freq="us")
         assert 2 == len(period)
         assert period[0] == Period("2007-01-01 10:11:12.123456Z", "us")
@@ -172,10 +170,7 @@ def test_to_period_microsecond(self):
     def test_to_period_tz(self, tz):
         ts = date_range("1/1/2000", "2/1/2000", tz=tz)
 
-        with tm.assert_produces_warning(UserWarning):
-            # GH#21333 warning that timezone info will be lost
-            # filter warning about freq deprecation
-
+        with tm.assert_produces_warning(UserWarning, match="drop timezone info"):
             result = ts.to_period()[0]
             expected = ts[0].to_period(ts.freq)
 
@@ -183,8 +178,7 @@ def test_to_period_tz(self, tz):
 
         expected = date_range("1/1/2000", "2/1/2000").to_period()
 
-        with tm.assert_produces_warning(UserWarning):
-            # GH#21333 warning that timezone info will be lost
+        with tm.assert_produces_warning(UserWarning, match="drop timezone info"):
             result = ts.to_period(ts.freq)
 
         tm.assert_index_equal(result, expected)
@@ -193,7 +187,7 @@ def test_to_period_tz(self, tz):
     def test_to_period_tz_utc_offset_consistency(self, tz):
         # GH#22905
         ts = date_range("1/1/2000", "2/1/2000", tz="Etc/GMT-1")
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="drop timezone info"):
             result = ts.to_period()[0]
             expected = ts[0].to_period(ts.freq)
             assert result == expected
diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py
index 9354984538c58..47f21cc7f8182 100644
--- a/pandas/tests/indexes/multi/test_setops.py
+++ b/pandas/tests/indexes/multi/test_setops.py
@@ -382,7 +382,7 @@ def test_union_sort_other_incomparable():
     idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
 
     # default, sort=None
-    with tm.assert_produces_warning(RuntimeWarning):
+    with tm.assert_produces_warning(RuntimeWarning, match="are unorderable"):
         result = idx.union(idx[:1])
     tm.assert_index_equal(result, idx)
 
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 301c4794be4ef..04858643d97b1 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -1065,10 +1065,10 @@ def test_outer_join_sort(self):
         left_index = Index(np.random.default_rng(2).permutation(15))
         right_index = date_range("2020-01-01", periods=10)
 
-        with tm.assert_produces_warning(RuntimeWarning):
+        with tm.assert_produces_warning(RuntimeWarning, match="not supported between"):
             result = left_index.join(right_index, how="outer")
 
-        with tm.assert_produces_warning(RuntimeWarning):
+        with tm.assert_produces_warning(RuntimeWarning, match="not supported between"):
             expected = left_index.astype(object).union(right_index.astype(object))
 
         tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py
index 21cb0b8723d59..b544ebac43ece 100644
--- a/pandas/tests/indexes/test_index_new.py
+++ b/pandas/tests/indexes/test_index_new.py
@@ -142,25 +142,18 @@ def test_constructor_infer_nat_dt_like(
         data = [ctor]
         data.insert(pos, nulls_fixture)
 
-        warn = None
         if nulls_fixture is NA:
             expected = Index([NA, NaT])
             mark = pytest.mark.xfail(reason="Broken with np.NaT ctor; see GH 31884")
             request.applymarker(mark)
-            # GH#35942 numpy will emit a DeprecationWarning within the
-            #  assert_index_equal calls.  Since we can't do anything
-            #  about it until GH#31884 is fixed, we suppress that warning.
-            warn = DeprecationWarning
 
         result = Index(data)
 
-        with tm.assert_produces_warning(warn):
-            tm.assert_index_equal(result, expected)
+        tm.assert_index_equal(result, expected)
 
         result = Index(np.array(data, dtype=object))
 
-        with tm.assert_produces_warning(warn):
-            tm.assert_index_equal(result, expected)
+        tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize("swap_objs", [True, False])
     def test_constructor_mixed_nat_objs_infers_object(self, swap_objs):
diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
index 9a3471fe526c1..8fd349dacf9e9 100644
--- a/pandas/tests/indexes/test_setops.py
+++ b/pandas/tests/indexes/test_setops.py
@@ -882,7 +882,7 @@ def test_difference_incomparable(self, opname):
         b = Index([2, Timestamp("1999"), 1])
         op = operator.methodcaller(opname, b)
 
-        with tm.assert_produces_warning(RuntimeWarning):
+        with tm.assert_produces_warning(RuntimeWarning, match="not supported between"):
             # sort=None, the default
             result = op(a)
         expected = Index([3, Timestamp("2000"), 2, Timestamp("1999")])
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 43bcf84f901b1..749e2c4a86b55 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -1280,19 +1280,20 @@ def test_interval_can_hold_element(self, dtype, element):
         # `elem` to not have the same length as `arr`
         ii2 = IntervalIndex.from_breaks(arr[:-1], closed="neither")
         elem = element(ii2)
-        with tm.assert_produces_warning(FutureWarning):
+        msg = "Setting an item of incompatible dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             self.check_series_setitem(elem, ii, False)
         assert not blk._can_hold_element(elem)
 
         ii3 = IntervalIndex.from_breaks([Timestamp(1), Timestamp(3), Timestamp(4)])
         elem = element(ii3)
-        with tm.assert_produces_warning(FutureWarning):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             self.check_series_setitem(elem, ii, False)
         assert not blk._can_hold_element(elem)
 
         ii4 = IntervalIndex.from_breaks([Timedelta(1), Timedelta(3), Timedelta(4)])
         elem = element(ii4)
-        with tm.assert_produces_warning(FutureWarning):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             self.check_series_setitem(elem, ii, False)
         assert not blk._can_hold_element(elem)
 
@@ -1312,12 +1313,13 @@ def test_period_can_hold_element(self, element):
         # `elem` to not have the same length as `arr`
         pi2 = pi.asfreq("D")[:-1]
         elem = element(pi2)
-        with tm.assert_produces_warning(FutureWarning):
+        msg = "Setting an item of incompatible dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             self.check_series_setitem(elem, pi, False)
 
         dti = pi.to_timestamp("s")[:-1]
         elem = element(dti)
-        with tm.assert_produces_warning(FutureWarning):
+        with tm.assert_produces_warning(FutureWarning, match=msg):
             self.check_series_setitem(elem, pi, False)
 
     def check_can_hold_element(self, obj, elem, inplace: bool):
diff --git a/pandas/tests/io/formats/test_css.py b/pandas/tests/io/formats/test_css.py
index 8bf9aa4ac04d3..c4ecb48006cb1 100644
--- a/pandas/tests/io/formats/test_css.py
+++ b/pandas/tests/io/formats/test_css.py
@@ -38,30 +38,31 @@ def test_css_parse_normalisation(name, norm, abnorm):
 
 
 @pytest.mark.parametrize(
-    "invalid_css,remainder",
+    "invalid_css,remainder,msg",
     [
         # No colon
-        ("hello-world", ""),
-        ("border-style: solid; hello-world", "border-style: solid"),
+        ("hello-world", "", "expected a colon"),
+        ("border-style: solid; hello-world", "border-style: solid", "expected a colon"),
         (
             "border-style: solid; hello-world; font-weight: bold",
             "border-style: solid; font-weight: bold",
+            "expected a colon",
         ),
         # Unclosed string fail
         # Invalid size
-        ("font-size: blah", "font-size: 1em"),
-        ("font-size: 1a2b", "font-size: 1em"),
-        ("font-size: 1e5pt", "font-size: 1em"),
-        ("font-size: 1+6pt", "font-size: 1em"),
-        ("font-size: 1unknownunit", "font-size: 1em"),
-        ("font-size: 10", "font-size: 1em"),
-        ("font-size: 10 pt", "font-size: 1em"),
+        ("font-size: blah", "font-size: 1em", "Unhandled size"),
+        ("font-size: 1a2b", "font-size: 1em", "Unhandled size"),
+        ("font-size: 1e5pt", "font-size: 1em", "Unhandled size"),
+        ("font-size: 1+6pt", "font-size: 1em", "Unhandled size"),
+        ("font-size: 1unknownunit", "font-size: 1em", "Unhandled size"),
+        ("font-size: 10", "font-size: 1em", "Unhandled size"),
+        ("font-size: 10 pt", "font-size: 1em", "Unhandled size"),
         # Too many args
-        ("border-top: 1pt solid red green", "border-top: 1pt solid green"),
+        ("border-top: 1pt solid red green", "border-top: 1pt solid green", "Too many"),
     ],
 )
-def test_css_parse_invalid(invalid_css, remainder):
-    with tm.assert_produces_warning(CSSWarning):
+def test_css_parse_invalid(invalid_css, remainder, msg):
+    with tm.assert_produces_warning(CSSWarning, match=msg):
         assert_same_resolution(invalid_css, remainder)
 
 
@@ -120,7 +121,7 @@ def test_css_side_shorthands(shorthand, expansions):
         {top: "1pt", right: "4pt", bottom: "2pt", left: "0pt"},
     )
 
-    with tm.assert_produces_warning(CSSWarning):
+    with tm.assert_produces_warning(CSSWarning, match="Could not expand"):
         assert_resolves(f"{shorthand}: 1pt 1pt 1pt 1pt 1pt", {})
 
 
diff --git a/pandas/tests/io/formats/test_to_excel.py b/pandas/tests/io/formats/test_to_excel.py
index 3b782713eed6c..b40201b9ba1e6 100644
--- a/pandas/tests/io/formats/test_to_excel.py
+++ b/pandas/tests/io/formats/test_to_excel.py
@@ -325,7 +325,7 @@ def test_css_to_excel_bad_colors(input_color):
     if input_color is not None:
         expected["fill"] = {"patternType": "solid"}
 
-    with tm.assert_produces_warning(CSSWarning):
+    with tm.assert_produces_warning(CSSWarning, match="Unhandled color format"):
         convert = CSSToExcelConverter()
         assert expected == convert(css)
 
diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
index ec49b7644ea0e..a0d5b3a741aaf 100644
--- a/pandas/tests/io/json/test_json_table_schema.py
+++ b/pandas/tests/io/json/test_json_table_schema.py
@@ -639,7 +639,7 @@ def test_warns_non_roundtrippable_names(self, idx):
         # GH 19130
         df = DataFrame(index=idx)
         df.index.name = "index"
-        with tm.assert_produces_warning():
+        with tm.assert_produces_warning(UserWarning, match="not round-trippable"):
             set_default_names(df)
 
     def test_timestamp_in_columns(self):
diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
index 5f19c15817ce7..babbddafa3b49 100644
--- a/pandas/tests/io/test_clipboard.py
+++ b/pandas/tests/io/test_clipboard.py
@@ -222,7 +222,7 @@ def test_excel_sep_warning(self, df):
 
     # Separator is ignored when excel=False and should produce a warning
     def test_copy_delim_warning(self, df):
-        with tm.assert_produces_warning():
+        with tm.assert_produces_warning(UserWarning, match="ignores the sep argument"):
             df.to_clipboard(excel=False, sep="\t")
 
     # Tests that the default behavior of to_clipboard is tab
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index f5880d8a894f8..ad729d2346a3b 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -463,7 +463,7 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
             index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
         )
         with tm.ensure_clean() as path:
-            with tm.assert_produces_warning(UnicodeWarning):
+            with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
                 df.to_csv(path, compression=compression_, encoding=encoding)
 
             # reading should fail (otherwise we wouldn't need the warning)
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index 3a58dda9e8dc4..00082be7e07e8 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -133,7 +133,7 @@ def test_compression_warning(compression_only):
     )
     with tm.ensure_clean() as path:
         with icom.get_handle(path, "w", compression=compression_only) as handles:
-            with tm.assert_produces_warning(RuntimeWarning):
+            with tm.assert_produces_warning(RuntimeWarning, match="has no effect"):
                 df.to_csv(handles.handle, compression=compression_only)
 
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 3083fa24ba8b5..af77972d9fd26 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -2602,7 +2602,7 @@ def close(self):
             self.conn.close()
 
     with contextlib.closing(MockSqliteConnection(":memory:")) as conn:
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="only supports SQLAlchemy"):
             sql.read_sql("SELECT 1", conn)
 
 
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 2650f351e2203..d7fb3c0049965 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -189,11 +189,12 @@ def test_read_dta2(self, datapath):
         path2 = datapath("io", "data", "stata", "stata2_115.dta")
         path3 = datapath("io", "data", "stata", "stata2_117.dta")
 
-        with tm.assert_produces_warning(UserWarning):
+        msg = "Leaving in Stata Internal Format"
+        with tm.assert_produces_warning(UserWarning, match=msg):
             parsed_114 = self.read_dta(path1)
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match=msg):
             parsed_115 = self.read_dta(path2)
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match=msg):
             parsed_117 = self.read_dta(path3)
             # FIXME: don't leave commented-out
             # 113 is buggy due to limits of date format support in Stata
@@ -478,7 +479,8 @@ def test_read_write_dta11(self, temp_file):
         formatted = formatted.astype(np.int32)
 
         path = temp_file
-        with tm.assert_produces_warning(InvalidColumnName):
+        msg = "Not all pandas column names were valid Stata variable names"
+        with tm.assert_produces_warning(InvalidColumnName, match=msg):
             original.to_stata(path, convert_dates=None)
 
         written_and_read_again = self.read_dta(path)
@@ -515,7 +517,8 @@ def test_read_write_dta12(self, version, temp_file):
         formatted = formatted.astype(np.int32)
 
         path = temp_file
-        with tm.assert_produces_warning(InvalidColumnName):
+        msg = "Not all pandas column names were valid Stata variable names"
+        with tm.assert_produces_warning(InvalidColumnName, match=msg):
             original.to_stata(path, convert_dates=None, version=version)
             # should get a warning for that format.
 
@@ -612,7 +615,8 @@ def test_numeric_column_names(self, temp_file):
         original.index.name = "index"
         path = temp_file
         # should get a warning for that format.
-        with tm.assert_produces_warning(InvalidColumnName):
+        msg = "Not all pandas column names were valid Stata variable names"
+        with tm.assert_produces_warning(InvalidColumnName, match=msg):
             original.to_stata(path)
 
         written_and_read_again = self.read_dta(path)
@@ -672,7 +676,7 @@ def test_large_value_conversion(self, temp_file):
         original = DataFrame({"s0": s0, "s1": s1, "s2": s2, "s3": s3})
         original.index.name = "index"
         path = temp_file
-        with tm.assert_produces_warning(PossiblePrecisionLoss):
+        with tm.assert_produces_warning(PossiblePrecisionLoss, match="from int64 to"):
             original.to_stata(path)
 
         written_and_read_again = self.read_dta(path)
@@ -687,7 +691,8 @@ def test_dates_invalid_column(self, temp_file):
         original = DataFrame([datetime(2006, 11, 19, 23, 13, 20)])
         original.index.name = "index"
         path = temp_file
-        with tm.assert_produces_warning(InvalidColumnName):
+        msg = "Not all pandas column names were valid Stata variable names"
+        with tm.assert_produces_warning(InvalidColumnName, match=msg):
             original.to_stata(path, convert_dates={0: "tc"})
 
         written_and_read_again = self.read_dta(path)
@@ -1111,7 +1116,8 @@ def test_categorical_warnings_and_errors(self, temp_file):
             [["a"], ["b"], ["c"], ["d"], [1]], columns=["Too_long"]
         ).astype("category")
 
-        with tm.assert_produces_warning(ValueLabelTypeMismatch):
+        msg = "data file created has not lost information due to duplicate labels"
+        with tm.assert_produces_warning(ValueLabelTypeMismatch, match=msg):
             original.to_stata(path)
             # should get a warning for mixed content
 
@@ -1732,7 +1738,8 @@ def test_convert_strl_name_swap(self, temp_file):
         )
         original.index.name = "index"
 
-        with tm.assert_produces_warning(InvalidColumnName):
+        msg = "Not all pandas column names were valid Stata variable names"
+        with tm.assert_produces_warning(InvalidColumnName, match=msg):
             path = temp_file
             original.to_stata(path, convert_strl=["long", 1], version=117)
             reread = self.read_dta(path)
@@ -2139,8 +2146,9 @@ def test_chunked_categorical(version, temp_file):
 def test_chunked_categorical_partial(datapath):
     dta_file = datapath("io", "data", "stata", "stata-dta-partially-labeled.dta")
     values = ["a", "b", "a", "b", 3.0]
+    msg = "series with value labels are not fully labeled"
     with StataReader(dta_file, chunksize=2) as reader:
-        with tm.assert_produces_warning(CategoricalConversionWarning):
+        with tm.assert_produces_warning(CategoricalConversionWarning, match=msg):
             for i, block in enumerate(reader):
                 assert list(block.cats) == values[2 * i : 2 * (i + 1)]
                 if i < 2:
@@ -2148,7 +2156,7 @@ def test_chunked_categorical_partial(datapath):
                 else:
                     idx = pd.Index([3.0], dtype="float64")
                 tm.assert_index_equal(block.cats.cat.categories, idx)
-    with tm.assert_produces_warning(CategoricalConversionWarning):
+    with tm.assert_produces_warning(CategoricalConversionWarning, match=msg):
         with StataReader(dta_file, chunksize=5) as reader:
             large_chunk = reader.__next__()
     direct = read_stata(dta_file)
@@ -2304,7 +2312,8 @@ def test_non_categorical_value_label_name_conversion(temp_file):
         "_1__2_": {3: "three"},
     }
 
-    with tm.assert_produces_warning(InvalidColumnName):
+    msg = "Not all pandas column names were valid Stata variable names"
+    with tm.assert_produces_warning(InvalidColumnName, match=msg):
         data.to_stata(temp_file, value_labels=value_labels)
 
     with StataReader(temp_file) as reader:
diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py
index 65c9083d9fe2b..c30cb96fef252 100644
--- a/pandas/tests/plotting/frame/test_frame.py
+++ b/pandas/tests/plotting/frame/test_frame.py
@@ -2001,7 +2001,7 @@ def _check(axes):
         plt.close("all")
 
         gs, axes = _generate_4_axes_via_gridspec()
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="sharex and sharey"):
             axes = df.plot(subplots=True, ax=axes, sharex=True)
         _check(axes)
 
@@ -2065,7 +2065,7 @@ def _check(axes):
         plt.close("all")
 
         gs, axes = _generate_4_axes_via_gridspec()
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="sharex and sharey"):
             axes = df.plot(subplots=True, ax=axes, sharey=True)
 
         gs.tight_layout(plt.gcf())
@@ -2186,7 +2186,7 @@ def _get_horizontal_grid():
 
         # vertical / subplots / sharex=True / sharey=True
         ax1, ax2 = _get_vertical_grid()
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="sharex and sharey"):
             axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True)
         assert len(axes[0].lines) == 1
         assert len(axes[1].lines) == 1
@@ -2202,7 +2202,7 @@ def _get_horizontal_grid():
 
         # horizontal / subplots / sharex=True / sharey=True
         ax1, ax2 = _get_horizontal_grid()
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="sharex and sharey"):
             axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True)
         assert len(axes[0].lines) == 1
         assert len(axes[1].lines) == 1
@@ -2252,7 +2252,7 @@ def _get_boxed_grid():
 
         # subplots / sharex=True / sharey=True
         axes = _get_boxed_grid()
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="sharex and sharey"):
             axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True)
         for ax in axes:
             assert len(ax.lines) == 1
diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py
index 511266d5786c5..a98f4b56ebf4d 100644
--- a/pandas/tests/plotting/frame/test_frame_subplots.py
+++ b/pandas/tests/plotting/frame/test_frame_subplots.py
@@ -335,7 +335,7 @@ def test_subplots_multiple_axes_2_dim(self, layout, exp_layout):
             np.random.default_rng(2).random((10, 4)),
             index=list(string.ascii_letters[:10]),
         )
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="layout keyword is ignored"):
             returned = df.plot(
                 subplots=True, ax=axes, layout=layout, sharex=False, sharey=False
             )
@@ -501,7 +501,7 @@ def test_df_subplots_patterns_minorticks_1st_ax_hidden(self):
             columns=list("AB"),
         )
         _, axes = plt.subplots(2, 1)
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="sharex and sharey"):
             axes = df.plot(subplots=True, ax=axes, sharex=True)
         for ax in axes:
             assert len(ax.lines) == 1
diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py
index f8029a1c1ee40..573f95eed15ef 100644
--- a/pandas/tests/plotting/test_boxplot_method.py
+++ b/pandas/tests/plotting/test_boxplot_method.py
@@ -129,7 +129,8 @@ def test_boxplot_legacy2_with_multi_col(self):
         df["Y"] = Series(["A"] * 10)
         # Multiple columns with an ax argument should use same figure
         fig, ax = mpl.pyplot.subplots()
-        with tm.assert_produces_warning(UserWarning):
+        msg = "the figure containing the passed axes is being cleared"
+        with tm.assert_produces_warning(UserWarning, match=msg):
             axes = df.boxplot(
                 column=["Col1", "Col2"], by="X", ax=ax, return_type="axes"
             )
@@ -607,7 +608,7 @@ def test_grouped_box_multiple_axes(self, hist_df):
         # passes multiple axes to plot, hist or boxplot
         # location should be changed if other test is added
         # which has earlier alphabetical order
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="sharex and sharey"):
             _, axes = mpl.pyplot.subplots(2, 2)
             df.groupby("category").boxplot(column="height", return_type="axes", ax=axes)
             _check_axes_shape(mpl.pyplot.gcf().axes, axes_num=4, layout=(2, 2))
@@ -617,7 +618,7 @@ def test_grouped_box_multiple_axes_on_fig(self, hist_df):
         # GH 6970, GH 7069
         df = hist_df
         fig, axes = mpl.pyplot.subplots(2, 3)
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="sharex and sharey"):
             returned = df.boxplot(
                 column=["height", "weight", "category"],
                 by="gender",
@@ -630,7 +631,7 @@ def test_grouped_box_multiple_axes_on_fig(self, hist_df):
         assert returned[0].figure is fig
 
         # draw on second row
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="sharex and sharey"):
             returned = df.groupby("classroom").boxplot(
                 column=["height", "weight", "category"], return_type="axes", ax=axes[1]
             )
@@ -647,7 +648,7 @@ def test_grouped_box_multiple_axes_ax_error(self, hist_df):
         _, axes = mpl.pyplot.subplots(2, 3)
         with pytest.raises(ValueError, match=msg):
             # pass different number of axes from required
-            with tm.assert_produces_warning(UserWarning):
+            with tm.assert_produces_warning(UserWarning, match="sharex and sharey"):
                 axes = df.groupby("classroom").boxplot(ax=axes)
 
     def test_fontsize(self):
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 46753b668a8b0..422ed8d4f3d2b 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -1558,7 +1558,7 @@ def test_mode_sortwarning(self):
         expected = Series(["foo", np.nan])
         s = Series([1, "foo", "foo", np.nan, np.nan])
 
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match="Unable to sort modes"):
             result = s.mode(dropna=False)
             result = result.sort_values().reset_index(drop=True)
 
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 7ab8ee24bd194..5c5c06dea0008 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -1565,11 +1565,12 @@ def test_merge_on_ints_floats_warning(self):
         B = DataFrame({"Y": [1.1, 2.5, 3.0]})
         expected = DataFrame({"X": [3], "Y": [3.0]})
 
-        with tm.assert_produces_warning(UserWarning):
+        msg = "the float values are not equal to their int representation"
+        with tm.assert_produces_warning(UserWarning, match=msg):
             result = A.merge(B, left_on="X", right_on="Y")
             tm.assert_frame_equal(result, expected)
 
-        with tm.assert_produces_warning(UserWarning):
+        with tm.assert_produces_warning(UserWarning, match=msg):
             result = B.merge(A, left_on="Y", right_on="X")
             tm.assert_frame_equal(result, expected[["Y", "X"]])
 
diff --git a/pandas/tests/scalar/timestamp/methods/test_to_pydatetime.py b/pandas/tests/scalar/timestamp/methods/test_to_pydatetime.py
index 57f57e56201c8..be6ec7dbc24c7 100644
--- a/pandas/tests/scalar/timestamp/methods/test_to_pydatetime.py
+++ b/pandas/tests/scalar/timestamp/methods/test_to_pydatetime.py
@@ -24,7 +24,8 @@ def test_to_pydatetime_nonzero_nano(self):
         ts = Timestamp("2011-01-01 9:00:00.123456789")
 
         # Warn the user of data loss (nanoseconds).
-        with tm.assert_produces_warning(UserWarning):
+        msg = "Discarding nonzero nanoseconds in conversion"
+        with tm.assert_produces_warning(UserWarning, match=msg):
             expected = datetime(2011, 1, 1, 9, 0, 0, 123456)
             result = ts.to_pydatetime()
             assert result == expected
diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py
index ea970433464fc..79fd285073983 100644
--- a/pandas/tests/scalar/timestamp/test_timestamp.py
+++ b/pandas/tests/scalar/timestamp/test_timestamp.py
@@ -501,8 +501,7 @@ def test_to_period_tz_warning(self):
         # GH#21333 make sure a warning is issued when timezone
         # info is lost
         ts = Timestamp("2009-04-15 16:17:18", tz="US/Eastern")
-        with tm.assert_produces_warning(UserWarning):
-            # warning that timezone info will be lost
+        with tm.assert_produces_warning(UserWarning, match="drop timezone information"):
             ts.to_period("D")
 
     def test_to_numpy_alias(self):
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
index 44bf3475b85a6..f0930a831e98d 100644
--- a/pandas/tests/series/test_arithmetic.py
+++ b/pandas/tests/series/test_arithmetic.py
@@ -359,12 +359,13 @@ def test_add_list_to_masked_array_boolean(self, request):
             else None
         )
         ser = Series([True, None, False], dtype="boolean")
-        with tm.assert_produces_warning(warning):
+        msg = "operator is not supported by numexpr for the bool dtype"
+        with tm.assert_produces_warning(warning, match=msg):
             result = ser + [True, None, True]
         expected = Series([True, None, True], dtype="boolean")
         tm.assert_series_equal(result, expected)
 
-        with tm.assert_produces_warning(warning):
+        with tm.assert_produces_warning(warning, match=msg):
             result = [True, None, True] + ser
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
index 68dcc1a18eda7..8f275345a7819 100644
--- a/pandas/tests/test_expressions.py
+++ b/pandas/tests/test_expressions.py
@@ -339,35 +339,36 @@ def test_bool_ops_warn_on_arithmetic(self, op_str, opname, monkeypatch):
             # raises TypeError
             return
 
+        msg = "operator is not supported by numexpr"
         with monkeypatch.context() as m:
             m.setattr(expr, "_MIN_ELEMENTS", 5)
             with option_context("compute.use_numexpr", True):
-                with tm.assert_produces_warning():
+                with tm.assert_produces_warning(UserWarning, match=msg):
                     r = f(df, df)
                     e = fe(df, df)
                     tm.assert_frame_equal(r, e)
 
-                with tm.assert_produces_warning():
+                with tm.assert_produces_warning(UserWarning, match=msg):
                     r = f(df.a, df.b)
                     e = fe(df.a, df.b)
                     tm.assert_series_equal(r, e)
 
-                with tm.assert_produces_warning():
+                with tm.assert_produces_warning(UserWarning, match=msg):
                     r = f(df.a, True)
                     e = fe(df.a, True)
                     tm.assert_series_equal(r, e)
 
-                with tm.assert_produces_warning():
+                with tm.assert_produces_warning(UserWarning, match=msg):
                     r = f(False, df.a)
                     e = fe(False, df.a)
                     tm.assert_series_equal(r, e)
 
-                with tm.assert_produces_warning():
+                with tm.assert_produces_warning(UserWarning, match=msg):
                     r = f(False, df)
                     e = fe(False, df)
                     tm.assert_frame_equal(r, e)
 
-                with tm.assert_produces_warning():
+                with tm.assert_produces_warning(UserWarning, match=msg):
                     r = f(df, True)
                     e = fe(df, True)
                     tm.assert_frame_equal(r, e)
diff --git a/pandas/tests/test_optional_dependency.py b/pandas/tests/test_optional_dependency.py
index 52b5f636b1254..9127981d1845d 100644
--- a/pandas/tests/test_optional_dependency.py
+++ b/pandas/tests/test_optional_dependency.py
@@ -42,7 +42,7 @@ def test_bad_version(monkeypatch):
     result = import_optional_dependency("fakemodule", min_version="0.8")
     assert result is module
 
-    with tm.assert_produces_warning(UserWarning):
+    with tm.assert_produces_warning(UserWarning, match=match):
         result = import_optional_dependency("fakemodule", errors="warn")
     assert result is None
 
@@ -53,7 +53,7 @@ def test_bad_version(monkeypatch):
     with pytest.raises(ImportError, match="Pandas requires version '1.1.0'"):
         import_optional_dependency("fakemodule", min_version="1.1.0")
 
-    with tm.assert_produces_warning(UserWarning):
+    with tm.assert_produces_warning(UserWarning, match="Pandas requires version"):
         result = import_optional_dependency(
             "fakemodule", errors="warn", min_version="1.1.0"
         )
@@ -81,7 +81,7 @@ def test_submodule(monkeypatch):
     with pytest.raises(ImportError, match=match):
         import_optional_dependency("fakemodule.submodule")
 
-    with tm.assert_produces_warning(UserWarning):
+    with tm.assert_produces_warning(UserWarning, match=match):
         result = import_optional_dependency("fakemodule.submodule", errors="warn")
     assert result is None
 
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index b59dd194cac27..7ce02c12ac1ca 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1738,7 +1738,10 @@ def test_unit(self, cache):
     def test_unit_str(self, cache):
         # GH 57051
         # Test that strs aren't dropping precision to 32-bit accidentally.
-        with tm.assert_produces_warning(FutureWarning):
+        with tm.assert_produces_warning(
+            FutureWarning,
+            match="'to_datetime' with 'unit' when parsing strings is deprecated",
+        ):
             res = to_datetime(["1704660000"], unit="s", origin="unix")
         expected = to_datetime([1704660000], unit="s", origin="unix")
         tm.assert_index_equal(res, expected)
diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py
index d375010aff3cc..510a69a2ff3e4 100644
--- a/pandas/tests/window/test_expanding.py
+++ b/pandas/tests/window/test_expanding.py
@@ -696,5 +696,7 @@ def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
 def test_keyword_quantile_deprecated():
     # GH #52550
     ser = Series([1, 2, 3, 4])
-    with tm.assert_produces_warning(FutureWarning):
+    with tm.assert_produces_warning(
+        FutureWarning, match="the 'quantile' keyword is deprecated, use 'q' instead"
+    ):
         ser.expanding().quantile(quantile=0.5)
diff --git a/pandas/tests/window/test_rolling_quantile.py b/pandas/tests/window/test_rolling_quantile.py
index d5a7010923563..1604d72d4f9b1 100644
--- a/pandas/tests/window/test_rolling_quantile.py
+++ b/pandas/tests/window/test_rolling_quantile.py
@@ -178,5 +178,7 @@ def test_center_reindex_frame(frame, q):
 def test_keyword_quantile_deprecated():
     # GH #52550
     s = Series([1, 2, 3, 4])
-    with tm.assert_produces_warning(FutureWarning):
+    with tm.assert_produces_warning(
+        FutureWarning, match="the 'quantile' keyword is deprecated, use 'q' instead"
+    ):
         s.rolling(2).quantile(quantile=0.4)

From 41014db0e802bd9d2ae6326d6314c65ecad9b28d Mon Sep 17 00:00:00 2001
From: Zhengbo Wang <2736230899@qq.com>
Date: Thu, 25 Apr 2024 00:30:24 +0800
Subject: [PATCH 035/100] BUG: Ignore warning for duplicate columns in
 `to_dict` when orient='tight' (#58335)

* Ignore warning for duplicate columns in to_dict when orient='tight'

* Add whatsnew

* Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>

* Update whatsnew and redefine duplicate columns

* Use assert instead

* assert not raise and equal

---------

Co-authored-by: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst             |  1 +
 pandas/core/methods/to_dict.py             |  2 +-
 pandas/tests/frame/methods/test_to_dict.py | 14 ++++++++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index ca97e2b6ffb6b..59cc709359a8d 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -416,6 +416,7 @@ MultiIndex
 I/O
 ^^^
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping`` elements. (:issue:`57915`)
+- Bug in :meth:`DataFrame.to_dict` raising an unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
 - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
diff --git a/pandas/core/methods/to_dict.py b/pandas/core/methods/to_dict.py
index 57e03dedc384d..84202a4fcc840 100644
--- a/pandas/core/methods/to_dict.py
+++ b/pandas/core/methods/to_dict.py
@@ -148,7 +148,7 @@ def to_dict(
         Return a collections.abc.MutableMapping object representing the
         DataFrame. The resulting transformation depends on the `orient` parameter.
     """
-    if not df.columns.is_unique:
+    if orient != "tight" and not df.columns.is_unique:
         warnings.warn(
             "DataFrame columns are not unique, some columns will be omitted.",
             UserWarning,
diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py
index 11adc9f6179ce..0272b679e85a2 100644
--- a/pandas/tests/frame/methods/test_to_dict.py
+++ b/pandas/tests/frame/methods/test_to_dict.py
@@ -513,6 +513,20 @@ def test_to_dict_masked_native_python(self):
         result = df.to_dict(orient="records")
         assert isinstance(result[0]["a"], int)
 
+    def test_to_dict_tight_no_warning_with_duplicate_column(self):
+        # GH#58281
+        df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "A"])
+        with tm.assert_produces_warning(None):
+            result = df.to_dict(orient="tight")
+        expected = {
+            "index": [0, 1, 2],
+            "columns": ["A", "A"],
+            "data": [[1, 2], [3, 4], [5, 6]],
+            "index_names": [None],
+            "column_names": [None],
+        }
+        assert result == expected
+
 
 @pytest.mark.parametrize(
     "val", [Timestamp(2020, 1, 1), Timedelta(1), Period("2020"), Interval(1, 2)]

From 2536d3a736eea96b9da8b774e671516eb8f25f4a Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 24 Apr 2024 07:26:56 -1000
Subject: [PATCH 036/100] CI: Fix npdev failures (#58389)

* CI: Fix npdev failures

* Use unique index, make array writable

* Update pandas/_libs/hashtable_class_helper.pxi.in

* Update pandas/tests/arrays/test_datetimelike.py

* Update pandas/tests/arrays/test_datetimelike.py
---
 pandas/tests/arrays/test_datetimelike.py | 8 ++++++--
 pandas/tests/extension/base/missing.py   | 2 ++
 pandas/tests/indexes/test_base.py        | 4 ++--
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index 22c63af59a47c..3d8f8d791b763 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -661,7 +661,9 @@ def test_array_interface(self, datetime_index):
         assert result is expected
         tm.assert_numpy_array_equal(result, expected)
         result = np.array(arr, dtype="datetime64[ns]")
-        assert result is not expected
+        if not np_version_gt2:
+            # TODO: GH 57739
+            assert result is not expected
         tm.assert_numpy_array_equal(result, expected)
 
         # to object dtype
@@ -976,7 +978,9 @@ def test_array_interface(self, timedelta_index):
         assert result is expected
         tm.assert_numpy_array_equal(result, expected)
         result = np.array(arr, dtype="timedelta64[ns]")
-        assert result is not expected
+        if not np_version_gt2:
+            # TODO: GH 57739
+            assert result is not expected
         tm.assert_numpy_array_equal(result, expected)
 
         # to object dtype
diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
index 4b9234a9904a2..cee565d4f7c1e 100644
--- a/pandas/tests/extension/base/missing.py
+++ b/pandas/tests/extension/base/missing.py
@@ -27,7 +27,9 @@ def test_isna_returns_copy(self, data_missing, na_func):
         expected = result.copy()
         mask = getattr(result, na_func)()
         if isinstance(mask.dtype, pd.SparseDtype):
+            # TODO: GH 57739
             mask = np.array(mask)
+            mask.flags.writeable = True
 
         mask[:] = True
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 04858643d97b1..2e94961b673f8 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -71,8 +71,8 @@ def test_constructor_casting(self, index):
         tm.assert_contains_all(arr, new_index)
         tm.assert_index_equal(index, new_index)
 
-    @pytest.mark.parametrize("index", ["string"], indirect=True)
-    def test_constructor_copy(self, index, using_infer_string):
+    def test_constructor_copy(self, using_infer_string):
+        index = Index(list("abc"), name="name")
         arr = np.array(index)
         new_index = Index(arr, copy=True, name="name")
         assert isinstance(new_index, Index)

From a52728a87a91d45f8352ee588ce32b32aac774de Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 24 Apr 2024 12:39:03 -0700
Subject: [PATCH 037/100] DEPR: to_datetime string behavior with unit (#58407)

* DEPR: to_datetime string behavior with unit

* remove outdated test
---
 doc/source/whatsnew/v3.0.0.rst         |   1 +
 pandas/_libs/tslib.pyi                 |   6 +-
 pandas/_libs/tslib.pyx                 | 127 +++----------------------
 pandas/core/tools/datetimes.py         |   9 +-
 pandas/tests/tools/test_to_datetime.py |  33 +++----
 5 files changed, 35 insertions(+), 141 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 59cc709359a8d..dee793f5ef002 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -214,6 +214,7 @@ Removal of prior version deprecations/changes
 - :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`)
 - :func:`concat` with all-NA entries no longer ignores the dtype of those entries when determining the result dtype (:issue:`40893`)
 - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
+- :func:`to_datetime` with a ``unit`` specified no longer parses strings into floats, instead parses them the same way as without ``unit`` (:issue:`50735`)
 - :meth:`DataFrame.groupby` with ``as_index=False`` and aggregation methods will no longer exclude from the result the groupings that do not arise from the input (:issue:`49519`)
 - :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`)
 - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi
index 5a340c1d88bc4..7e3372a80db9d 100644
--- a/pandas/_libs/tslib.pyi
+++ b/pandas/_libs/tslib.pyi
@@ -11,11 +11,6 @@ def format_array_from_datetime(
     na_rep: str | float = ...,
     reso: int = ...,  # NPY_DATETIMEUNIT
 ) -> npt.NDArray[np.object_]: ...
-def array_with_unit_to_datetime(
-    values: npt.NDArray[np.object_],
-    unit: str,
-    errors: str = ...,
-) -> tuple[np.ndarray, tzinfo | None]: ...
 def first_non_null(values: np.ndarray) -> int: ...
 def array_to_datetime(
     values: npt.NDArray[np.object_],
@@ -24,6 +19,7 @@ def array_to_datetime(
     yearfirst: bool = ...,
     utc: bool = ...,
     creso: int = ...,
+    unit_for_numerics: str | None = ...,
 ) -> tuple[np.ndarray, tzinfo | None]: ...
 
 # returned ndarray may be object dtype or datetime64[ns]
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index aecf9f2e46bd4..dca3ba0ce49b3 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -1,7 +1,3 @@
-import warnings
-
-from pandas.util._exceptions import find_stack_level
-
 cimport cython
 
 from datetime import timezone
@@ -234,117 +230,6 @@ def format_array_from_datetime(
     return result
 
 
-def array_with_unit_to_datetime(
-    ndarray[object] values,
-    str unit,
-    str errors="coerce"
-):
-    """
-    Convert the ndarray to datetime according to the time unit.
-
-    This function converts an array of objects into a numpy array of
-    datetime64[ns]. It returns the converted array
-    and also returns the timezone offset
-
-    if errors:
-      - raise: return converted values or raise OutOfBoundsDatetime
-          if out of range on the conversion or
-          ValueError for other conversions (e.g. a string)
-      - ignore: return non-convertible values as the same unit
-      - coerce: NaT for non-convertibles
-
-    Parameters
-    ----------
-    values : ndarray
-         Date-like objects to convert.
-    unit : str
-         Time unit to use during conversion.
-    errors : str, default 'raise'
-         Error behavior when parsing.
-
-    Returns
-    -------
-    result : ndarray of m8 values
-    tz : parsed timezone offset or None
-    """
-    cdef:
-        Py_ssize_t i, n=len(values)
-        bint is_coerce = errors == "coerce"
-        bint is_raise = errors == "raise"
-        ndarray[int64_t] iresult
-        tzinfo tz = None
-        double fval
-
-    assert is_coerce or is_raise
-
-    if unit == "ns":
-        result, tz = array_to_datetime(
-            values.astype(object, copy=False),
-            errors=errors,
-            creso=NPY_FR_ns,
-        )
-        return result, tz
-
-    result = np.empty(n, dtype="M8[ns]")
-    iresult = result.view("i8")
-
-    for i in range(n):
-        val = values[i]
-
-        try:
-            if checknull_with_nat_and_na(val):
-                iresult[i] = NPY_NAT
-
-            elif is_integer_object(val) or is_float_object(val):
-
-                if val != val or val == NPY_NAT:
-                    iresult[i] = NPY_NAT
-                else:
-                    iresult[i] = cast_from_unit(val, unit)
-
-            elif isinstance(val, str):
-                if len(val) == 0 or val in nat_strings:
-                    iresult[i] = NPY_NAT
-
-                else:
-
-                    try:
-                        fval = float(val)
-                    except ValueError:
-                        raise ValueError(
-                            f"non convertible value {val} with the unit '{unit}'"
-                        )
-                    warnings.warn(
-                        "The behavior of 'to_datetime' with 'unit' when parsing "
-                        "strings is deprecated. In a future version, strings will "
-                        "be parsed as datetime strings, matching the behavior "
-                        "without a 'unit'. To retain the old behavior, explicitly "
-                        "cast ints or floats to numeric type before calling "
-                        "to_datetime.",
-                        FutureWarning,
-                        stacklevel=find_stack_level(),
-                    )
-
-                    iresult[i] = cast_from_unit(fval, unit)
-
-            else:
-                # TODO: makes more sense as TypeError, but that would be an
-                #  API change.
-                raise ValueError(
-                    f"unit='{unit}' not valid with non-numerical val='{val}'"
-                )
-
-        except (ValueError, TypeError) as err:
-            if is_raise:
-                err.args = (f"{err}, at position {i}",)
-                raise
-            else:
-                # is_coerce
-                iresult[i] = NPY_NAT
-
-    return result, tz
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def first_non_null(values: ndarray) -> int:
@@ -376,6 +261,7 @@ cpdef array_to_datetime(
     bint yearfirst=False,
     bint utc=False,
     NPY_DATETIMEUNIT creso=NPY_FR_ns,
+    str unit_for_numerics=None,
 ):
     """
     Converts a 1D array of date-like values to a numpy array of either:
@@ -404,6 +290,7 @@ cpdef array_to_datetime(
         indicator whether the dates should be UTC
     creso : NPY_DATETIMEUNIT, default NPY_FR_ns
         Set to NPY_FR_GENERIC to infer a resolution.
+    unit_for_numerics : str, default "ns"
 
     Returns
     -------
@@ -434,6 +321,13 @@ cpdef array_to_datetime(
         abbrev = "ns"
     else:
         abbrev = npy_unit_to_abbrev(creso)
+
+    if unit_for_numerics is not None:
+        # either creso or unit_for_numerics should be passed, not both
+        assert creso == NPY_FR_ns
+    else:
+        unit_for_numerics = abbrev
+
     result = np.empty((<object>values).shape, dtype=f"M8[{abbrev}]")
     iresult = result.view("i8").ravel()
 
@@ -485,7 +379,8 @@ cpdef array_to_datetime(
                         creso = state.creso
 
                     # we now need to parse this as if unit=abbrev
-                    iresult[i] = cast_from_unit(val, abbrev, out_reso=creso)
+                    iresult[i] = cast_from_unit(val, unit_for_numerics, out_reso=creso)
+
                     state.found_other = True
 
             elif isinstance(val, str):
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index df7a6cdb1ea52..b01cdb335ec46 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -481,7 +481,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
     """
     arg = extract_array(arg, extract_numpy=True)
 
-    # GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime
+    # GH#30050 pass an ndarray to tslib.array_to_datetime
     # because it expects an ndarray argument
     if isinstance(arg, IntegerArray):
         arr = arg.astype(f"datetime64[{unit}]")
@@ -519,7 +519,12 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
             tz_parsed = None
         else:
             arg = arg.astype(object, copy=False)
-            arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
+            arr, tz_parsed = tslib.array_to_datetime(
+                arg,
+                utc=utc,
+                errors=errors,
+                unit_for_numerics=unit,
+            )
 
     result = DatetimeIndex(arr, name=name)
     if not isinstance(result, DatetimeIndex):
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 7ce02c12ac1ca..f4042acd05dc3 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1705,22 +1705,24 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
         # GH#50301
         # Match Timestamp behavior in disallowing non-round floats with
         #  Y or M unit
-        warn_msg = "strings will be parsed as datetime strings"
         msg = f"Conversion of non-round float with unit={unit} is ambiguous"
         with pytest.raises(ValueError, match=msg):
             to_datetime([1.5], unit=unit, errors="raise")
         with pytest.raises(ValueError, match=msg):
             to_datetime(np.array([1.5]), unit=unit, errors="raise")
+
+        msg = r"Given date string \"1.5\" not likely a datetime, at position 0"
         with pytest.raises(ValueError, match=msg):
-            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-                to_datetime(["1.5"], unit=unit, errors="raise")
+            to_datetime(["1.5"], unit=unit, errors="raise")
 
         res = to_datetime([1.5], unit=unit, errors="coerce")
         expected = Index([NaT], dtype="M8[ns]")
         tm.assert_index_equal(res, expected)
 
-        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-            res = to_datetime(["1.5"], unit=unit, errors="coerce")
+        # In 3.0, the string "1.5" is parsed as it would be without unit,
+        #  which fails. With errors="coerce" this becomes NaT.
+        res = to_datetime(["1.5"], unit=unit, errors="coerce")
+        expected = to_datetime([NaT])
         tm.assert_index_equal(res, expected)
 
         # round floats are OK
@@ -1735,17 +1737,6 @@ def test_unit(self, cache):
         with pytest.raises(ValueError, match=msg):
             to_datetime([1], unit="D", format="%Y%m%d", cache=cache)
 
-    def test_unit_str(self, cache):
-        # GH 57051
-        # Test that strs aren't dropping precision to 32-bit accidentally.
-        with tm.assert_produces_warning(
-            FutureWarning,
-            match="'to_datetime' with 'unit' when parsing strings is deprecated",
-        ):
-            res = to_datetime(["1704660000"], unit="s", origin="unix")
-        expected = to_datetime([1704660000], unit="s", origin="unix")
-        tm.assert_index_equal(res, expected)
-
     def test_unit_array_mixed_nans(self, cache):
         values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""]
 
@@ -1774,7 +1765,7 @@ def test_unit_array_mixed_nans_large_int(self, cache):
     def test_to_datetime_invalid_str_not_out_of_bounds_valuerror(self, cache):
         # if we have a string, then we raise a ValueError
         # and NOT an OutOfBoundsDatetime
-        msg = "non convertible value foo with the unit 's'"
+        msg = "Unknown datetime string format, unable to parse: foo, at position 0"
         with pytest.raises(ValueError, match=msg):
             to_datetime("foo", errors="raise", unit="s", cache=cache)
 
@@ -1909,7 +1900,13 @@ def test_to_datetime_unit_na_values(self):
 
     @pytest.mark.parametrize("bad_val", ["foo", 111111111])
     def test_to_datetime_unit_invalid(self, bad_val):
-        msg = f"{bad_val} with the unit 'D'"
+        if bad_val == "foo":
+            msg = (
+                "Unknown datetime string format, unable to parse: "
+                f"{bad_val}, at position 2"
+            )
+        else:
+            msg = "cannot convert input 111111111 with the unit 'D', at position 2"
         with pytest.raises(ValueError, match=msg):
             to_datetime([1, 2, bad_val], unit="D")
 

From 53609a79be3b5ef378d9cc2efe167e09714a953e Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Wed, 24 Apr 2024 15:39:48 -0400
Subject: [PATCH 038/100] DOC: fix SA01 error for DatetimeIndex: day_of_year,
 is_leap_year, inferred_freq (#58406)

* DOC: fix SA01 error for DatetimeIndex: day_of_year, is_leap_year, inferred_freq

* fixing line too long error

* Fixing: EXPECTED TO FAIL, BUT NOT FAILING errors
---
 ci/code_checks.sh                  |  8 --------
 pandas/core/arrays/datetimelike.py |  5 +++++
 pandas/core/arrays/datetimes.py    | 12 ++++++++++++
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 101d650a0e768..7c97408cee559 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -102,13 +102,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.to_markdown SA01" \
         -i "pandas.DataFrame.to_parquet RT03" \
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
-        -i "pandas.DatetimeIndex.day_of_year SA01" \
-        -i "pandas.DatetimeIndex.dayofyear SA01" \
         -i "pandas.DatetimeIndex.freqstr SA01" \
         -i "pandas.DatetimeIndex.indexer_at_time PR01,RT03" \
         -i "pandas.DatetimeIndex.indexer_between_time RT03" \
-        -i "pandas.DatetimeIndex.inferred_freq SA01" \
-        -i "pandas.DatetimeIndex.is_leap_year SA01" \
         -i "pandas.DatetimeIndex.snap PR01,RT03" \
         -i "pandas.DatetimeIndex.std PR01,RT03" \
         -i "pandas.DatetimeIndex.to_period RT03" \
@@ -264,14 +260,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.dt.ceil PR01,PR02" \
         -i "pandas.Series.dt.components SA01" \
         -i "pandas.Series.dt.day_name PR01,PR02" \
-        -i "pandas.Series.dt.day_of_year SA01" \
-        -i "pandas.Series.dt.dayofyear SA01" \
         -i "pandas.Series.dt.days SA01" \
         -i "pandas.Series.dt.days_in_month SA01" \
         -i "pandas.Series.dt.daysinmonth SA01" \
         -i "pandas.Series.dt.floor PR01,PR02" \
         -i "pandas.Series.dt.freq GL08" \
-        -i "pandas.Series.dt.is_leap_year SA01" \
         -i "pandas.Series.dt.microseconds SA01" \
         -i "pandas.Series.dt.month_name PR01,PR02" \
         -i "pandas.Series.dt.nanoseconds SA01" \
@@ -400,7 +393,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.TimedeltaIndex.as_unit RT03,SA01" \
         -i "pandas.TimedeltaIndex.components SA01" \
         -i "pandas.TimedeltaIndex.days SA01" \
-        -i "pandas.TimedeltaIndex.inferred_freq SA01" \
         -i "pandas.TimedeltaIndex.microseconds SA01" \
         -i "pandas.TimedeltaIndex.nanoseconds SA01" \
         -i "pandas.TimedeltaIndex.seconds SA01" \
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 974289160b145..ff8b16b3361ee 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -908,6 +908,11 @@ def inferred_freq(self) -> str | None:
 
         Returns None if it can't autodetect the frequency.
 
+        See Also
+        --------
+        DatetimeIndex.freqstr : Return the frequency object as a string if it's set,
+            otherwise None.
+
         Examples
         --------
         For DatetimeIndex:
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index be087e19ce7b6..25c7f926d19a8 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -1835,6 +1835,11 @@ def isocalendar(self) -> DataFrame:
         """
         The ordinal day of the year.
 
+        See Also
+        --------
+        DatetimeIndex.dayofweek : The day of the week with Monday=0, Sunday=6.
+        DatetimeIndex.day : The day of the datetime.
+
         Examples
         --------
         For Series:
@@ -2155,6 +2160,13 @@ def isocalendar(self) -> DataFrame:
         Series or ndarray
              Booleans indicating if dates belong to a leap year.
 
+        See Also
+        --------
+        DatetimeIndex.is_year_end : Indicate whether the date is the
+            last day of the year.
+        DatetimeIndex.is_year_start : Indicate whether the date is the first
+            day of a year.
+
         Examples
         --------
         This method is available on Series with datetime values under

From 661d7f044bb09da2f963707b25aabee485dd0bc8 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Thu, 25 Apr 2024 01:10:54 +0530
Subject: [PATCH 039/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.to_list (#58398)

* DOC: added RT03 to pandas.Index.to_list

* DOC: remove pandas.Index.to_list

* DOC: remove pandas.Series.tolist
---
 ci/code_checks.sh   | 2 --
 pandas/core/base.py | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 7c97408cee559..bf7423dfe5825 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -147,7 +147,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.str PR01,SA01" \
         -i "pandas.Index.symmetric_difference PR07,RT03,SA01" \
         -i "pandas.Index.take PR01,PR07" \
-        -i "pandas.Index.to_list RT03" \
         -i "pandas.Index.union PR07,RT03,SA01" \
         -i "pandas.Index.view GL08" \
         -i "pandas.Int16Dtype SA01" \
@@ -368,7 +367,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.swaplevel SA01" \
         -i "pandas.Series.to_dict SA01" \
         -i "pandas.Series.to_frame SA01" \
-        -i "pandas.Series.to_list RT03" \
         -i "pandas.Series.to_markdown SA01" \
         -i "pandas.Series.to_string SA01" \
         -i "pandas.Series.truediv PR07" \
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 424f0609dd485..d716a9ffb7bcc 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -789,6 +789,7 @@ def tolist(self) -> list:
         Returns
         -------
         list
+            List containing the values as Python or pandas scalars.
 
         See Also
         --------

From 4f7cb743533d21d3025f9b4fd2f4f1854977cc63 Mon Sep 17 00:00:00 2001
From: Carlo Barth <carlo@huq.io>
Date: Wed, 24 Apr 2024 21:41:58 +0200
Subject: [PATCH 040/100] Fix/time series interpolation is wrong 21351 (#56515)

* fix: Fixes wrong doctest output in `pandas.core.resample.Resampler.interpolate` and the related explanation about consideration of anchor points when interpolating downsampled series with non-aligned result index.

* Resolved merge conflicts

* fix: Fixes wrong test case assumption for interpolation

Fixes assumption in `test_interp_basic_with_non_range_index`. If the index is [1, 2, 3, 5] and values are [1, 2, np.nan, 4], it is wrong to expect that interpolation will result in 3 for the missing value in case of linear interpolation. It will rather be 2.666...

* fix: Make sure frequency indexes are preserved with new interpolation approach

* fix: Fixes new-style up-sampling interpolation for MultiIndexes resulting from groupby-operations

* fix: Fixes wrong test case assumption when using linear interpolation on series with datetime index using business days only (test case `pandas.tests.series.methods.test_interpolate.TestSeriesInterpolateData.test_interpolate`).

* fix: Fixes wrong test case assumption when using linear interpolation on irregular index (test case `pandas.tests.series.methods.test_interpolate.TestSeriesInterpolateData.test_nan_irregular_index`).

* fix: Adds test skips for interpolation methods that require scipy if scipy is not installed

* fix: Makes sure keyword arguments "downcast" is not passed to scipy interpolation methods that are not using `interp1d` or spline.

* fix: Adjusted expected warning type in `test_groupby_resample_interpolate_off_grid`.

* fix: Fixes failing interpolation on groupby if the index has `name`=None. Adds this check to an existing test case.

* Trigger Actions

* feat: Raise error on attempt to interpolate a MultiIndex data frame, providing a useful error message that describes a working alternative syntax. Fixed related test cases and added test that makes sure the error is raised.

* Apply suggestions from code review

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* refactor: Adjusted error type assertion in test case

* refactor: Removed unused parametrization definitions and switched to direct parametrization for interpolation methods in tests.

* fix: Adds forgotten "@" before pytest.mark.parametrize

* refactor: Apply suggestions from code review

* refactor: Switched to fixture params syntax for test case parametrization

* Update pandas/tests/resample/test_time_grouper.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Update pandas/tests/resample/test_base.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* refactor: Fixes too long line

* tests: Fixes test that fails due to unimportant index name comparison

* docs: Added entry in whatsnew

* Empty-Commit

* Empty-Commit

* Empty-Commit

* docs: Sorted whatsnew

* docs: Adjusted bug fix note and moved it to the right section

---------

Co-authored-by: Marco Edward Gorelli <marcogorelli@protonmail.com>
Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst                |   1 +
 pandas/core/missing.py                        |  14 +-
 pandas/core/resample.py                       |  68 ++++++++--
 .../tests/frame/methods/test_interpolate.py   |   2 +-
 pandas/tests/resample/test_base.py            |  73 +++++++++++
 pandas/tests/resample/test_time_grouper.py    | 120 +++++++++++++-----
 .../tests/series/methods/test_interpolate.py  |   9 +-
 7 files changed, 239 insertions(+), 48 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index dee793f5ef002..c77348b365370 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -438,6 +438,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`)
 - Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`)
 - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
+- Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
 - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
 
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 9fef78d9f8c3d..039d868bccd16 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -314,7 +314,16 @@ def get_interp_index(method, index: Index) -> Index:
         # prior default
         from pandas import Index
 
-        index = Index(np.arange(len(index)))
+        if isinstance(index.dtype, DatetimeTZDtype) or lib.is_np_dtype(
+            index.dtype, "mM"
+        ):
+            # Convert datetime-like indexes to int64
+            index = Index(index.view("i8"))
+
+        elif not is_numeric_dtype(index.dtype):
+            # We keep behavior consistent with prior versions of pandas for
+            # non-numeric, non-datetime indexes
+            index = Index(range(len(index)))
     else:
         methods = {"index", "values", "nearest", "time"}
         is_numeric_or_datetime = (
@@ -616,6 +625,9 @@ def _interpolate_scipy_wrapper(
         terp = alt_methods.get(method, None)
         if terp is None:
             raise ValueError(f"Can not interpolate with method={method}.")
+
+        # Make sure downcast is not in kwargs for alt methods
+        kwargs.pop("downcast", None)
         new_y = terp(x, y, new_x, **kwargs)
     return new_y
 
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 86d1f55f38c05..ccbe25fdae841 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -80,6 +80,7 @@
     TimedeltaIndex,
     timedelta_range,
 )
+from pandas.core.reshape.concat import concat
 
 from pandas.tseries.frequencies import (
     is_subperiod,
@@ -885,30 +886,59 @@ def interpolate(
         Freq: 500ms, dtype: float64
 
         Internal reindexing with ``asfreq()`` prior to interpolation leads to
-        an interpolated timeseries on the basis the reindexed timestamps (anchors).
-        Since not all datapoints from original series become anchors,
-        it can lead to misleading interpolation results as in the following example:
+        an interpolated timeseries on the basis of the reindexed timestamps
+        (anchors). It is ensured that all available datapoints from the original
+        series become anchors, so it also works for resampling cases that lead
+        to non-aligned timestamps, as in the following example:
 
         >>> series.resample("400ms").interpolate("linear")
         2023-03-01 07:00:00.000    1.0
-        2023-03-01 07:00:00.400    1.2
-        2023-03-01 07:00:00.800    1.4
-        2023-03-01 07:00:01.200    1.6
-        2023-03-01 07:00:01.600    1.8
+        2023-03-01 07:00:00.400    0.2
+        2023-03-01 07:00:00.800   -0.6
+        2023-03-01 07:00:01.200   -0.4
+        2023-03-01 07:00:01.600    0.8
         2023-03-01 07:00:02.000    2.0
-        2023-03-01 07:00:02.400    2.2
-        2023-03-01 07:00:02.800    2.4
-        2023-03-01 07:00:03.200    2.6
-        2023-03-01 07:00:03.600    2.8
+        2023-03-01 07:00:02.400    1.6
+        2023-03-01 07:00:02.800    1.2
+        2023-03-01 07:00:03.200    1.4
+        2023-03-01 07:00:03.600    2.2
         2023-03-01 07:00:04.000    3.0
         Freq: 400ms, dtype: float64
 
-        Note that the series erroneously increases between two anchors
+        Note that the series correctly decreases between two anchors
         ``07:00:00`` and ``07:00:02``.
         """
         assert downcast is lib.no_default  # just checking coverage
         result = self._upsample("asfreq")
-        return result.interpolate(
+
+        # If the original data has timestamps which are not aligned with the
+        # target timestamps, we need to add those points back to the data frame
+        # that is supposed to be interpolated. This does not work with
+        # PeriodIndex, so we skip this case. GH#21351
+        obj = self._selected_obj
+        is_period_index = isinstance(obj.index, PeriodIndex)
+
+        # Skip this step for PeriodIndex
+        if not is_period_index:
+            final_index = result.index
+            if isinstance(final_index, MultiIndex):
+                raise NotImplementedError(
+                    "Direct interpolation of MultiIndex data frames is not "
+                    "supported. If you tried to resample and interpolate on a "
+                    "grouped data frame, please use:\n"
+                    "`df.groupby(...).apply(lambda x: x.resample(...)."
+                    "interpolate(...), include_groups=False)`"
+                    "\ninstead, as resampling and interpolation has to be "
+                    "performed for each group independently."
+                )
+
+            missing_data_points_index = obj.index.difference(final_index)
+            if len(missing_data_points_index) > 0:
+                result = concat(
+                    [result, obj.loc[missing_data_points_index]]
+                ).sort_index()
+
+        result_interpolated = result.interpolate(
             method=method,
             axis=axis,
             limit=limit,
@@ -919,6 +949,18 @@ def interpolate(
             **kwargs,
         )
 
+        # No further steps if the original data has a PeriodIndex
+        if is_period_index:
+            return result_interpolated
+
+        # Make sure that original data points which do not align with the
+        # resampled index are removed
+        result_interpolated = result_interpolated.loc[final_index]
+
+        # Make sure frequency indexes are preserved
+        result_interpolated.index = final_index
+        return result_interpolated
+
     @final
     def asfreq(self, fill_value=None):
         """
diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
index 0a9d059736e6f..cdb9ff8a67b6b 100644
--- a/pandas/tests/frame/methods/test_interpolate.py
+++ b/pandas/tests/frame/methods/test_interpolate.py
@@ -109,7 +109,7 @@ def test_interp_basic_with_non_range_index(self, using_infer_string):
         else:
             result = df.set_index("C").interpolate()
             expected = df.set_index("C")
-            expected.loc[3, "A"] = 3
+            expected.loc[3, "A"] = 2.66667
             expected.loc[5, "B"] = 9
             tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
index 9cd51b95d6efd..3428abacd509e 100644
--- a/pandas/tests/resample/test_base.py
+++ b/pandas/tests/resample/test_base.py
@@ -25,6 +25,29 @@
 from pandas.core.resample import _asfreq_compat
 
 
+@pytest.fixture(
+    params=[
+        "linear",
+        "time",
+        "index",
+        "values",
+        "nearest",
+        "zero",
+        "slinear",
+        "quadratic",
+        "cubic",
+        "barycentric",
+        "krogh",
+        "from_derivatives",
+        "piecewise_polynomial",
+        "pchip",
+        "akima",
+    ],
+)
+def all_1d_no_arg_interpolation_methods(request):
+    return request.param
+
+
 @pytest.mark.parametrize("freq", ["2D", "1h"])
 @pytest.mark.parametrize(
     "index",
@@ -91,6 +114,56 @@ def test_resample_interpolate(index):
     tm.assert_frame_equal(result, expected)
 
 
+def test_resample_interpolate_regular_sampling_off_grid(
+    all_1d_no_arg_interpolation_methods,
+):
+    pytest.importorskip("scipy")
+    # GH#21351
+    index = date_range("2000-01-01 00:01:00", periods=5, freq="2h")
+    ser = Series(np.arange(5.0), index)
+
+    method = all_1d_no_arg_interpolation_methods
+    # Resample to 1 hour sampling and interpolate with the given method
+    ser_resampled = ser.resample("1h").interpolate(method)
+
+    # Check that none of the resampled values are NaN, except the first one
+    # which lies 1 minute before the first actual data point
+    assert np.isnan(ser_resampled.iloc[0])
+    assert not ser_resampled.iloc[1:].isna().any()
+
+    if method not in ["nearest", "zero"]:
+        # Check that the resampled values are close to the expected values
+        # except for methods with known inaccuracies
+        assert np.all(
+            np.isclose(ser_resampled.values[1:], np.arange(0.5, 4.5, 0.5), rtol=1.0e-1)
+        )
+
+
+def test_resample_interpolate_irregular_sampling(all_1d_no_arg_interpolation_methods):
+    pytest.importorskip("scipy")
+    # GH#21351
+    ser = Series(
+        np.linspace(0.0, 1.0, 5),
+        index=DatetimeIndex(
+            [
+                "2000-01-01 00:00:03",
+                "2000-01-01 00:00:22",
+                "2000-01-01 00:00:24",
+                "2000-01-01 00:00:31",
+                "2000-01-01 00:00:39",
+            ]
+        ),
+    )
+
+    # Resample to 5 second sampling and interpolate with the given method
+    ser_resampled = ser.resample("5s").interpolate(all_1d_no_arg_interpolation_methods)
+
+    # Check that none of the resampled values are NaN, except the first one
+    # which lies 3 seconds before the first actual data point
+    assert np.isnan(ser_resampled.iloc[0])
+    assert not ser_resampled.iloc[1:].isna().any()
+
+
 def test_raises_on_non_datetimelike_index():
     # this is a non datetimelike index
     xp = DataFrame()
diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py
index 11ad9240527d5..5f5a54c4d92a3 100644
--- a/pandas/tests/resample/test_time_grouper.py
+++ b/pandas/tests/resample/test_time_grouper.py
@@ -333,26 +333,98 @@ def test_upsample_sum(method, method_args, expected_values):
     tm.assert_series_equal(result, expected)
 
 
-def test_groupby_resample_interpolate():
+@pytest.fixture
+def groupy_test_df():
+    return DataFrame(
+        {"price": [10, 11, 9], "volume": [50, 60, 50]},
+        index=date_range("01/01/2018", periods=3, freq="W"),
+    )
+
+
+def test_groupby_resample_interpolate_raises(groupy_test_df):
+    # GH 35325
+
+    # Make a copy of the test data frame that has index.name=None
+    groupy_test_df_without_index_name = groupy_test_df.copy()
+    groupy_test_df_without_index_name.index.name = None
+
+    dfs = [groupy_test_df, groupy_test_df_without_index_name]
+
+    for df in dfs:
+        msg = "DataFrameGroupBy.resample operated on the grouping columns"
+        with tm.assert_produces_warning(DeprecationWarning, match=msg):
+            with pytest.raises(
+                NotImplementedError,
+                match="Direct interpolation of MultiIndex data frames is "
+                "not supported",
+            ):
+                df.groupby("volume").resample("1D").interpolate(method="linear")
+
+
+def test_groupby_resample_interpolate_with_apply_syntax(groupy_test_df):
     # GH 35325
-    d = {"price": [10, 11, 9], "volume": [50, 60, 50]}
 
-    df = DataFrame(d)
+    # Make a copy of the test data frame that has index.name=None
+    groupy_test_df_without_index_name = groupy_test_df.copy()
+    groupy_test_df_without_index_name.index.name = None
 
-    df["week_starting"] = date_range("01/01/2018", periods=3, freq="W")
+    dfs = [groupy_test_df, groupy_test_df_without_index_name]
 
-    msg = "DataFrameGroupBy.resample operated on the grouping columns"
-    with tm.assert_produces_warning(DeprecationWarning, match=msg):
-        result = (
-            df.set_index("week_starting")
-            .groupby("volume")
-            .resample("1D")
-            .interpolate(method="linear")
+    for df in dfs:
+        result = df.groupby("volume").apply(
+            lambda x: x.resample("1d").interpolate(method="linear"),
+            include_groups=False,
         )
 
-    volume = [50] * 15 + [60]
-    week_starting = list(date_range("2018-01-07", "2018-01-21")) + [
-        Timestamp("2018-01-14")
+        volume = [50] * 15 + [60]
+        week_starting = list(date_range("2018-01-07", "2018-01-21")) + [
+            Timestamp("2018-01-14")
+        ]
+        expected_ind = pd.MultiIndex.from_arrays(
+            [volume, week_starting],
+            names=["volume", df.index.name],
+        )
+
+        expected = DataFrame(
+            data={
+                "price": [
+                    10.0,
+                    9.928571428571429,
+                    9.857142857142858,
+                    9.785714285714286,
+                    9.714285714285714,
+                    9.642857142857142,
+                    9.571428571428571,
+                    9.5,
+                    9.428571428571429,
+                    9.357142857142858,
+                    9.285714285714286,
+                    9.214285714285714,
+                    9.142857142857142,
+                    9.071428571428571,
+                    9.0,
+                    11.0,
+                ]
+            },
+            index=expected_ind,
+        )
+        tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_resample_interpolate_with_apply_syntax_off_grid(groupy_test_df):
+    """Similar test as test_groupby_resample_interpolate_with_apply_syntax but
+    with resampling that results in missing anchor points when interpolating.
+    See GH#21351."""
+    # GH#21351
+    result = groupy_test_df.groupby("volume").apply(
+        lambda x: x.resample("265h").interpolate(method="linear"), include_groups=False
+    )
+
+    volume = [50, 50, 60]
+    week_starting = [
+        Timestamp("2018-01-07"),
+        Timestamp("2018-01-18 01:00:00"),
+        Timestamp("2018-01-14"),
     ]
     expected_ind = pd.MultiIndex.from_arrays(
         [volume, week_starting],
@@ -363,24 +435,10 @@ def test_groupby_resample_interpolate():
         data={
             "price": [
                 10.0,
-                9.928571428571429,
-                9.857142857142858,
-                9.785714285714286,
-                9.714285714285714,
-                9.642857142857142,
-                9.571428571428571,
-                9.5,
-                9.428571428571429,
-                9.357142857142858,
-                9.285714285714286,
-                9.214285714285714,
-                9.142857142857142,
-                9.071428571428571,
-                9.0,
+                9.21131,
                 11.0,
-            ],
-            "volume": [50.0] * 15 + [60],
+            ]
         },
         index=expected_ind,
     )
-    tm.assert_frame_equal(result, expected)
+    tm.assert_frame_equal(result, expected, check_names=False)
diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py
index 1008c2c87dc9e..ff7f8d0b7fa72 100644
--- a/pandas/tests/series/methods/test_interpolate.py
+++ b/pandas/tests/series/methods/test_interpolate.py
@@ -94,7 +94,12 @@ def test_interpolate(self, datetime_series):
         ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index)
 
         ts_copy = ts.copy()
-        ts_copy[5:10] = np.nan
+
+        # Set data between Tuesday and Thursday to NaN for 2 consecutive weeks.
+        # Linear interpolation should fill in the missing values correctly,
+        # as the index is equally-spaced within each week.
+        ts_copy[1:4] = np.nan
+        ts_copy[6:9] = np.nan
 
         linear_interp = ts_copy.interpolate(method="linear")
         tm.assert_series_equal(linear_interp, ts)
@@ -265,7 +270,7 @@ def test_nan_interpolate(self, kwargs):
     def test_nan_irregular_index(self):
         s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9])
         result = s.interpolate()
-        expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9])
+        expected = Series([1.0, 2.0, 2.6666666666666665, 4.0], index=[1, 3, 5, 9])
         tm.assert_series_equal(result, expected)
 
     def test_nan_str_index(self):

From 6320c8bb3287fd603dc7e014daf8d695a510024b Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 24 Apr 2024 16:26:28 -0700
Subject: [PATCH 041/100] REF: use maybe_convert_objects in pd.array (#56484)

* REF: use maybe_convert_objects in pd.array

* lint fixups

* Update pandas/_libs/lib.pyx

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 pandas/_libs/lib.pyx                  |  43 ++++++++---
 pandas/core/construction.py           | 100 +++++++++++++++-----------
 pandas/tests/arrays/test_array.py     |  16 +++++
 pandas/tests/dtypes/test_inference.py |   4 +-
 4 files changed, 109 insertions(+), 54 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 24afbe3a07bf1..5b6d83ba8e9ee 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2628,7 +2628,11 @@ def maybe_convert_objects(ndarray[object] objects,
                 seen.object_ = True
                 break
         elif val is C_NA:
-            seen.object_ = True
+            if convert_to_nullable_dtype:
+                seen.null_ = True
+                mask[i] = True
+            else:
+                seen.object_ = True
             continue
         else:
             seen.object_ = True
@@ -2691,6 +2695,12 @@ def maybe_convert_objects(ndarray[object] objects,
             dtype = StringDtype(storage="pyarrow_numpy")
             return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
 
+        elif convert_to_nullable_dtype and is_string_array(objects, skipna=True):
+            from pandas.core.arrays.string_ import StringDtype
+
+            dtype = StringDtype()
+            return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
+
         seen.object_ = True
     elif seen.interval_:
         if is_interval_array(objects):
@@ -2734,12 +2744,12 @@ def maybe_convert_objects(ndarray[object] objects,
         return objects
 
     if seen.bool_:
-        if seen.is_bool:
-            # is_bool property rules out everything else
-            return bools.view(np.bool_)
-        elif convert_to_nullable_dtype and seen.is_bool_or_na:
+        if convert_to_nullable_dtype and seen.is_bool_or_na:
             from pandas.core.arrays import BooleanArray
             return BooleanArray(bools.view(np.bool_), mask)
+        elif seen.is_bool:
+            # is_bool property rules out everything else
+            return bools.view(np.bool_)
         seen.object_ = True
 
     if not seen.object_:
@@ -2752,11 +2762,11 @@ def maybe_convert_objects(ndarray[object] objects,
                     result = floats
                 elif seen.int_ or seen.uint_:
                     if convert_to_nullable_dtype:
-                        from pandas.core.arrays import IntegerArray
+                        # Below we will wrap in IntegerArray
                         if seen.uint_:
-                            result = IntegerArray(uints, mask)
+                            result = uints
                         else:
-                            result = IntegerArray(ints, mask)
+                            result = ints
                     else:
                         result = floats
                 elif seen.nan_:
@@ -2771,7 +2781,6 @@ def maybe_convert_objects(ndarray[object] objects,
                         result = uints
                     else:
                         result = ints
-
         else:
             # don't cast int to float, etc.
             if seen.null_:
@@ -2794,6 +2803,22 @@ def maybe_convert_objects(ndarray[object] objects,
                     else:
                         result = ints
 
+        # TODO: do these after the itemsize check?
+        if (result is ints or result is uints) and convert_to_nullable_dtype:
+            from pandas.core.arrays import IntegerArray
+
+            # Set these values to 1 to be deterministic, match
+            #  IntegerArray._internal_fill_value
+            result[mask] = 1
+            result = IntegerArray(result, mask)
+        elif result is floats and convert_to_nullable_dtype:
+            from pandas.core.arrays import FloatingArray
+
+            # Set these values to 1.0 to be deterministic, match
+            #  FloatingArray._internal_fill_value
+            result[mask] = 1.0
+            result = FloatingArray(result, mask)
+
         if result is uints or result is ints or result is floats or result is complexes:
             # cast to the largest itemsize when all values are NumPy scalars
             if itemsize_max > 0 and itemsize_max != result.dtype.itemsize:
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index ec49340e9a516..2718e9819cdf8 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -7,11 +7,8 @@
 
 from __future__ import annotations
 
-from collections.abc import Sequence
 from typing import (
     TYPE_CHECKING,
-    Optional,
-    Union,
     cast,
     overload,
 )
@@ -23,17 +20,9 @@
 
 from pandas._libs import lib
 from pandas._libs.tslibs import (
-    Period,
     get_supported_dtype,
     is_supported_dtype,
 )
-from pandas._typing import (
-    AnyArrayLike,
-    ArrayLike,
-    Dtype,
-    DtypeObj,
-    T,
-)
 
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.cast import (
@@ -46,6 +35,7 @@
     maybe_promote,
 )
 from pandas.core.dtypes.common import (
+    ensure_object,
     is_list_like,
     is_object_dtype,
     is_string_dtype,
@@ -63,11 +53,25 @@
 import pandas.core.common as com
 
 if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from pandas._typing import (
+        AnyArrayLike,
+        ArrayLike,
+        Dtype,
+        DtypeObj,
+        T,
+    )
+
     from pandas import (
         Index,
         Series,
     )
-    from pandas.core.arrays.base import ExtensionArray
+    from pandas.core.arrays import (
+        DatetimeArray,
+        ExtensionArray,
+        TimedeltaArray,
+    )
 
 
 def array(
@@ -286,9 +290,7 @@ def array(
         ExtensionArray,
         FloatingArray,
         IntegerArray,
-        IntervalArray,
         NumpyExtensionArray,
-        PeriodArray,
         TimedeltaArray,
     )
     from pandas.core.arrays.string_ import StringDtype
@@ -320,46 +322,58 @@ def array(
         return cls._from_sequence(data, dtype=dtype, copy=copy)
 
     if dtype is None:
-        inferred_dtype = lib.infer_dtype(data, skipna=True)
-        if inferred_dtype == "period":
-            period_data = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data)
-            return PeriodArray._from_sequence(period_data, copy=copy)
-
-        elif inferred_dtype == "interval":
-            return IntervalArray(data, copy=copy)
-
-        elif inferred_dtype.startswith("datetime"):
-            # datetime, datetime64
-            try:
-                return DatetimeArray._from_sequence(data, copy=copy)
-            except ValueError:
-                # Mixture of timezones, fall back to NumpyExtensionArray
-                pass
-
-        elif inferred_dtype.startswith("timedelta"):
-            # timedelta, timedelta64
-            return TimedeltaArray._from_sequence(data, copy=copy)
-
-        elif inferred_dtype == "string":
+        was_ndarray = isinstance(data, np.ndarray)
+        # error: Item "Sequence[object]" of "Sequence[object] | ExtensionArray |
+        # ndarray[Any, Any]" has no attribute "dtype"
+        if not was_ndarray or data.dtype == object:  # type: ignore[union-attr]
+            result = lib.maybe_convert_objects(
+                ensure_object(data),
+                convert_non_numeric=True,
+                convert_to_nullable_dtype=True,
+                dtype_if_all_nat=None,
+            )
+            result = ensure_wrapped_if_datetimelike(result)
+            if isinstance(result, np.ndarray):
+                if len(result) == 0 and not was_ndarray:
+                    # e.g. empty list
+                    return FloatingArray._from_sequence(data, dtype="Float64")
+                return NumpyExtensionArray._from_sequence(
+                    data, dtype=result.dtype, copy=copy
+                )
+            if result is data and copy:
+                return result.copy()
+            return result
+
+        data = cast(np.ndarray, data)
+        result = ensure_wrapped_if_datetimelike(data)
+        if result is not data:
+            result = cast("DatetimeArray | TimedeltaArray", result)
+            if copy and result.dtype == data.dtype:
+                return result.copy()
+            return result
+
+        if data.dtype.kind in "SU":
             # StringArray/ArrowStringArray depending on pd.options.mode.string_storage
             dtype = StringDtype()
             cls = dtype.construct_array_type()
             return cls._from_sequence(data, dtype=dtype, copy=copy)
 
-        elif inferred_dtype == "integer":
+        elif data.dtype.kind in "iu":
             return IntegerArray._from_sequence(data, copy=copy)
-        elif inferred_dtype == "empty" and not hasattr(data, "dtype") and not len(data):
-            return FloatingArray._from_sequence(data, copy=copy)
-        elif (
-            inferred_dtype in ("floating", "mixed-integer-float")
-            and getattr(data, "dtype", None) != np.float16
-        ):
+        elif data.dtype.kind == "f":
             # GH#44715 Exclude np.float16 bc FloatingArray does not support it;
             #  we will fall back to NumpyExtensionArray.
+            if data.dtype == np.float16:
+                return NumpyExtensionArray._from_sequence(
+                    data, dtype=data.dtype, copy=copy
+                )
             return FloatingArray._from_sequence(data, copy=copy)
 
-        elif inferred_dtype == "boolean":
+        elif data.dtype.kind == "b":
             return BooleanArray._from_sequence(data, dtype="boolean", copy=copy)
+        else:
+            # e.g. complex
+            return NumpyExtensionArray._from_sequence(data, dtype=data.dtype, copy=copy)
 
     # Pandas overrides NumPy for
     #   1. datetime64[ns,us,ms,s]
diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
index 50dafb5dbbb06..857509e18fa8e 100644
--- a/pandas/tests/arrays/test_array.py
+++ b/pandas/tests/arrays/test_array.py
@@ -220,6 +220,14 @@ def test_dt64_array(dtype_unit):
             .construct_array_type()
             ._from_sequence(["a", None], dtype=pd.StringDtype()),
         ),
+        (
+            # numpy array with string dtype
+            np.array(["a", "b"], dtype=str),
+            None,
+            pd.StringDtype()
+            .construct_array_type()
+            ._from_sequence(["a", "b"], dtype=pd.StringDtype()),
+        ),
         # Boolean
         (
             [True, None],
@@ -247,6 +255,14 @@ def test_dt64_array(dtype_unit):
             "category",
             pd.Categorical([pd.Period("2000", "D"), pd.Period("2001", "D")]),
         ),
+        # Complex
+        (
+            np.array([complex(1), complex(2)], dtype=np.complex128),
+            None,
+            NumpyExtensionArray(
+                np.array([complex(1), complex(2)], dtype=np.complex128)
+            ),
+        ),
     ],
 )
 def test_array(data, dtype, expected):
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 668e7192c0e52..f4282c9c7ac3a 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -936,9 +936,9 @@ def test_maybe_convert_objects_bool_nan(self):
     def test_maybe_convert_objects_nullable_boolean(self):
         # GH50047
         arr = np.array([True, False], dtype=object)
-        exp = np.array([True, False])
+        exp = BooleanArray._from_sequence([True, False], dtype="boolean")
         out = lib.maybe_convert_objects(arr, convert_to_nullable_dtype=True)
-        tm.assert_numpy_array_equal(out, exp)
+        tm.assert_extension_array_equal(out, exp)
 
         arr = np.array([True, False, pd.NaT], dtype=object)
         exp = np.array([True, False, pd.NaT], dtype=object)

From 2c8c0e2210dcf57875a9b991cf68fcd082271446 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Thu, 25 Apr 2024 22:43:16 +0530
Subject: [PATCH 042/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.item (#58400)

* DOC: add SA01 to pandas.Index.item

* DOC: remove pandas.Index.item

* DOC: remove pandas.Series.item

---------

Co-authored-by: aBiR1D <abirdas2048@gmail.com>
---
 ci/code_checks.sh   | 2 --
 pandas/core/base.py | 5 +++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index bf7423dfe5825..570ea1272758a 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -134,7 +134,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.inferred_type SA01" \
         -i "pandas.Index.insert PR07,RT03,SA01" \
         -i "pandas.Index.intersection PR07,RT03,SA01" \
-        -i "pandas.Index.item SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
         -i "pandas.Index.memory_usage RT03" \
         -i "pandas.Index.names GL08" \
@@ -288,7 +287,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.is_monotonic_decreasing SA01" \
         -i "pandas.Series.is_monotonic_increasing SA01" \
         -i "pandas.Series.is_unique SA01" \
-        -i "pandas.Series.item SA01" \
         -i "pandas.Series.kurt RT03,SA01" \
         -i "pandas.Series.kurtosis RT03,SA01" \
         -i "pandas.Series.le PR07,SA01" \
diff --git a/pandas/core/base.py b/pandas/core/base.py
index d716a9ffb7bcc..ab27248308d74 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -398,6 +398,11 @@ def item(self):
         ValueError
             If the data is not length = 1.
 
+        See Also
+        --------
+        Index.values : Returns an array representing the data in the Index.
+        Series.head : Returns the first `n` rows.
+
         Examples
         --------
         >>> s = pd.Series([1])

From 8a9325fa6343f01fd3c9795283a84a160a52643d Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Thu, 25 Apr 2024 22:48:20 +0530
Subject: [PATCH 043/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.fillna (#58417)

* DOC: add RT03 to pandas.Index.fillna

* DOC: remove pandas.Index.fillna
---
 ci/code_checks.sh           | 1 -
 pandas/core/indexes/base.py | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 570ea1272758a..51745b208c786 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -124,7 +124,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.dropna RT03,SA01" \
         -i "pandas.Index.duplicated RT03" \
         -i "pandas.Index.empty GL08" \
-        -i "pandas.Index.fillna RT03" \
         -i "pandas.Index.get_indexer PR07,SA01" \
         -i "pandas.Index.get_indexer_for PR01,SA01" \
         -i "pandas.Index.get_indexer_non_unique PR07,SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 2bb0aedb8bd84..ffc228d57a95b 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2578,6 +2578,7 @@ def fillna(self, value):
         Returns
         -------
         Index
+            NA/NaN values replaced with `value`.
 
         See Also
         --------

From a0977f5b5d9614441b908409272eb97e211332ec Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Thu, 25 Apr 2024 22:48:53 +0530
Subject: [PATCH 044/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeIndex.tzconvert (#58416)

* DOC: remove pandas.DatetimeIndex.tz_convert

* DOC: add RT03 to pandas.DatetimeIndex.tz_convert

* DOC: removed RT03 from pandas.Series.dt.tz_convert
---
 ci/code_checks.sh               | 3 +--
 pandas/core/arrays/datetimes.py | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 51745b208c786..e2d125ad1fc68 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -109,7 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeIndex.std PR01,RT03" \
         -i "pandas.DatetimeIndex.to_period RT03" \
         -i "pandas.DatetimeIndex.to_pydatetime RT03,SA01" \
-        -i "pandas.DatetimeIndex.tz_convert RT03" \
         -i "pandas.DatetimeTZDtype SA01" \
         -i "pandas.DatetimeTZDtype.tz SA01" \
         -i "pandas.Grouper PR02" \
@@ -272,7 +271,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.dt.strftime PR01,PR02" \
         -i "pandas.Series.dt.to_period PR01,PR02,RT03" \
         -i "pandas.Series.dt.total_seconds PR01" \
-        -i "pandas.Series.dt.tz_convert PR01,PR02,RT03" \
+        -i "pandas.Series.dt.tz_convert PR01,PR02" \
         -i "pandas.Series.dt.tz_localize PR01,PR02" \
         -i "pandas.Series.dt.unit GL08" \
         -i "pandas.Series.dtype SA01" \
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 25c7f926d19a8..106064ade8344 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -867,6 +867,7 @@ def tz_convert(self, tz) -> Self:
         Returns
         -------
         Array or Index
+            Datetime Array/Index with target `tz`.
 
         Raises
         ------

From 12e47e96a81d65d3a781363b49d05787a5572d58 Mon Sep 17 00:00:00 2001
From: Pascal Corpet <pcorpet@users.noreply.github.com>
Date: Thu, 25 Apr 2024 19:51:33 +0200
Subject: [PATCH 045/100] [Typing] Enhance the WriteExcelBuffer protocol to be
 compatible with io.BinaryIO (#58422)

TYP: Enhance the WriteExcelBuffer protocol to be compatible with io.BinaryIO
---
 pandas/_typing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index 172b30c59fc13..ef68018f2721a 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -314,7 +314,7 @@ def readline(self) -> bytes: ...
 
 
 class WriteExcelBuffer(WriteBuffer[bytes], Protocol):
-    def truncate(self, size: int | None = ...) -> int: ...
+    def truncate(self, size: int | None = ..., /) -> int: ...
 
 
 class ReadCsvBuffer(ReadBuffer[AnyStr_co], Protocol):

From cbbe3a26b4dbcebff5e68f361a46bc0f2610b2ff Mon Sep 17 00:00:00 2001
From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com>
Date: Thu, 25 Apr 2024 16:18:00 -0400
Subject: [PATCH 046/100] DOC: Fix DataFrame.reorder_levels SA01 error (#58431)

---
 ci/code_checks.sh    | 1 -
 pandas/core/frame.py | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index e2d125ad1fc68..3286cb74c3119 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -92,7 +92,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.plot PR02,SA01" \
         -i "pandas.DataFrame.prod RT03" \
         -i "pandas.DataFrame.product RT03" \
-        -i "pandas.DataFrame.reorder_levels SA01" \
         -i "pandas.DataFrame.sem PR01,RT03,SA01" \
         -i "pandas.DataFrame.skew RT03,SA01" \
         -i "pandas.DataFrame.sparse PR01" \
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e8a0e37b70145..618218a70b557 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7696,6 +7696,10 @@ def reorder_levels(self, order: Sequence[int | str], axis: Axis = 0) -> DataFram
         DataFrame
             DataFrame with indices or columns with reordered levels.
 
+        See Also
+        --------
+        DataFrame.swaplevel : Swap levels i and j in a MultiIndex.
+
         Examples
         --------
         >>> data = {

From 926a9c35fc8ae448be5dea0239ea1da1013a043a Mon Sep 17 00:00:00 2001
From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com>
Date: Thu, 25 Apr 2024 16:19:00 -0400
Subject: [PATCH 047/100] DOC: Fix RT03 errors for DataFrame.infer_objects,
 DataFrame.hist, DataFrame.to_parquet (#58429)

* Fix RT03 errors

* Fix RT03 errors
---
 ci/code_checks.sh        | 4 ----
 pandas/core/frame.py     | 3 +++
 pandas/core/generic.py   | 1 +
 pandas/plotting/_core.py | 1 +
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 3286cb74c3119..44017c575a516 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -81,8 +81,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.CategoricalIndex.ordered SA01" \
         -i "pandas.DataFrame.__dataframe__ SA01" \
         -i "pandas.DataFrame.at_time PR01" \
-        -i "pandas.DataFrame.hist RT03" \
-        -i "pandas.DataFrame.infer_objects RT03" \
         -i "pandas.DataFrame.kurt RT03,SA01" \
         -i "pandas.DataFrame.kurtosis RT03,SA01" \
         -i "pandas.DataFrame.max RT03" \
@@ -99,7 +97,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.sum RT03" \
         -i "pandas.DataFrame.swaplevel SA01" \
         -i "pandas.DataFrame.to_markdown SA01" \
-        -i "pandas.DataFrame.to_parquet RT03" \
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
         -i "pandas.DatetimeIndex.freqstr SA01" \
         -i "pandas.DatetimeIndex.indexer_at_time PR01,RT03" \
@@ -280,7 +277,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.ge PR07,SA01" \
         -i "pandas.Series.gt PR07,SA01" \
         -i "pandas.Series.hasnans SA01" \
-        -i "pandas.Series.infer_objects RT03" \
         -i "pandas.Series.is_monotonic_decreasing SA01" \
         -i "pandas.Series.is_monotonic_increasing SA01" \
         -i "pandas.Series.is_unique SA01" \
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 618218a70b557..9fbbc2c08efaa 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2876,6 +2876,9 @@ def to_parquet(
         Returns
         -------
         bytes if no path argument is provided else None
+            Returns the DataFrame converted to the binary parquet format as bytes if no
+            path argument. Returns None and writes the DataFrame to the specified
+            location in the Parquet format if the path argument is provided.
 
         See Also
         --------
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a7f155ec93524..121f49cb7d1cf 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6579,6 +6579,7 @@ def infer_objects(self, copy: bool | lib.NoDefault = lib.no_default) -> Self:
         Returns
         -------
         same type as input object
+            Returns an object of the same type as the input object.
 
         See Also
         --------
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 60bb45d3ac1dc..ea5daf02b7252 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -233,6 +233,7 @@ def hist_frame(
     Returns
     -------
     matplotlib.Axes or numpy.ndarray of them
+        Returns an AxesSubplot object or a numpy array of AxesSubplot objects.
 
     See Also
     --------

From 1fec924f9fb4096e80c9a732a62686a4ec275d8c Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Fri, 26 Apr 2024 01:49:55 +0530
Subject: [PATCH 048/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeIndex.indexer_between_time (#58415)

* DOC: add RT03 to pandas.DatetimeIndex.indexer_between_time

* DOC: remove pandas.DatetimeIndex.indexer_between_time
---
 ci/code_checks.sh                | 1 -
 pandas/core/indexes/datetimes.py | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 44017c575a516..740814151aaf4 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -100,7 +100,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
         -i "pandas.DatetimeIndex.freqstr SA01" \
         -i "pandas.DatetimeIndex.indexer_at_time PR01,RT03" \
-        -i "pandas.DatetimeIndex.indexer_between_time RT03" \
         -i "pandas.DatetimeIndex.snap PR01,RT03" \
         -i "pandas.DatetimeIndex.std PR01,RT03" \
         -i "pandas.DatetimeIndex.to_period RT03" \
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 6d5f32774f485..951455b627fbd 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -759,6 +759,7 @@ def indexer_between_time(
         Returns
         -------
         np.ndarray[np.intp]
+            Index locations of values between particular times of day.
 
         See Also
         --------

From 114845c952c3d3405c897b4566b584fec94373fe Mon Sep 17 00:00:00 2001
From: William Andrea <22385371+wjandrea@users.noreply.github.com>
Date: Thu, 25 Apr 2024 16:21:30 -0400
Subject: [PATCH 049/100] DOC: Fix "versionadded" for case_when (#58426)

Fix "versionadded" for case_when

Tag was on parameter instead of function itself.
---
 pandas/core/series.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index a72eb8e261e65..c1920312489c9 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -5359,6 +5359,8 @@ def case_when(
         """
         Replace values where the conditions are True.
 
+        .. versionadded:: 2.2.0
+
         Parameters
         ----------
         caselist : A list of tuples of conditions and expected replacements
@@ -5376,8 +5378,6 @@ def case_when(
             must not change the input Series
             (though pandas doesn`t check it).
 
-            .. versionadded:: 2.2.0
-
         Returns
         -------
         Series

From 8f33ae0219d9c7b1260745d6090fd46a545e4fc4 Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Thu, 25 Apr 2024 16:23:47 -0400
Subject: [PATCH 050/100] DOC: fixing SA01 error for Index: T and empty
 (#58430)

* DOC: fixing SA01 error for Index: T and empty

* fixing EXPECTED TO FAIL, BUT NOT FAILING error
---
 ci/code_checks.sh   |  4 ----
 pandas/core/base.py | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 740814151aaf4..49089e903c8ba 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -108,7 +108,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeTZDtype.tz SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
-        -i "pandas.Index.T SA01" \
         -i "pandas.Index.append PR07,RT03,SA01" \
         -i "pandas.Index.copy PR07,SA01" \
         -i "pandas.Index.difference PR07,RT03,SA01" \
@@ -117,7 +116,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.droplevel RT03,SA01" \
         -i "pandas.Index.dropna RT03,SA01" \
         -i "pandas.Index.duplicated RT03" \
-        -i "pandas.Index.empty GL08" \
         -i "pandas.Index.get_indexer PR07,SA01" \
         -i "pandas.Index.get_indexer_for PR01,SA01" \
         -i "pandas.Index.get_indexer_non_unique PR07,SA01" \
@@ -229,7 +227,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.RangeIndex.step SA01" \
         -i "pandas.RangeIndex.stop SA01" \
         -i "pandas.Series SA01" \
-        -i "pandas.Series.T SA01" \
         -i "pandas.Series.__iter__ RT03,SA01" \
         -i "pandas.Series.add PR07" \
         -i "pandas.Series.at_time PR01" \
@@ -270,7 +267,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.dt.tz_localize PR01,PR02" \
         -i "pandas.Series.dt.unit GL08" \
         -i "pandas.Series.dtype SA01" \
-        -i "pandas.Series.empty GL08" \
         -i "pandas.Series.eq PR07,SA01" \
         -i "pandas.Series.floordiv PR07" \
         -i "pandas.Series.ge PR07,SA01" \
diff --git a/pandas/core/base.py b/pandas/core/base.py
index ab27248308d74..72d8c1b837398 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -309,6 +309,10 @@ def transpose(self, *args, **kwargs) -> Self:
         doc="""
         Return the transpose, which is by definition self.
 
+        See Also
+        --------
+        Index : Immutable sequence used for indexing and alignment.
+
         Examples
         --------
         For Series:
@@ -691,6 +695,40 @@ def to_numpy(
     @final
     @property
     def empty(self) -> bool:
+        """
+        Indicator whether Index is empty.
+
+        Returns
+        -------
+        bool
+            If Index is empty, return True, if not return False.
+
+        See Also
+        --------
+        Index.size : Return the number of elements in the underlying data.
+
+        Examples
+        --------
+        >>> idx_empty = pd.Index([1, 2, 3])
+        >>> idx_empty
+        Index([1, 2, 3], dtype='int64')
+        >>> idx_empty.empty
+        False
+
+        >>> idx_empty = pd.Index([])
+        >>> idx_empty
+        Index([], dtype='object')
+        >>> idx_empty.empty
+        True
+
+        If we only have NaNs in our Index, it is not considered empty!
+
+        >>> idx_empty = pd.Index([np.nan, np.nan])
+        >>> idx_empty
+        Index([nan, nan], dtype='float64')
+        >>> idx_empty.empty
+        False
+        """
         return not self.size
 
     @doc(op="max", oppose="min", value="largest")

From a149abd4d71ac07975b6e849a219c1db676eeceb Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Fri, 26 Apr 2024 01:54:39 +0530
Subject: [PATCH 051/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeTZDtype.tz (#58401)

* DOC: add SA01 to pandas.DatetimeTZDtype.tz

* DOC: remove pandas.DatetimeTZDtype.tz
---
 ci/code_checks.sh            | 1 -
 pandas/core/dtypes/dtypes.py | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 49089e903c8ba..c1d60c4d9900a 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -105,7 +105,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeIndex.to_period RT03" \
         -i "pandas.DatetimeIndex.to_pydatetime RT03,SA01" \
         -i "pandas.DatetimeTZDtype SA01" \
-        -i "pandas.DatetimeTZDtype.tz SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.append PR07,RT03,SA01" \
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 0a97a0d03c22a..5ff7ca33d18bd 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -811,6 +811,10 @@ def tz(self) -> tzinfo:
         """
         The timezone.
 
+        See Also
+        --------
+        DatetimeTZDtype.unit : Retrieves precision of the datetime data.
+
         Examples
         --------
         >>> from zoneinfo import ZoneInfo

From 39363cfe531648a35b806d187e1fb3a39a0c0203 Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Thu, 25 Apr 2024 16:26:56 -0400
Subject: [PATCH 052/100] DOC: fixing RT03 errors for Index: drop_duplicates
 and memory_usage (#58434)

---
 ci/code_checks.sh           | 2 --
 pandas/core/base.py         | 1 +
 pandas/core/indexes/base.py | 1 +
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index c1d60c4d9900a..b912a40e6d04e 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -111,7 +111,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.copy PR07,SA01" \
         -i "pandas.Index.difference PR07,RT03,SA01" \
         -i "pandas.Index.drop PR07,SA01" \
-        -i "pandas.Index.drop_duplicates RT03" \
         -i "pandas.Index.droplevel RT03,SA01" \
         -i "pandas.Index.dropna RT03,SA01" \
         -i "pandas.Index.duplicated RT03" \
@@ -125,7 +124,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.insert PR07,RT03,SA01" \
         -i "pandas.Index.intersection PR07,RT03,SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
-        -i "pandas.Index.memory_usage RT03" \
         -i "pandas.Index.names GL08" \
         -i "pandas.Index.nunique RT03" \
         -i "pandas.Index.putmask PR01,RT03" \
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 72d8c1b837398..f535f0c55415a 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1170,6 +1170,7 @@ def _memory_usage(self, deep: bool = False) -> int:
         Returns
         -------
         bytes used
+            Returns memory usage of the values in the Index in bytes.
 
         See Also
         --------
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ffc228d57a95b..ace082fba609a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2684,6 +2684,7 @@ def drop_duplicates(self, *, keep: DropKeep = "first") -> Self:
         Returns
         -------
         Index
+            A new Index object with the duplicate values removed.
 
         See Also
         --------

From 7c836ed2ecaec55b788aedf053b74ee2a84685da Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Fri, 26 Apr 2024 01:59:26 +0530
Subject: [PATCH 053/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeIndex.freqstr (#58309)

* DOC: add SA01 to pandas.DatetimeIndex.freqstr

* DOC: remove pandas.DatetimeIndex.freqstr

* DOC: removed pandas.PeriodIndex.freqstr
---
 ci/code_checks.sh                  | 2 --
 pandas/core/arrays/datetimelike.py | 5 +++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index b912a40e6d04e..9aae477ca1af3 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -98,7 +98,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.swaplevel SA01" \
         -i "pandas.DataFrame.to_markdown SA01" \
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
-        -i "pandas.DatetimeIndex.freqstr SA01" \
         -i "pandas.DatetimeIndex.indexer_at_time PR01,RT03" \
         -i "pandas.DatetimeIndex.snap PR01,RT03" \
         -i "pandas.DatetimeIndex.std PR01,RT03" \
@@ -203,7 +202,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.PeriodIndex.dayofyear SA01" \
         -i "pandas.PeriodIndex.days_in_month SA01" \
         -i "pandas.PeriodIndex.daysinmonth SA01" \
-        -i "pandas.PeriodIndex.freqstr SA01" \
         -i "pandas.PeriodIndex.from_fields PR07,SA01" \
         -i "pandas.PeriodIndex.from_ordinals SA01" \
         -i "pandas.PeriodIndex.hour SA01" \
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index ff8b16b3361ee..ab17ae43215d2 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -875,6 +875,11 @@ def freqstr(self) -> str | None:
         """
         Return the frequency object as a string if it's set, otherwise None.
 
+        See Also
+        --------
+        DatetimeIndex.inferred_freq : Returns a string representing a frequency
+            generated by infer_freq.
+
         Examples
         --------
         For DatetimeIndex:

From 87b5a827c6178216732057e866095dd1eb99f8c3 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Fri, 26 Apr 2024 23:23:26 +0530
Subject: [PATCH 054/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeTZDtype (#58402)

* DOC: add SA01 to pandas.DatetimeTZDtype

* DOC: remove pandas.DatetimeTZDtype

* DOC: add .

* DOC: delete tz and tz_convert
---
 ci/code_checks.sh            | 1 -
 pandas/core/dtypes/dtypes.py | 5 +++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 9aae477ca1af3..2ae74cfbe6e2e 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -103,7 +103,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeIndex.std PR01,RT03" \
         -i "pandas.DatetimeIndex.to_period RT03" \
         -i "pandas.DatetimeIndex.to_pydatetime RT03,SA01" \
-        -i "pandas.DatetimeTZDtype SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.append PR07,RT03,SA01" \
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 5ff7ca33d18bd..778b6bd6f3f18 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -717,6 +717,11 @@ class DatetimeTZDtype(PandasExtensionDtype):
     ZoneInfoNotFoundError
         When the requested timezone cannot be found.
 
+    See Also
+    --------
+    numpy.datetime64 : Numpy data type for datetime.
+    datetime.datetime : Python datetime object.
+
     Examples
     --------
     >>> from zoneinfo import ZoneInfo

From 362278a1c4a6022b57be73d7d73a293c1c0abd76 Mon Sep 17 00:00:00 2001
From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com>
Date: Fri, 26 Apr 2024 13:54:25 -0400
Subject: [PATCH 055/100] DOC: Fix Index.inferred type SA01 and
 Index.slice_locs RT03 errors (#58435)

Fix Index.inferred type SA01 and Index.slice_locs RT03 errors
---
 ci/code_checks.sh           | 2 --
 pandas/core/indexes/base.py | 6 ++++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 2ae74cfbe6e2e..f7eb16b4a85b5 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -118,7 +118,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.get_loc PR07,RT03,SA01" \
         -i "pandas.Index.get_slice_bound PR07" \
         -i "pandas.Index.identical PR01,SA01" \
-        -i "pandas.Index.inferred_type SA01" \
         -i "pandas.Index.insert PR07,RT03,SA01" \
         -i "pandas.Index.intersection PR07,RT03,SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
@@ -128,7 +127,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.ravel PR01,RT03" \
         -i "pandas.Index.reindex PR07" \
         -i "pandas.Index.slice_indexer PR07,RT03,SA01" \
-        -i "pandas.Index.slice_locs RT03" \
         -i "pandas.Index.str PR01,SA01" \
         -i "pandas.Index.symmetric_difference PR07,RT03,SA01" \
         -i "pandas.Index.take PR01,PR07" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ace082fba609a..61ba2fc7088fd 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2360,6 +2360,10 @@ def inferred_type(self) -> str_t:
         """
         Return a string of the type inferred from the values.
 
+        See Also
+        --------
+        Index.dtype : Return the dtype object of the underlying data.
+
         Examples
         --------
         >>> idx = pd.Index([1, 2, 3])
@@ -6471,6 +6475,8 @@ def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
         Returns
         -------
         tuple[int, int]
+            Returns a tuple of two integers representing the slice locations for the
+            input labels within the index.
 
         See Also
         --------

From 4f35184ac19d942ad1fef9f70ef860d5f6c0ff81 Mon Sep 17 00:00:00 2001
From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com>
Date: Fri, 26 Apr 2024 13:56:19 -0400
Subject: [PATCH 056/100] DOC: Fix RT03 and SA01 errors for Index.droplevel,
 Index.dropna (#58433)

* Fix RT03 and SA01 errors for Index.droplevel, Index.dropna

* Remove line from code_checks.sh
---
 ci/code_checks.sh           |  3 ---
 pandas/core/indexes/base.py | 12 ++++++++++++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index f7eb16b4a85b5..08dedb1b13a66 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -109,8 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.copy PR07,SA01" \
         -i "pandas.Index.difference PR07,RT03,SA01" \
         -i "pandas.Index.drop PR07,SA01" \
-        -i "pandas.Index.droplevel RT03,SA01" \
-        -i "pandas.Index.dropna RT03,SA01" \
         -i "pandas.Index.duplicated RT03" \
         -i "pandas.Index.get_indexer PR07,SA01" \
         -i "pandas.Index.get_indexer_for PR01,SA01" \
@@ -158,7 +156,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.MultiIndex.append PR07,SA01" \
         -i "pandas.MultiIndex.copy PR07,RT03,SA01" \
         -i "pandas.MultiIndex.drop PR07,RT03,SA01" \
-        -i "pandas.MultiIndex.droplevel RT03,SA01" \
         -i "pandas.MultiIndex.dtypes SA01" \
         -i "pandas.MultiIndex.get_indexer PR07,SA01" \
         -i "pandas.MultiIndex.get_level_values SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 61ba2fc7088fd..ebdaaf4be8419 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2093,6 +2093,12 @@ def droplevel(self, level: IndexLabel = 0):
         Returns
         -------
         Index or MultiIndex
+            Returns an Index or MultiIndex object, depending on the resulting index
+            after removing the requested level(s).
+
+        See Also
+        --------
+        Index.dropna : Return Index without NA/NaN values.
 
         Examples
         --------
@@ -2619,6 +2625,12 @@ def dropna(self, how: AnyAll = "any") -> Self:
         Returns
         -------
         Index
+            Returns an Index object after removing NA/NaN values.
+
+        See Also
+        --------
+        Index.fillna : Fill NA/NaN values with the specified value.
+        Index.isna : Detect missing values.
 
         Examples
         --------

From 13771ab411b37df9545b3b6cb16dc776a825eca1 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Fri, 26 Apr 2024 23:32:05 +0530
Subject: [PATCH 057/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeIndex.std (#58439)

* DOC: add PR01,RT03 for pandas.DatetimeIndex.std

* DOC: remove PR01,RT03 for pandas.DatetimeIndex.std
---
 ci/code_checks.sh               |  1 -
 pandas/core/arrays/datetimes.py | 17 +++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 08dedb1b13a66..2639a7b25f389 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -100,7 +100,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
         -i "pandas.DatetimeIndex.indexer_at_time PR01,RT03" \
         -i "pandas.DatetimeIndex.snap PR01,RT03" \
-        -i "pandas.DatetimeIndex.std PR01,RT03" \
         -i "pandas.DatetimeIndex.to_period RT03" \
         -i "pandas.DatetimeIndex.to_pydatetime RT03,SA01" \
         -i "pandas.Grouper PR02" \
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 106064ade8344..0f59d62339bf2 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -2248,9 +2248,25 @@ def std(
         axis : int, optional
             Axis for the function to be applied on. For :class:`pandas.Series`
             this parameter is unused and defaults to ``None``.
+        dtype : dtype, optional, default None
+            Type to use in computing the standard deviation. For arrays of
+            integer type the default is float64, for arrays of float types
+            it is the same as the array type.
+        out : ndarray, optional, default None
+            Alternative output array in which to place the result. It must have
+            the same shape as the expected output but the type (of the
+            calculated values) will be cast if necessary.
         ddof : int, default 1
             Degrees of Freedom. The divisor used in calculations is `N - ddof`,
             where `N` represents the number of elements.
+        keepdims : bool, optional
+            If this is set to True, the axes which are reduced are left in the
+            result as dimensions with size one. With this option, the result
+            will broadcast correctly against the input array. If the default
+            value is passed, then keepdims will not be passed through to the
+            std method of sub-classes of ndarray, however any non-default value
+            will be. If the sub-class method does not implement keepdims any
+            exceptions will be raised.
         skipna : bool, default True
             Exclude NA/null values. If an entire row/column is ``NA``, the result
             will be ``NA``.
@@ -2258,6 +2274,7 @@ def std(
         Returns
         -------
         Timedelta
+            Standard deviation over requested axis.
 
         See Also
         --------

From a1fc8e8147efb0c7d7e10e674c3ee383b14f2d43 Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Fri, 26 Apr 2024 14:03:47 -0400
Subject: [PATCH 058/100] DOC: fix PR07 and SA01 issue for Index: copy and
 get_slice_bound (#58443)

* DOC: fix PR07 and SA01 issue for Index: copy and get_slice_bound

* fixing line too long error
---
 ci/code_checks.sh           |  2 --
 pandas/core/indexes/base.py | 10 ++++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 2639a7b25f389..26c8ae1298630 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -105,7 +105,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.append PR07,RT03,SA01" \
-        -i "pandas.Index.copy PR07,SA01" \
         -i "pandas.Index.difference PR07,RT03,SA01" \
         -i "pandas.Index.drop PR07,SA01" \
         -i "pandas.Index.duplicated RT03" \
@@ -113,7 +112,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.get_indexer_for PR01,SA01" \
         -i "pandas.Index.get_indexer_non_unique PR07,SA01" \
         -i "pandas.Index.get_loc PR07,RT03,SA01" \
-        -i "pandas.Index.get_slice_bound PR07" \
         -i "pandas.Index.identical PR01,SA01" \
         -i "pandas.Index.insert PR07,RT03,SA01" \
         -i "pandas.Index.intersection PR07,RT03,SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ebdaaf4be8419..9acab2642f6be 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1262,12 +1262,19 @@ def copy(
         name : Label, optional
             Set name for new object.
         deep : bool, default False
+            If True, attempts to make a deep copy of the Index.
+            Otherwise, makes a shallow copy.
 
         Returns
         -------
         Index
             Index refer to new object which is a copy of this object.
 
+        See Also
+        --------
+        Index.delete : Make new Index with passed location(-s) deleted.
+        Index.drop : Make new Index with passed list of labels deleted.
+
         Notes
         -----
         In most cases, there should be no functional difference from using
@@ -6398,7 +6405,10 @@ def get_slice_bound(self, label, side: Literal["left", "right"]) -> int:
         Parameters
         ----------
         label : object
+            The label for which to calculate the slice bound.
         side : {'left', 'right'}
+            If 'left', return the leftmost position of the given label.
+            If 'right', return one-past-the-rightmost position of the given label.
 
         Returns
         -------

From bd84be4aac6f84926ff00c594d5401da7a3dc068 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?=
 <6618166+twoertwein@users.noreply.github.com>
Date: Sun, 28 Apr 2024 23:36:30 -0400
Subject: [PATCH 059/100] TYP: misc return annotations (#58468)

---
 pandas/io/excel/_xlsxwriter.py |  2 +-
 pandas/io/pytables.py          | 39 ++++++++++++++++++++--------------
 pandas/util/_decorators.py     |  4 ++--
 3 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py
index 6eacac8c064fb..b2fd24a670300 100644
--- a/pandas/io/excel/_xlsxwriter.py
+++ b/pandas/io/excel/_xlsxwriter.py
@@ -93,7 +93,7 @@ class _XlsxStyler:
     }
 
     @classmethod
-    def convert(cls, style_dict, num_format_str=None):
+    def convert(cls, style_dict, num_format_str=None) -> dict[str, Any]:
         """
         converts a style_dict to an xlsxwriter format dict
 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index d585c59dd5581..5d325397a81ae 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -22,6 +22,7 @@
     Final,
     Literal,
     cast,
+    overload,
 )
 import warnings
 
@@ -593,7 +594,7 @@ def __getitem__(self, key: str):
     def __setitem__(self, key: str, value) -> None:
         self.put(key, value)
 
-    def __delitem__(self, key: str) -> None:
+    def __delitem__(self, key: str) -> int | None:
         return self.remove(key)
 
     def __getattr__(self, name: str):
@@ -1203,7 +1204,7 @@ def put(
             dropna=dropna,
         )
 
-    def remove(self, key: str, where=None, start=None, stop=None) -> None:
+    def remove(self, key: str, where=None, start=None, stop=None) -> int | None:
         """
         Remove pandas object partially by specifying the where condition
 
@@ -1251,14 +1252,12 @@ def remove(self, key: str, where=None, start=None, stop=None) -> None:
         # remove the node
         if com.all_none(where, start, stop):
             s.group._f_remove(recursive=True)
+            return None
 
         # delete from the table
-        else:
-            if not s.is_table:
-                raise ValueError(
-                    "can only remove with where on objects written as tables"
-                )
-            return s.delete(where=where, start=start, stop=stop)
+        if not s.is_table:
+            raise ValueError("can only remove with where on objects written as tables")
+        return s.delete(where=where, start=start, stop=stop)
 
     def append(
         self,
@@ -2895,7 +2894,7 @@ def read(
         columns=None,
         start: int | None = None,
         stop: int | None = None,
-    ):
+    ) -> Series | DataFrame:
         raise NotImplementedError(
             "cannot read on an abstract storer: subclasses should implement"
         )
@@ -2907,7 +2906,7 @@ def write(self, obj, **kwargs) -> None:
 
     def delete(
         self, where=None, start: int | None = None, stop: int | None = None
-    ) -> None:
+    ) -> int | None:
         """
         support fully deleting the node in its entirety (only) - where
         specification must be None
@@ -3601,7 +3600,7 @@ def queryables(self) -> dict[str, Any]:
 
         return dict(d1 + d2 + d3)
 
-    def index_cols(self):
+    def index_cols(self) -> list[tuple[Any, Any]]:
         """return a list of my index cols"""
         # Note: each `i.cname` below is assured to be a str.
         return [(i.axis, i.cname) for i in self.index_axes]
@@ -3731,7 +3730,7 @@ def indexables(self):
         dc = set(self.data_columns)
         base_pos = len(_indexables)
 
-        def f(i, c):
+        def f(i, c: str) -> DataCol:
             assert isinstance(c, str)
             klass = DataCol
             if c in dc:
@@ -3897,7 +3896,7 @@ def get_object(cls, obj, transposed: bool):
         """return the data for this obj"""
         return obj
 
-    def validate_data_columns(self, data_columns, min_itemsize, non_index_axes):
+    def validate_data_columns(self, data_columns, min_itemsize, non_index_axes) -> list:
         """
         take the input data_columns and min_itemize and create a data
         columns spec
@@ -4590,7 +4589,9 @@ def write_data_chunk(
             self.table.append(rows)
             self.table.flush()
 
-    def delete(self, where=None, start: int | None = None, stop: int | None = None):
+    def delete(
+        self, where=None, start: int | None = None, stop: int | None = None
+    ) -> int | None:
         # delete all rows (and return the nrows)
         if where is None or not len(where):
             if start is None and stop is None:
@@ -4918,7 +4919,7 @@ def read(
         columns=None,
         start: int | None = None,
         stop: int | None = None,
-    ):
+    ) -> DataFrame:
         df = super().read(where=where, columns=columns, start=start, stop=stop)
         df = df.set_index(self.levels)
 
@@ -5379,7 +5380,13 @@ def __init__(
             if self.terms is not None:
                 self.condition, self.filter = self.terms.evaluate()
 
-    def generate(self, where):
+    @overload
+    def generate(self, where: dict | list | tuple | str) -> PyTablesExpr: ...
+
+    @overload
+    def generate(self, where: None) -> None: ...
+
+    def generate(self, where: dict | list | tuple | str | None) -> PyTablesExpr | None:
         """where can be a : dict,list,tuple,string"""
         if where is None:
             return None
diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py
index d287fa72d552d..bdfb0b1cad8ae 100644
--- a/pandas/util/_decorators.py
+++ b/pandas/util/_decorators.py
@@ -505,7 +505,7 @@ def indent(text: str | None, indents: int = 1) -> str:
 ]
 
 
-def set_module(module):
+def set_module(module) -> Callable[[F], F]:
     """Private decorator for overriding __module__ on a function or class.
 
     Example usage::
@@ -518,7 +518,7 @@ def example():
         assert example.__module__ == "pandas"
     """
 
-    def decorator(func):
+    def decorator(func: F) -> F:
         if module is not None:
             func.__module__ = module
         return func

From 1593fb9f024156b0e69c8a82a0d472720d5c055e Mon Sep 17 00:00:00 2001
From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com>
Date: Sun, 28 Apr 2024 23:52:32 -0400
Subject: [PATCH 060/100] Fix errors for Index.drop, Index.reindex (#58454)

---
 ci/code_checks.sh           | 2 --
 pandas/core/indexes/base.py | 8 ++++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 26c8ae1298630..c06277d66f7a9 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -106,7 +106,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index PR07" \
         -i "pandas.Index.append PR07,RT03,SA01" \
         -i "pandas.Index.difference PR07,RT03,SA01" \
-        -i "pandas.Index.drop PR07,SA01" \
         -i "pandas.Index.duplicated RT03" \
         -i "pandas.Index.get_indexer PR07,SA01" \
         -i "pandas.Index.get_indexer_for PR01,SA01" \
@@ -120,7 +119,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.nunique RT03" \
         -i "pandas.Index.putmask PR01,RT03" \
         -i "pandas.Index.ravel PR01,RT03" \
-        -i "pandas.Index.reindex PR07" \
         -i "pandas.Index.slice_indexer PR07,RT03,SA01" \
         -i "pandas.Index.str PR01,SA01" \
         -i "pandas.Index.symmetric_difference PR07,RT03,SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 9acab2642f6be..8ea844d72326c 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3953,6 +3953,7 @@ def reindex(
         Parameters
         ----------
         target : an iterable
+            An iterable containing the values to be used for creating the new index.
         method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
             * default: exact matches only.
             * pad / ffill: find the PREVIOUS index value if no exact match.
@@ -6686,6 +6687,8 @@ def drop(
         Parameters
         ----------
         labels : array-like or scalar
+            Array-like object or a scalar value, representing the labels to be removed
+            from the Index.
         errors : {'ignore', 'raise'}, default 'raise'
             If 'ignore', suppress error and existing labels are dropped.
 
@@ -6699,6 +6702,11 @@ def drop(
         KeyError
             If not all of the labels are found in the selected axis
 
+        See Also
+        --------
+        Index.dropna : Return Index without NA/NaN values.
+        Index.drop_duplicates : Return Index with duplicate values removed.
+
         Examples
         --------
         >>> idx = pd.Index(["a", "b", "c"])

From cf0014ad9f7bfccac3cfb87cb66556825dba0bea Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Mon, 29 Apr 2024 09:37:59 +0530
Subject: [PATCH 061/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeIndex.to_pydatetime (#58441)

* DOC: add RT03,SA01 for pandas.DatetimeIndex.to_pydatetime

* DOC: remove RT03,SA01 for pandas.DatetimeIndex.to_pydatetime
---
 ci/code_checks.sh               | 1 -
 pandas/core/arrays/datetimes.py | 6 ++++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index c06277d66f7a9..2b418d6655b0b 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -101,7 +101,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeIndex.indexer_at_time PR01,RT03" \
         -i "pandas.DatetimeIndex.snap PR01,RT03" \
         -i "pandas.DatetimeIndex.to_period RT03" \
-        -i "pandas.DatetimeIndex.to_pydatetime RT03,SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.append PR07,RT03,SA01" \
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 0f59d62339bf2..b5048973755bc 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -1127,6 +1127,12 @@ def to_pydatetime(self) -> npt.NDArray[np.object_]:
         Returns
         -------
         numpy.ndarray
+            An ndarray of ``datetime.datetime`` objects.
+
+        See Also
+        --------
+        DatetimeIndex.to_julian_date : Converts Datetime Array to float64 ndarray
+            of Julian Dates.
 
         Examples
         --------

From a2bce66d04ed2addfb9782f0e824c60bc0b1b449 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 29 Apr 2024 05:28:46 -0700
Subject: [PATCH 062/100] REF: move MaskedArray subclass attributes to dtypes
 (#58423)

---
 pandas/_libs/lib.pyx           |  4 +--
 pandas/core/arrays/boolean.py  | 10 ++----
 pandas/core/arrays/floating.py | 10 ++----
 pandas/core/arrays/integer.py  | 10 ++----
 pandas/core/arrays/masked.py   | 60 +++++++++++++---------------------
 pandas/core/arrays/numeric.py  |  2 +-
 pandas/core/dtypes/dtypes.py   | 20 ++++++++++++
 7 files changed, 53 insertions(+), 63 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 5b6d83ba8e9ee..4fd68a1593e49 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2808,14 +2808,14 @@ def maybe_convert_objects(ndarray[object] objects,
             from pandas.core.arrays import IntegerArray
 
             # Set these values to 1 to be deterministic, match
-            #  IntegerArray._internal_fill_value
+            #  IntegerDtype._internal_fill_value
             result[mask] = 1
             result = IntegerArray(result, mask)
         elif result is floats and convert_to_nullable_dtype:
             from pandas.core.arrays import FloatingArray
 
             # Set these values to 1.0 to be deterministic, match
-            #  FloatingArray._internal_fill_value
+            #  FloatingDtype._internal_fill_value
             result[mask] = 1.0
             result = FloatingArray(result, mask)
 
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index 813b10eef5e4b..a326925545045 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -68,6 +68,9 @@ class BooleanDtype(BaseMaskedDtype):
 
     name: ClassVar[str] = "boolean"
 
+    # The value used to fill '_data' to avoid upcasting
+    _internal_fill_value = False
+
     # https://github.com/python/mypy/issues/4125
     # error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
     @property
@@ -293,13 +296,6 @@ class BooleanArray(BaseMaskedArray):
     Length: 3, dtype: boolean
     """
 
-    # The value used to fill '_data' to avoid upcasting
-    _internal_fill_value = False
-    # Fill values used for any/all
-    # Incompatible types in assignment (expression has type "bool", base class
-    # "BaseMaskedArray" defined the type as "<typing special form>")
-    _truthy_value = True  # type: ignore[assignment]
-    _falsey_value = False  # type: ignore[assignment]
     _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
     _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}
 
diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
index 653e63e9d1e2d..b3fbf0f92c32d 100644
--- a/pandas/core/arrays/floating.py
+++ b/pandas/core/arrays/floating.py
@@ -23,6 +23,8 @@ class FloatingDtype(NumericDtype):
     The attributes name & type are set when these subclasses are created.
     """
 
+    # The value used to fill '_data' to avoid upcasting
+    _internal_fill_value = np.nan
     _default_np_dtype = np.dtype(np.float64)
     _checker = is_float_dtype
 
@@ -113,14 +115,6 @@ class FloatingArray(NumericArray):
 
     _dtype_cls = FloatingDtype
 
-    # The value used to fill '_data' to avoid upcasting
-    _internal_fill_value = np.nan
-    # Fill values used for any/all
-    # Incompatible types in assignment (expression has type "float", base class
-    # "BaseMaskedArray" defined the type as "<typing special form>")
-    _truthy_value = 1.0  # type: ignore[assignment]
-    _falsey_value = 0.0  # type: ignore[assignment]
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} data.
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index dc453f3e37c50..21a9b09227663 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -23,6 +23,8 @@ class IntegerDtype(NumericDtype):
     The attributes name & type are set when these subclasses are created.
     """
 
+    # The value used to fill '_data' to avoid upcasting
+    _internal_fill_value = 1
     _default_np_dtype = np.dtype(np.int64)
     _checker = is_integer_dtype
 
@@ -128,14 +130,6 @@ class IntegerArray(NumericArray):
 
     _dtype_cls = IntegerDtype
 
-    # The value used to fill '_data' to avoid upcasting
-    _internal_fill_value = 1
-    # Fill values used for any/all
-    # Incompatible types in assignment (expression has type "int", base class
-    # "BaseMaskedArray" defined the type as "<typing special form>")
-    _truthy_value = 1  # type: ignore[assignment]
-    _falsey_value = 0  # type: ignore[assignment]
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} integer data.
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 190888d281ea9..df794183f67d1 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -5,6 +5,7 @@
     Any,
     Callable,
     Literal,
+    cast,
     overload,
 )
 import warnings
@@ -16,22 +17,6 @@
     missing as libmissing,
 )
 from pandas._libs.tslibs import is_supported_dtype
-from pandas._typing import (
-    ArrayLike,
-    AstypeArg,
-    AxisInt,
-    DtypeObj,
-    FillnaOptions,
-    InterpolateOptions,
-    NpDtype,
-    PositionalIndexer,
-    Scalar,
-    ScalarIndexer,
-    Self,
-    SequenceIndexer,
-    Shape,
-    npt,
-)
 from pandas.compat import (
     IS64,
     is_platform_windows,
@@ -97,6 +82,20 @@
     from pandas._typing import (
         NumpySorter,
         NumpyValueArrayLike,
+        ArrayLike,
+        AstypeArg,
+        AxisInt,
+        DtypeObj,
+        FillnaOptions,
+        InterpolateOptions,
+        NpDtype,
+        PositionalIndexer,
+        Scalar,
+        ScalarIndexer,
+        Self,
+        SequenceIndexer,
+        Shape,
+        npt,
     )
     from pandas._libs.missing import NAType
     from pandas.core.arrays import FloatingArray
@@ -111,16 +110,10 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
     numpy based
     """
 
-    # The value used to fill '_data' to avoid upcasting
-    _internal_fill_value: Scalar
     # our underlying data and mask are each ndarrays
     _data: np.ndarray
     _mask: npt.NDArray[np.bool_]
 
-    # Fill values used for any/all
-    _truthy_value = Scalar  # bool(_truthy_value) = True
-    _falsey_value = Scalar  # bool(_falsey_value) = False
-
     @classmethod
     def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self:
         result = BaseMaskedArray.__new__(cls)
@@ -155,8 +148,9 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self:
     @classmethod
     @doc(ExtensionArray._empty)
     def _empty(cls, shape: Shape, dtype: ExtensionDtype) -> Self:
-        values = np.empty(shape, dtype=dtype.type)
-        values.fill(cls._internal_fill_value)
+        dtype = cast(BaseMaskedDtype, dtype)
+        values: np.ndarray = np.empty(shape, dtype=dtype.type)
+        values.fill(dtype._internal_fill_value)
         mask = np.ones(shape, dtype=bool)
         result = cls(values, mask)
         if not isinstance(result, cls) or dtype != result.dtype:
@@ -917,7 +911,9 @@ def take(
     ) -> Self:
         # we always fill with 1 internally
         # to avoid upcasting
-        data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value
+        data_fill_value = (
+            self.dtype._internal_fill_value if isna(fill_value) else fill_value
+        )
         result = take(
             self._data,
             indexer,
@@ -1397,12 +1393,7 @@ def any(
         nv.validate_any((), kwargs)
 
         values = self._data.copy()
-        # error: Argument 3 to "putmask" has incompatible type "object";
-        # expected "Union[_SupportsArray[dtype[Any]],
-        # _NestedSequence[_SupportsArray[dtype[Any]]],
-        # bool, int, float, complex, str, bytes,
-        # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
-        np.putmask(values, self._mask, self._falsey_value)  # type: ignore[arg-type]
+        np.putmask(values, self._mask, self.dtype._falsey_value)
         result = values.any()
         if skipna:
             return result
@@ -1490,12 +1481,7 @@ def all(
         nv.validate_all((), kwargs)
 
         values = self._data.copy()
-        # error: Argument 3 to "putmask" has incompatible type "object";
-        # expected "Union[_SupportsArray[dtype[Any]],
-        # _NestedSequence[_SupportsArray[dtype[Any]]],
-        # bool, int, float, complex, str, bytes,
-        # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
-        np.putmask(values, self._mask, self._truthy_value)  # type: ignore[arg-type]
+        np.putmask(values, self._mask, self.dtype._truthy_value)
         result = values.all(axis=axis)
 
         if skipna:
diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index fe7b32ec9652e..c5e9ed8698ffe 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -221,7 +221,7 @@ def _coerce_to_data_and_mask(
     # we copy as need to coerce here
     if mask.any():
         values = values.copy()
-        values[mask] = cls._internal_fill_value
+        values[mask] = dtype_cls._internal_fill_value
     if inferred_type in ("string", "unicode"):
         # casts from str are always safe since they raise
         # a ValueError if the str cannot be parsed into a float
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 778b6bd6f3f18..8c64a38bc1be3 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -79,6 +79,7 @@
         DtypeObj,
         IntervalClosedType,
         Ordered,
+        Scalar,
         Self,
         npt,
         type_t,
@@ -1551,6 +1552,25 @@ class BaseMaskedDtype(ExtensionDtype):
 
     base = None
     type: type
+    _internal_fill_value: Scalar
+
+    @property
+    def _truthy_value(self):
+        # Fill value written over masked entries in 'all' so they cannot make it False
+        if self.kind == "f":
+            return 1.0
+        if self.kind in "iu":
+            return 1
+        return True
+
+    @property
+    def _falsey_value(self):
+        # Fill value written over masked entries in 'any' so they cannot make it True
+        if self.kind == "f":
+            return 0.0
+        if self.kind in "iu":
+            return 0
+        return False
 
     @property
     def na_value(self) -> libmissing.NAType:

From 2a7ad2e274c751015f8daf33ccba551770d53b55 Mon Sep 17 00:00:00 2001
From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com>
Date: Mon, 29 Apr 2024 13:30:39 -0400
Subject: [PATCH 063/100] Fix PR07,RT03,SA01 errors for Index.insert,
 Index.intersection (#58456)

* Fix PR07,RT03,SA01 errors for Index.insert, Index.intersection

* Update pandas/core/indexes/base.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Update pandas/core/indexes/base.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Update pandas/core/indexes/base.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 ci/code_checks.sh           |  2 --
 pandas/core/indexes/base.py | 18 ++++++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 2b418d6655b0b..22f12ac0312d1 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -111,8 +111,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.get_indexer_non_unique PR07,SA01" \
         -i "pandas.Index.get_loc PR07,RT03,SA01" \
         -i "pandas.Index.identical PR01,SA01" \
-        -i "pandas.Index.insert PR07,RT03,SA01" \
-        -i "pandas.Index.intersection PR07,RT03,SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
         -i "pandas.Index.names GL08" \
         -i "pandas.Index.nunique RT03" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 8ea844d72326c..f0ac8604ccd60 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3082,6 +3082,8 @@ def intersection(self, other, sort: bool = False):
         Parameters
         ----------
         other : Index or array-like
+            An Index or an array-like object containing elements to form the
+            intersection with the original Index.
         sort : True, False or None, default False
             Whether to sort the resulting index.
 
@@ -3093,6 +3095,14 @@ def intersection(self, other, sort: bool = False):
         Returns
         -------
         Index
+            Returns a new Index object with elements common to both the original Index
+            and the `other` Index.
+
+        See Also
+        --------
+        Index.union : Form the union of two Index objects.
+        Index.difference : Return a new Index with elements of index not in other.
+        Index.isin : Return a boolean array where the index values are in values.
 
         Examples
         --------
@@ -6625,11 +6635,19 @@ def insert(self, loc: int, item) -> Index:
         Parameters
         ----------
         loc : int
+            The integer location where the new item will be inserted.
         item : object
+            The new item to be inserted into the Index.
 
         Returns
         -------
         Index
+            Returns a new Index object resulting from inserting the specified item at
+            the specified location within the original Index.
+
+        See Also
+        --------
+        Index.append : Append a collection of Indexes together.
 
         Examples
         --------

From 6af69a0dd14ca9e8b9ba8bb027c73009f0ec3377 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Mon, 29 Apr 2024 23:07:09 +0530
Subject: [PATCH 064/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeIndex.to_period (#58440)

* DOC: add RT03 for pandas.DatetimeIndex.to_period

* DOC: remove RT03 for pandas.DatetimeIndex.to_period

* DOC: remove RT03 for pandas.Series.dt.to_period
---
 ci/code_checks.sh               | 3 +--
 pandas/core/arrays/datetimes.py | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 22f12ac0312d1..f22bfe85c5c81 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -100,7 +100,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
         -i "pandas.DatetimeIndex.indexer_at_time PR01,RT03" \
         -i "pandas.DatetimeIndex.snap PR01,RT03" \
-        -i "pandas.DatetimeIndex.to_period RT03" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.append PR07,RT03,SA01" \
@@ -242,7 +241,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.dt.round PR01,PR02" \
         -i "pandas.Series.dt.seconds SA01" \
         -i "pandas.Series.dt.strftime PR01,PR02" \
-        -i "pandas.Series.dt.to_period PR01,PR02,RT03" \
+        -i "pandas.Series.dt.to_period PR01,PR02" \
         -i "pandas.Series.dt.total_seconds PR01" \
         -i "pandas.Series.dt.tz_convert PR01,PR02" \
         -i "pandas.Series.dt.tz_localize PR01,PR02" \
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index b5048973755bc..8747f795bebd8 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -1207,6 +1207,7 @@ def to_period(self, freq=None) -> PeriodArray:
         Returns
         -------
         PeriodArray/PeriodIndex
+            Immutable ndarray holding ordinal values at a particular frequency.
 
         Raises
         ------

From 95178690289e3c7278457e31aa289c9c88c77546 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Mon, 29 Apr 2024 23:21:38 +0530
Subject: [PATCH 065/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeIndex.indexer_at_time (#58476)

* DOC: add PR01,RT03 for pandas.DatetimeIndex.indexer_at_time

* DOC: remove pandas.DatetimeIndex.indexer_at_time
---
 ci/code_checks.sh                | 1 -
 pandas/core/indexes/datetimes.py | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index f22bfe85c5c81..58ecae66e1bcc 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -98,7 +98,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.swaplevel SA01" \
         -i "pandas.DataFrame.to_markdown SA01" \
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
-        -i "pandas.DatetimeIndex.indexer_at_time PR01,RT03" \
         -i "pandas.DatetimeIndex.snap PR01,RT03" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 951455b627fbd..742f66aa80728 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -703,10 +703,13 @@ def indexer_at_time(self, time, asof: bool = False) -> npt.NDArray[np.intp]:
             Time passed in either as object (datetime.time) or as string in
             appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
             "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", "%I%M%S%p").
+        asof : bool, default False
+            This parameter is currently not supported.
 
         Returns
         -------
         np.ndarray[np.intp]
+            Index locations of values at given `time` of day.
 
         See Also
         --------

From 3efe698611f43f5625694cf0d1d00422207eb810 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 29 Apr 2024 08:01:06 -1000
Subject: [PATCH 066/100] PERF: RangeIndex.value_counts/searchsorted/to_numpy
 (#58376)

* Add RangeIndex.value_counts,searchsorted,to_numpy

* Undo engine stuff

* Finish searchsorted, add whatsnew

* Remove old to_numpy implementation

* Add whatsnew for to_numpy

* add whatsnew number

* Fix typing
---
 doc/source/whatsnew/v3.0.0.rst            |  3 ++
 pandas/core/base.py                       |  4 +-
 pandas/core/indexes/range.py              | 65 +++++++++++++++++++++++
 pandas/tests/indexes/ranges/test_range.py | 33 ++++++++++++
 4 files changed, 102 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index c77348b365370..517510760e9c1 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -340,6 +340,9 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`)
 - Performance improvement in :meth:`RangeIndex.insert` returning a :class:`RangeIndex` instead of a :class:`Index` when the :class:`RangeIndex` is empty. (:issue:`57833`)
 - Performance improvement in :meth:`RangeIndex.round` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57824`)
+- Performance improvement in :meth:`RangeIndex.searchsorted` (:issue:`58376`)
+- Performance improvement in :meth:`RangeIndex.to_numpy` when specifying an ``na_value`` (:issue:`58376`)
+- Performance improvement in :meth:`RangeIndex.value_counts` (:issue:`58376`)
 - Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
diff --git a/pandas/core/base.py b/pandas/core/base.py
index f535f0c55415a..e54fac3da72a6 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -556,7 +556,6 @@ def array(self) -> ExtensionArray:
         """
         raise AbstractMethodError(self)
 
-    @final
     def to_numpy(
         self,
         dtype: npt.DTypeLike | None = None,
@@ -668,7 +667,7 @@ def to_numpy(
         )
 
         values = self._values
-        if fillna:
+        if fillna and self.hasnans:
             if not can_hold_element(values, na_value):
                 # if we can't hold the na_value asarray either makes a copy or we
                 # error before modifying values. The asarray later on thus won't make
@@ -943,7 +942,6 @@ def _map_values(self, mapper, na_action=None):
 
         return algorithms.map_array(arr, mapper, na_action=na_action)
 
-    @final
     def value_counts(
         self,
         normalize: bool = False,
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index 0ba3c22093c69..bd9e8b84fd82a 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -57,9 +57,13 @@
         Dtype,
         JoinHow,
         NaPosition,
+        NumpySorter,
         Self,
         npt,
     )
+
+    from pandas import Series
+
 _empty_range = range(0)
 _dtype_int64 = np.dtype(np.int64)
 
@@ -1359,3 +1363,64 @@ def take(  # type: ignore[override]
                 taken += self.start
 
         return self._shallow_copy(taken, name=self.name)
+
+    def value_counts(
+        self,
+        normalize: bool = False,
+        sort: bool = True,
+        ascending: bool = False,
+        bins=None,
+        dropna: bool = True,
+    ) -> Series:
+        from pandas import Series
+
+        if bins is not None:
+            return super().value_counts(
+                normalize=normalize,
+                sort=sort,
+                ascending=ascending,
+                bins=bins,
+                dropna=dropna,
+            )
+        name = "proportion" if normalize else "count"
+        data: npt.NDArray[np.floating] | npt.NDArray[np.signedinteger] = np.ones(
+            len(self), dtype=np.int64
+        )
+        if normalize:
+            data = data / len(self)
+        return Series(data, index=self.copy(), name=name)
+
+    def searchsorted(  # type: ignore[override]
+        self,
+        value,
+        side: Literal["left", "right"] = "left",
+        sorter: NumpySorter | None = None,
+    ) -> npt.NDArray[np.intp] | np.intp:
+        if side not in {"left", "right"} or sorter is not None:
+            return super().searchsorted(value=value, side=side, sorter=sorter)
+
+        was_scalar = False
+        if is_scalar(value):
+            was_scalar = True
+            array_value = np.array([value])
+        else:
+            array_value = np.asarray(value)
+        if array_value.dtype.kind not in "iu":
+            return super().searchsorted(value=value, side=side, sorter=sorter)
+
+        if flip := (self.step < 0):
+            rng = self._range[::-1]
+            start = rng.start
+            step = rng.step
+            shift = side == "right"
+        else:
+            start = self.start
+            step = self.step
+            shift = side == "left"
+        result = (array_value - start - int(shift)) // step + 1
+        if flip:
+            result = len(self) - result
+        result = np.maximum(np.minimum(result, len(self)), 0)
+        if was_scalar:
+            return np.intp(result.item())
+        return result.astype(np.intp, copy=False)
diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py
index 727edb7ae30ad..1f9df30d60c11 100644
--- a/pandas/tests/indexes/ranges/test_range.py
+++ b/pandas/tests/indexes/ranges/test_range.py
@@ -874,3 +874,36 @@ def test_getitem_integers_return_index():
     result = RangeIndex(0, 10, 2, name="foo")[[0, 1, -1]]
     expected = Index([0, 2, 8], dtype="int64", name="foo")
     tm.assert_index_equal(result, expected)
+
+
+@pytest.mark.parametrize("normalize", [True, False])
+@pytest.mark.parametrize(
+    "rng",
+    [
+        range(3),
+        range(0),
+        range(0, 3, 2),
+        range(3, -3, -2),
+    ],
+)
+def test_value_counts(sort, dropna, ascending, normalize, rng):
+    ri = RangeIndex(rng, name="A")
+    result = ri.value_counts(
+        normalize=normalize, sort=sort, ascending=ascending, dropna=dropna
+    )
+    expected = Index(list(rng), name="A").value_counts(
+        normalize=normalize, sort=sort, ascending=ascending, dropna=dropna
+    )
+    tm.assert_series_equal(result, expected, check_index_type=False)
+
+
+@pytest.mark.parametrize("side", ["left", "right"])
+@pytest.mark.parametrize("value", [0, -5, 5, -3, np.array([-5, -3, 0, 5])])
+def test_searchsorted(side, value):
+    ri = RangeIndex(-3, 3, 2)
+    result = ri.searchsorted(value=value, side=side)
+    expected = Index(list(ri)).searchsorted(value=value, side=side)
+    if isinstance(value, int):
+        assert result == expected
+    else:
+        tm.assert_numpy_array_equal(result, expected)

From 72d06124e1c0dfaac288c0efd7ab595f6d92c075 Mon Sep 17 00:00:00 2001
From: Abel Tavares <121238257+abeltavares@users.noreply.github.com>
Date: Mon, 29 Apr 2024 19:04:51 +0100
Subject: [PATCH 067/100] BUG: Series.plot(kind="pie") does not respect ylabel
 argument (#58254)

Co-authored-by: Abel Tavares <abel.tavares@ctw.bmwgroup.com>
---
 doc/source/whatsnew/v3.0.0.rst            | 1 +
 pandas/plotting/_matplotlib/core.py       | 3 ---
 pandas/tests/plotting/frame/test_frame.py | 2 +-
 pandas/tests/plotting/test_series.py      | 2 +-
 4 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 517510760e9c1..a81fb584c8df9 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -42,6 +42,7 @@ Other enhancements
 - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
 - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
+- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 38a75e741d60e..fffeb9b82492f 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -2077,9 +2077,6 @@ def _make_plot(self, fig: Figure) -> None:
 
         for i, (label, y) in enumerate(self._iter_data(data=self.data)):
             ax = self._get_ax(i)
-            if label is not None:
-                label = pprint_thing(label)
-                ax.set_ylabel(label)
 
             kwds = self.kwds.copy()
 
diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py
index c30cb96fef252..adb56a40b0071 100644
--- a/pandas/tests/plotting/frame/test_frame.py
+++ b/pandas/tests/plotting/frame/test_frame.py
@@ -1629,7 +1629,7 @@ def test_pie_df_subplots(self):
         for ax in axes:
             _check_text_labels(ax.texts, df.index)
         for ax, ylabel in zip(axes, df.columns):
-            assert ax.get_ylabel() == ylabel
+            assert ax.get_ylabel() == ""
 
     def test_pie_df_labels_colors(self):
         df = DataFrame(
diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py
index 9fbc20e10f5c1..54f09c7007330 100644
--- a/pandas/tests/plotting/test_series.py
+++ b/pandas/tests/plotting/test_series.py
@@ -378,7 +378,7 @@ def test_pie_series(self):
         )
         ax = _check_plot_works(series.plot.pie)
         _check_text_labels(ax.texts, series.index)
-        assert ax.get_ylabel() == "YLABEL"
+        assert ax.get_ylabel() == ""
 
     def test_pie_series_no_label(self):
         series = Series(

From d038da86c37e51fd104f00ce85fde7e620c31b1f Mon Sep 17 00:00:00 2001
From: Jason Mok <106209849+jasonmokk@users.noreply.github.com>
Date: Mon, 29 Apr 2024 13:49:51 -0500
Subject: [PATCH 068/100] TST: Add tests for #55431 (#58367)

* Add tests for #55431

* Fix inconsistent pandas namespace usage

* Fix inconsistent pandas namespace usage again

* Temp disable part of test potentially due to known bug

* Remove unnecessary comments and adjust implementation

---------

Co-authored-by: Jason Mok <jasonmok@Jasons-Air-4.attlocal.net>
Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 pandas/tests/reshape/test_cut.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py
index 0811c69859c0d..340c5c449aea7 100644
--- a/pandas/tests/reshape/test_cut.py
+++ b/pandas/tests/reshape/test_cut.py
@@ -789,3 +789,17 @@ def test_cut_with_nullable_int64():
     result = cut(series, bins=bins)
 
     tm.assert_series_equal(result, expected)
+
+
+def test_cut_datetime_array_no_attributeerror():
+    # GH 55431
+    ser = Series(to_datetime(["2023-10-06 12:00:00+0000", "2023-10-07 12:00:00+0000"]))
+
+    result = cut(ser.array, bins=2)
+
+    categories = result.categories
+    expected = Categorical.from_codes([0, 1], categories=categories, ordered=True)
+
+    tm.assert_categorical_equal(
+        result, expected, check_dtype=True, check_category_order=True
+    )

From 4c6d9eb4b0037804204e63809e885e4f207b7894 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Tue, 30 Apr 2024 00:21:17 +0530
Subject: [PATCH 069/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DatetimeIndex.snap (#58477)

* DOC: add PR01,RT03 for pandas.DatetimeIndex.snap

* DOC: remove pandas.DatetimeIndex.snap
---
 ci/code_checks.sh                | 1 -
 pandas/core/indexes/datetimes.py | 8 ++++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 58ecae66e1bcc..ce53c9fca60e0 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -98,7 +98,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.swaplevel SA01" \
         -i "pandas.DataFrame.to_markdown SA01" \
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
-        -i "pandas.DatetimeIndex.snap PR01,RT03" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.append PR07,RT03,SA01" \
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 742f66aa80728..78f04f57029b1 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -451,9 +451,17 @@ def snap(self, freq: Frequency = "S") -> DatetimeIndex:
         """
         Snap time stamps to nearest occurring frequency.
 
+        Parameters
+        ----------
+        freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'S'
+            Frequency strings can have multiples, e.g. '5h'. See
+            :ref:`here <timeseries.offset_aliases>` for a list of
+            frequency aliases.
+
         Returns
         -------
         DatetimeIndex
+            Time stamps snapped to the nearest occurring `freq`.
 
         See Also
         --------

From 4de300da14bd03da3bd759bebdfcc65570d68094 Mon Sep 17 00:00:00 2001
From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
Date: Mon, 29 Apr 2024 21:54:12 +0300
Subject: [PATCH 070/100] DOC: Update Categorical/CategoricalDtype methods'
 docstring to pass docstring validation (#58079)

* Improve docstring for some methods in categorical/categoricaldtype

* Remove cat.ordered

---------

Co-authored-by: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com>
---
 ci/code_checks.sh                 | 10 ----------
 pandas/core/arrays/categorical.py | 28 ++++++++++++++++++++++++++++
 pandas/core/dtypes/dtypes.py      |  8 ++++++++
 3 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index ce53c9fca60e0..1724fae98a6e5 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -70,15 +70,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         --format=actions \
         -i ES01 `# For now it is ok if docstrings are missing the extended summary` \
         -i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
-        -i "pandas.Categorical.__array__ SA01" \
-        -i "pandas.Categorical.codes SA01" \
-        -i "pandas.Categorical.dtype SA01" \
-        -i "pandas.Categorical.from_codes SA01" \
-        -i "pandas.Categorical.ordered SA01" \
-        -i "pandas.CategoricalDtype.categories SA01" \
-        -i "pandas.CategoricalDtype.ordered SA01" \
-        -i "pandas.CategoricalIndex.codes SA01" \
-        -i "pandas.CategoricalIndex.ordered SA01" \
         -i "pandas.DataFrame.__dataframe__ SA01" \
         -i "pandas.DataFrame.at_time PR01" \
         -i "pandas.DataFrame.kurt RT03,SA01" \
@@ -215,7 +206,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.cat.as_ordered PR01" \
         -i "pandas.Series.cat.as_unordered PR01" \
         -i "pandas.Series.cat.codes SA01" \
-        -i "pandas.Series.cat.ordered SA01" \
         -i "pandas.Series.cat.remove_categories PR01,PR02" \
         -i "pandas.Series.cat.remove_unused_categories PR01" \
         -i "pandas.Series.cat.rename_categories PR01,PR02" \
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 6a3cf4590568c..11dea697d9b93 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -497,6 +497,11 @@ def dtype(self) -> CategoricalDtype:
         """
         The :class:`~pandas.api.types.CategoricalDtype` for this instance.
 
+        See Also
+        --------
+        astype : Cast argument to a specified dtype.
+        CategoricalDtype : Type for categorical data.
+
         Examples
         --------
         >>> cat = pd.Categorical(["a", "b"], ordered=True)
@@ -721,6 +726,11 @@ def from_codes(
         -------
         Categorical
 
+        See Also
+        --------
+        codes : The category codes of the categorical.
+        CategoricalIndex : An Index with an underlying ``Categorical``.
+
         Examples
         --------
         >>> dtype = pd.CategoricalDtype(["a", "b"], ordered=True)
@@ -810,6 +820,12 @@ def ordered(self) -> Ordered:
         """
         Whether the categories have an ordered relationship.
 
+        See Also
+        --------
+        set_ordered : Set the ordered attribute.
+        as_ordered : Set the Categorical to be ordered.
+        as_unordered : Set the Categorical to be unordered.
+
         Examples
         --------
         For :class:`pandas.Series`:
@@ -861,6 +877,11 @@ def codes(self) -> np.ndarray:
         ndarray[int]
             A non-writable view of the ``codes`` array.
 
+        See Also
+        --------
+        Categorical.from_codes : Make a Categorical from codes.
+        CategoricalIndex : An Index with an underlying ``Categorical``.
+
         Examples
         --------
         For :class:`pandas.Categorical`:
@@ -1641,6 +1662,9 @@ def __array__(
         """
         The numpy array interface.
 
+        Users should not call this directly. Rather, it is invoked by
+        :func:`numpy.array` and :func:`numpy.asarray`.
+
         Parameters
         ----------
         dtype : np.dtype or None
@@ -1656,6 +1680,10 @@ def __array__(
             if dtype==None (default), the same dtype as
             categorical.categories.dtype.
 
+        See Also
+        --------
+        numpy.asarray : Convert input to numpy.ndarray.
+
         Examples
         --------
 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 8c64a38bc1be3..e52cbff451700 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -623,6 +623,10 @@ def categories(self) -> Index:
         """
         An ``Index`` containing the unique categories allowed.
 
+        See Also
+        --------
+        ordered : Whether the categories have an ordered relationship.
+
         Examples
         --------
         >>> cat_type = pd.CategoricalDtype(categories=["a", "b"], ordered=True)
@@ -636,6 +640,10 @@ def ordered(self) -> Ordered:
         """
         Whether the categories have an ordered relationship.
 
+        See Also
+        --------
+        categories : An Index containing the unique categories allowed.
+
         Examples
         --------
         >>> cat_type = pd.CategoricalDtype(categories=["a", "b"], ordered=True)

From 2246a78e2a615207ee208bfa4cc3339a67214035 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dominik=20Smr=C5=BE?= <dom.smrz@gmail.com>
Date: Mon, 29 Apr 2024 22:14:06 +0200
Subject: [PATCH 071/100] `pd.eval`: `Series` names are now preserved even for
 `"numexpr"` engine. (#58437)

* Eval: Series names are preserved for numexpr

Series names are now preserved even when using
numexpr engine. Making the behavior consistent
with python engine.

* Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Update pandas/core/computation/align.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Update pandas/tests/computation/test_eval.py

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst        |  1 +
 pandas/core/computation/align.py      | 19 ++++++++----
 pandas/core/computation/engines.py    | 11 +++++--
 pandas/tests/computation/test_eval.py | 43 +++++++++++++++------------
 pandas/tests/frame/test_query_eval.py | 16 ++++++----
 5 files changed, 57 insertions(+), 33 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index a81fb584c8df9..6ae3a8e00c02f 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -469,6 +469,7 @@ Styler
 Other
 ^^^^^
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
+- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py
index c5562fb0284b7..b4e33b8ac75cb 100644
--- a/pandas/core/computation/align.py
+++ b/pandas/core/computation/align.py
@@ -160,19 +160,24 @@ def align_terms(terms):
         # can't iterate so it must just be a constant or single variable
         if isinstance(terms.value, (ABCSeries, ABCDataFrame)):
             typ = type(terms.value)
-            return typ, _zip_axes_from_type(typ, terms.value.axes)
-        return np.result_type(terms.type), None
+            name = terms.value.name if isinstance(terms.value, ABCSeries) else None
+            return typ, _zip_axes_from_type(typ, terms.value.axes), name
+        return np.result_type(terms.type), None, None
 
     # if all resolved variables are numeric scalars
     if all(term.is_scalar for term in terms):
-        return result_type_many(*(term.value for term in terms)).type, None
+        return result_type_many(*(term.value for term in terms)).type, None, None
+
+    # if all input series have a common name, propagate it to the returned series
+    names = {term.value.name for term in terms if isinstance(term.value, ABCSeries)}
+    name = names.pop() if len(names) == 1 else None
 
     # perform the main alignment
     typ, axes = _align_core(terms)
-    return typ, axes
+    return typ, axes, name
 
 
-def reconstruct_object(typ, obj, axes, dtype):
+def reconstruct_object(typ, obj, axes, dtype, name):
     """
     Reconstruct an object given its type, raw value, and possibly empty
     (None) axes.
@@ -200,7 +205,9 @@ def reconstruct_object(typ, obj, axes, dtype):
     res_t = np.result_type(obj.dtype, dtype)
 
     if not isinstance(typ, partial) and issubclass(typ, PandasObject):
-        return typ(obj, dtype=res_t, **axes)
+        if name is None:
+            return typ(obj, dtype=res_t, **axes)
+        return typ(obj, dtype=res_t, name=name, **axes)
 
     # special case for pathological things like ~True/~False
     if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_:
diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py
index 5db05ebe33efd..d2a181cbb3c36 100644
--- a/pandas/core/computation/engines.py
+++ b/pandas/core/computation/engines.py
@@ -54,6 +54,7 @@ def __init__(self, expr) -> None:
         self.expr = expr
         self.aligned_axes = None
         self.result_type = None
+        self.result_name = None
 
     def convert(self) -> str:
         """
@@ -76,12 +77,18 @@ def evaluate(self) -> object:
             The result of the passed expression.
         """
         if not self._is_aligned:
-            self.result_type, self.aligned_axes = align_terms(self.expr.terms)
+            self.result_type, self.aligned_axes, self.result_name = align_terms(
+                self.expr.terms
+            )
 
         # make sure no names in resolvers and locals/globals clash
         res = self._evaluate()
         return reconstruct_object(
-            self.result_type, res, self.aligned_axes, self.expr.terms.return_type
+            self.result_type,
+            res,
+            self.aligned_axes,
+            self.expr.terms.return_type,
+            self.result_name,
         )
 
     @property
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index ebbb31205e264..d8e5908b0c58f 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -737,6 +737,17 @@ def test_and_logic_string_match(self):
         assert pd.eval(f"{event.str.match('hello').a}")
         assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}")
 
+    def test_eval_keep_name(self, engine, parser):
+        df = Series([2, 15, 28], name="a").to_frame()
+        res = df.eval("a + a", engine=engine, parser=parser)
+        expected = Series([4, 30, 56], name="a")
+        tm.assert_series_equal(expected, res)
+
+    def test_eval_unmatching_names(self, engine, parser):
+        variable_name = Series([42], name="series_name")
+        res = pd.eval("variable_name + 0", engine=engine, parser=parser)
+        tm.assert_series_equal(variable_name, res)
+
 
 # -------------------------------------
 # gh-12388: Typecasting rules consistency with python
@@ -1269,14 +1280,12 @@ def test_assignment_explicit(self):
         expected["c"] = expected["a"] + expected["b"]
         tm.assert_frame_equal(df, expected)
 
-    def test_column_in(self):
+    def test_column_in(self, engine):
         # GH 11235
         df = DataFrame({"a": [11], "b": [-32]})
-        result = df.eval("a in [11, -32]")
-        expected = Series([True])
-        # TODO: 2022-01-29: Name check failed with numexpr 2.7.3 in CI
-        # but cannot reproduce locally
-        tm.assert_series_equal(result, expected, check_names=False)
+        result = df.eval("a in [11, -32]", engine=engine)
+        expected = Series([True], name="a")
+        tm.assert_series_equal(result, expected)
 
     @pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.")
     def test_assignment_not_inplace(self):
@@ -1505,7 +1514,7 @@ def test_date_boolean(self, engine, parser):
             parser=parser,
         )
         expec = df.dates1 < "20130101"
-        tm.assert_series_equal(res, expec, check_names=False)
+        tm.assert_series_equal(res, expec)
 
     def test_simple_in_ops(self, engine, parser):
         if parser != "python":
@@ -1620,7 +1629,7 @@ def test_unary_functions(self, fn, engine, parser):
         got = self.eval(expr, engine=engine, parser=parser)
         with np.errstate(all="ignore"):
             expect = getattr(np, fn)(a)
-        tm.assert_series_equal(got, expect, check_names=False)
+        tm.assert_series_equal(got, expect)
 
     @pytest.mark.parametrize("fn", _binary_math_ops)
     def test_binary_functions(self, fn, engine, parser):
@@ -1637,7 +1646,7 @@ def test_binary_functions(self, fn, engine, parser):
         got = self.eval(expr, engine=engine, parser=parser)
         with np.errstate(all="ignore"):
             expect = getattr(np, fn)(a, b)
-        tm.assert_almost_equal(got, expect, check_names=False)
+        tm.assert_almost_equal(got, expect)
 
     def test_df_use_case(self, engine, parser):
         df = DataFrame(
@@ -1653,8 +1662,8 @@ def test_df_use_case(self, engine, parser):
             inplace=True,
         )
         got = df.e
-        expect = np.arctan2(np.sin(df.a), df.b)
-        tm.assert_series_equal(got, expect, check_names=False)
+        expect = np.arctan2(np.sin(df.a), df.b).rename("e")
+        tm.assert_series_equal(got, expect)
 
     def test_df_arithmetic_subexpression(self, engine, parser):
         df = DataFrame(
@@ -1665,8 +1674,8 @@ def test_df_arithmetic_subexpression(self, engine, parser):
         )
         df.eval("e = sin(a + b)", engine=engine, parser=parser, inplace=True)
         got = df.e
-        expect = np.sin(df.a + df.b)
-        tm.assert_series_equal(got, expect, check_names=False)
+        expect = np.sin(df.a + df.b).rename("e")
+        tm.assert_series_equal(got, expect)
 
     @pytest.mark.parametrize(
         "dtype, expect_dtype",
@@ -1690,10 +1699,10 @@ def test_result_types(self, dtype, expect_dtype, engine, parser):
         assert df.a.dtype == dtype
         df.eval("b = sin(a)", engine=engine, parser=parser, inplace=True)
         got = df.b
-        expect = np.sin(df.a)
+        expect = np.sin(df.a).rename("b")
         assert expect.dtype == got.dtype
         assert expect_dtype == got.dtype
-        tm.assert_series_equal(got, expect, check_names=False)
+        tm.assert_series_equal(got, expect)
 
     def test_undefined_func(self, engine, parser):
         df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
@@ -1898,10 +1907,6 @@ def test_equals_various(other):
     df = DataFrame({"A": ["a", "b", "c"]}, dtype=object)
     result = df.eval(f"A == {other}")
     expected = Series([False, False, False], name="A")
-    if USE_NUMEXPR:
-        # https://github.com/pandas-dev/pandas/issues/10239
-        # lose name with numexpr engine. Remove when that's fixed.
-        expected.name = None
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 94e8e469f21e7..643d342b052a4 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -58,26 +58,26 @@ def test_query_default(self, df, expected1, expected2):
         result = df.query("A>0")
         tm.assert_frame_equal(result, expected1)
         result = df.eval("A+1")
-        tm.assert_series_equal(result, expected2, check_names=False)
+        tm.assert_series_equal(result, expected2)
 
     def test_query_None(self, df, expected1, expected2):
         result = df.query("A>0", engine=None)
         tm.assert_frame_equal(result, expected1)
         result = df.eval("A+1", engine=None)
-        tm.assert_series_equal(result, expected2, check_names=False)
+        tm.assert_series_equal(result, expected2)
 
     def test_query_python(self, df, expected1, expected2):
         result = df.query("A>0", engine="python")
         tm.assert_frame_equal(result, expected1)
         result = df.eval("A+1", engine="python")
-        tm.assert_series_equal(result, expected2, check_names=False)
+        tm.assert_series_equal(result, expected2)
 
     def test_query_numexpr(self, df, expected1, expected2):
         if NUMEXPR_INSTALLED:
             result = df.query("A>0", engine="numexpr")
             tm.assert_frame_equal(result, expected1)
             result = df.eval("A+1", engine="numexpr")
-            tm.assert_series_equal(result, expected2, check_names=False)
+            tm.assert_series_equal(result, expected2)
         else:
             msg = (
                 r"'numexpr' is not installed or an unsupported version. "
@@ -194,8 +194,12 @@ def test_using_numpy(self, engine, parser):
         df = Series([0.2, 1.5, 2.8], name="a").to_frame()
         res = df.eval("@np.floor(a)", engine=engine, parser=parser)
         expected = np.floor(df["a"])
-        if engine == "numexpr":
-            expected.name = None  # See GH 58069
+        tm.assert_series_equal(expected, res)
+
+    def test_eval_simple(self, engine, parser):
+        df = Series([0.2, 1.5, 2.8], name="a").to_frame()
+        res = df.eval("a", engine=engine, parser=parser)
+        expected = df["a"]
         tm.assert_series_equal(expected, res)
 
 

From 9d3747f3b44ba7444c228d429217c1424a812380 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 29 Apr 2024 13:14:34 -0700
Subject: [PATCH 072/100] DEPR: Series setitem/getitem treating ints as
 positional (#58089)

* DEPR: Series setitem/getitem treating ints as positional

* 32bit build compat

* update exception message for numpy 2
---
 doc/source/whatsnew/v3.0.0.rst                |  1 +
 pandas/core/series.py                         | 89 ++-----------------
 pandas/tests/copy_view/test_indexing.py       | 11 +--
 pandas/tests/extension/base/getitem.py        |  9 +-
 pandas/tests/indexing/test_coercion.py        | 12 +--
 pandas/tests/indexing/test_floats.py          | 15 ++--
 pandas/tests/series/indexing/test_datetime.py |  7 --
 pandas/tests/series/indexing/test_get.py      | 26 ++----
 pandas/tests/series/indexing/test_getitem.py  | 75 ++++++----------
 pandas/tests/series/indexing/test_indexing.py | 23 ++---
 pandas/tests/series/indexing/test_setitem.py  | 30 +++----
 11 files changed, 80 insertions(+), 218 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 6ae3a8e00c02f..66dafecffeb01 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -221,6 +221,7 @@ Removal of prior version deprecations/changes
 - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
 - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
 - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
+- Changed behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` to always treat integer keys as labels, never as positional, consistent with :class:`DataFrame` behavior (:issue:`50617`)
 - Disallow allowing logical operations (``||``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``); wrap the objects in :class:`Series`, :class:`Index`, or ``np.array`` first instead (:issue:`52264`)
 - Disallow automatic casting to object in :class:`Series` logical operations (``&``, ``^``, ``||``) between series with mismatched indexes and dtypes other than ``object`` or ``bool`` (:issue:`52538`)
 - Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index c1920312489c9..8a26d52bb5df1 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -901,19 +901,9 @@ def __getitem__(self, key):
         if isinstance(key, (list, tuple)):
             key = unpack_1tuple(key)
 
-        if is_integer(key) and self.index._should_fallback_to_positional:
-            warnings.warn(
-                # GH#50617
-                "Series.__getitem__ treating keys as positions is deprecated. "
-                "In a future version, integer keys will always be treated "
-                "as labels (consistent with DataFrame behavior). To access "
-                "a value by position, use `ser.iloc[pos]`",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-            return self._values[key]
-
         elif key_is_scalar:
+            # Note: GH#50617 in 3.0 we changed int key to always be treated as
+            #  a label, matching DataFrame behavior.
             return self._get_value(key)
 
         # Convert generator to list before going through hashable part
@@ -958,35 +948,6 @@ def _get_with(self, key):
         elif isinstance(key, tuple):
             return self._get_values_tuple(key)
 
-        elif not is_list_like(key):
-            # e.g. scalars that aren't recognized by lib.is_scalar, GH#32684
-            return self.loc[key]
-
-        if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)):
-            key = list(key)
-
-        key_type = lib.infer_dtype(key, skipna=False)
-
-        # Note: The key_type == "boolean" case should be caught by the
-        #  com.is_bool_indexer check in __getitem__
-        if key_type == "integer":
-            # We need to decide whether to treat this as a positional indexer
-            #  (i.e. self.iloc) or label-based (i.e. self.loc)
-            if not self.index._should_fallback_to_positional:
-                return self.loc[key]
-            else:
-                warnings.warn(
-                    # GH#50617
-                    "Series.__getitem__ treating keys as positions is deprecated. "
-                    "In a future version, integer keys will always be treated "
-                    "as labels (consistent with DataFrame behavior). To access "
-                    "a value by position, use `ser.iloc[pos]`",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                return self.iloc[key]
-
-        # handle the dup indexing case GH#4246
         return self.loc[key]
 
     def _get_values_tuple(self, key: tuple):
@@ -1076,27 +1037,8 @@ def __setitem__(self, key, value) -> None:
         except KeyError:
             # We have a scalar (or for MultiIndex or object-dtype, scalar-like)
             #  key that is not present in self.index.
-            if is_integer(key):
-                if not self.index._should_fallback_to_positional:
-                    # GH#33469
-                    self.loc[key] = value
-                else:
-                    # positional setter
-                    # can't use _mgr.setitem_inplace yet bc could have *both*
-                    #  KeyError and then ValueError, xref GH#45070
-                    warnings.warn(
-                        # GH#50617
-                        "Series.__setitem__ treating keys as positions is deprecated. "
-                        "In a future version, integer keys will always be treated "
-                        "as labels (consistent with DataFrame behavior). To set "
-                        "a value by position, use `ser.iloc[pos] = value`",
-                        FutureWarning,
-                        stacklevel=find_stack_level(),
-                    )
-                    self._set_values(key, value)
-            else:
-                # GH#12862 adding a new key to the Series
-                self.loc[key] = value
+            # GH#12862 adding a new key to the Series
+            self.loc[key] = value
 
         except (TypeError, ValueError, LossySetitemError):
             # The key was OK, but we cannot set the value losslessly
@@ -1155,28 +1097,7 @@ def _set_with(self, key, value) -> None:
             # Without this, the call to infer_dtype will consume the generator
             key = list(key)
 
-        if not self.index._should_fallback_to_positional:
-            # Regardless of the key type, we're treating it as labels
-            self._set_labels(key, value)
-
-        else:
-            # Note: key_type == "boolean" should not occur because that
-            #  should be caught by the is_bool_indexer check in __setitem__
-            key_type = lib.infer_dtype(key, skipna=False)
-
-            if key_type == "integer":
-                warnings.warn(
-                    # GH#50617
-                    "Series.__setitem__ treating keys as positions is deprecated. "
-                    "In a future version, integer keys will always be treated "
-                    "as labels (consistent with DataFrame behavior). To set "
-                    "a value by position, use `ser.iloc[pos] = value`",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                self._set_values(key, value)
-            else:
-                self._set_labels(key, value)
+        self._set_labels(key, value)
 
     def _set_labels(self, key, value) -> None:
         key = com.asarray_tuplesafe(key)
diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
index 09d13677eef62..b10141b0d63f4 100644
--- a/pandas/tests/copy_view/test_indexing.py
+++ b/pandas/tests/copy_view/test_indexing.py
@@ -622,16 +622,17 @@ def test_series_subset_set_with_indexer(backend, indexer_si, indexer):
     s_orig = s.copy()
     subset = s[:]
 
-    warn = None
-    msg = "Series.__setitem__ treating keys as positions is deprecated"
     if (
         indexer_si is tm.setitem
         and isinstance(indexer, np.ndarray)
         and indexer.dtype.kind == "i"
     ):
-        warn = FutureWarning
-    with tm.assert_produces_warning(warn, match=msg):
-        indexer_si(subset)[indexer] = 0
+        # In 3.0 we treat integers as always-labels
+        with pytest.raises(KeyError):
+            indexer_si(subset)[indexer] = 0
+        return
+
+    indexer_si(subset)[indexer] = 0
     expected = Series([0, 0, 3], index=["a", "b", "c"])
     tm.assert_series_equal(subset, expected)
 
diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
index 1f89c7ad9d4e4..935edce32a0ab 100644
--- a/pandas/tests/extension/base/getitem.py
+++ b/pandas/tests/extension/base/getitem.py
@@ -329,11 +329,10 @@ def test_get(self, data):
         result = s.get("Z")
         assert result is None
 
-        msg = "Series.__getitem__ treating keys as positions is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            assert s.get(4) == s.iloc[4]
-            assert s.get(-1) == s.iloc[-1]
-            assert s.get(len(s)) is None
+        # As of 3.0, getitem with int keys treats them as labels
+        assert s.get(4) is None
+        assert s.get(-1) is None
+        assert s.get(len(s)) is None
 
         # GH 21257
         s = pd.Series(data)
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index d51a986a22f1e..d4bc0341e732e 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -117,16 +117,8 @@ def test_setitem_index_object(self, val, exp_dtype):
         obj = pd.Series([1, 2, 3, 4], index=pd.Index(list("abcd"), dtype=object))
         assert obj.index.dtype == object
 
-        if exp_dtype is IndexError:
-            temp = obj.copy()
-            warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
-            msg = "index 5 is out of bounds for axis 0 with size 4"
-            with pytest.raises(exp_dtype, match=msg):
-                with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-                    temp[5] = 5
-        else:
-            exp_index = pd.Index(list("abcd") + [val], dtype=object)
-            self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
+        exp_index = pd.Index(list("abcd") + [val], dtype=object)
+        self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
 
     @pytest.mark.parametrize(
         "val,exp_dtype", [(5, np.int64), (1.1, np.float64), ("x", object)]
diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py
index 1fe431e12f2a1..8597ee1198ff0 100644
--- a/pandas/tests/indexing/test_floats.py
+++ b/pandas/tests/indexing/test_floats.py
@@ -87,11 +87,11 @@ def test_scalar_non_numeric(self, index, frame_or_series, indexer_sl):
         ],
     )
     def test_scalar_non_numeric_series_fallback(self, index):
-        # fallsback to position selection, series only
+        # starting in 3.0, integer keys are always treated as labels, no longer
+        #  fall back to positional.
         s = Series(np.arange(len(index)), index=index)
 
-        msg = "Series.__getitem__ treating keys as positions is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
+        with pytest.raises(KeyError, match="3"):
             s[3]
         with pytest.raises(KeyError, match="^3.0$"):
             s[3.0]
@@ -118,12 +118,9 @@ def test_scalar_with_mixed(self, indexer_sl):
             indexer_sl(s3)[1.0]
 
         if indexer_sl is not tm.loc:
-            # __getitem__ falls back to positional
-            msg = "Series.__getitem__ treating keys as positions is deprecated"
-            with tm.assert_produces_warning(FutureWarning, match=msg):
-                result = s3[1]
-            expected = 2
-            assert result == expected
+            # as of 3.0, __getitem__ no longer falls back to positional
+            with pytest.raises(KeyError, match="^1$"):
+                s3[1]
 
         with pytest.raises(KeyError, match=r"^1\.0$"):
             indexer_sl(s3)[1.0]
diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py
index e0ca4bf64ea91..3b41c8ee463d8 100644
--- a/pandas/tests/series/indexing/test_datetime.py
+++ b/pandas/tests/series/indexing/test_datetime.py
@@ -36,9 +36,6 @@ def test_fancy_getitem():
 
     s = Series(np.arange(len(dti)), index=dti)
 
-    msg = "Series.__getitem__ treating keys as positions is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        assert s[48] == 48
     assert s["1/2/2009"] == 48
     assert s["2009-1-2"] == 48
     assert s[datetime(2009, 1, 2)] == 48
@@ -57,10 +54,6 @@ def test_fancy_setitem():
 
     s = Series(np.arange(len(dti)), index=dti)
 
-    msg = "Series.__setitem__ treating keys as positions is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        s[48] = -1
-    assert s.iloc[48] == -1
     s["1/2/2009"] = -2
     assert s.iloc[48] == -2
     s["1/2/2009":"2009-06-05"] = -3
diff --git a/pandas/tests/series/indexing/test_get.py b/pandas/tests/series/indexing/test_get.py
index 1f3711ad91903..5ff92ca89efba 100644
--- a/pandas/tests/series/indexing/test_get.py
+++ b/pandas/tests/series/indexing/test_get.py
@@ -157,13 +157,8 @@ def test_get_with_default():
             assert s.get("e", "z") == "z"
             assert s.get("e", "e") == "e"
 
-            msg = "Series.__getitem__ treating keys as positions is deprecated"
-            warn = None
-            if index is d0:
-                warn = FutureWarning
-            with tm.assert_produces_warning(warn, match=msg):
-                assert s.get(10, "z") == "z"
-                assert s.get(10, 10) == 10
+            assert s.get(10, "z") == "z"
+            assert s.get(10, 10) == 10
 
 
 @pytest.mark.parametrize(
@@ -201,13 +196,10 @@ def test_get_with_ea(arr):
     result = ser.get("Z")
     assert result is None
 
-    msg = "Series.__getitem__ treating keys as positions is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        assert ser.get(4) == ser.iloc[4]
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        assert ser.get(-1) == ser.iloc[-1]
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        assert ser.get(len(ser)) is None
+    # As of 3.0, ints are treated as labels
+    assert ser.get(4) is None
+    assert ser.get(-1) is None
+    assert ser.get(len(ser)) is None
 
     # GH#21257
     ser = Series(arr)
@@ -216,16 +208,14 @@ def test_get_with_ea(arr):
 
 
 def test_getitem_get(string_series, object_series):
-    msg = "Series.__getitem__ treating keys as positions is deprecated"
-
     for obj in [string_series, object_series]:
         idx = obj.index[5]
 
         assert obj[idx] == obj.get(idx)
         assert obj[idx] == obj.iloc[5]
 
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        assert string_series.get(-1) == string_series.get(string_series.index[-1])
+    # As of 3.0, ints are treated as labels
+    assert string_series.get(-1) is None
     assert string_series.iloc[5] == string_series.get(string_series.index[5])
 
 
diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py
index fac543ac450a5..ede39ba61dfeb 100644
--- a/pandas/tests/series/indexing/test_getitem.py
+++ b/pandas/tests/series/indexing/test_getitem.py
@@ -15,6 +15,7 @@
     conversion,
     timezones,
 )
+from pandas.compat.numpy import np_version_gt2
 
 from pandas.core.dtypes.common import is_scalar
 
@@ -72,19 +73,14 @@ def test_getitem_unrecognized_scalar(self):
     def test_getitem_negative_out_of_bounds(self):
         ser = Series(["a"] * 10, index=["a"] * 10)
 
-        msg = "index -11 is out of bounds for axis 0 with size 10|index out of bounds"
-        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
-        with pytest.raises(IndexError, match=msg):
-            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-                ser[-11]
+        with pytest.raises(KeyError, match="^-11$"):
+            ser[-11]
 
     def test_getitem_out_of_bounds_indexerror(self, datetime_series):
         # don't segfault, GH#495
-        msg = r"index \d+ is out of bounds for axis 0 with size \d+"
-        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
-        with pytest.raises(IndexError, match=msg):
-            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-                datetime_series[len(datetime_series)]
+        N = len(datetime_series)
+        with pytest.raises(KeyError, match=str(N)):
+            datetime_series[N]
 
     def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self):
         # GH#917
@@ -118,11 +114,13 @@ def test_getitem_keyerror_with_integer_index(self, any_int_numpy_dtype):
             ser["c"]
 
     def test_getitem_int64(self, datetime_series):
+        if np_version_gt2:
+            msg = r"^np.int64\(5\)$"
+        else:
+            msg = "^5$"
         idx = np.int64(5)
-        msg = "Series.__getitem__ treating keys as positions is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            res = datetime_series[idx]
-        assert res == datetime_series.iloc[5]
+        with pytest.raises(KeyError, match=msg):
+            datetime_series[idx]
 
     def test_getitem_full_range(self):
         # github.com/pandas-dev/pandas/commit/4f433773141d2eb384325714a2776bcc5b2e20f7
@@ -218,10 +216,8 @@ def test_getitem_str_with_timedeltaindex(self):
     def test_getitem_bool_index_positional(self):
         # GH#48653
         ser = Series({True: 1, False: 0})
-        msg = "Series.__getitem__ treating keys as positions is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = ser[0]
-        assert result == 1
+        with pytest.raises(KeyError, match="^0$"):
+            ser[0]
 
 
 class TestSeriesGetitemSlices:
@@ -384,17 +380,16 @@ def test_getitem_intlist_intindex_periodvalues(self):
 
     @pytest.mark.parametrize("box", [list, np.array, Index])
     def test_getitem_intlist_intervalindex_non_int(self, box):
-        # GH#33404 fall back to positional since ints are unambiguous
+        # GH#33404 fall back to positional since ints are unambiguous;
+        #  changed in 3.0 to never fall back
         dti = date_range("2000-01-03", periods=3)._with_freq(None)
         ii = pd.IntervalIndex.from_breaks(dti)
         ser = Series(range(len(ii)), index=ii)
 
-        expected = ser.iloc[:1]
         key = box([0])
-        msg = "Series.__getitem__ treating keys as positions is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = ser[key]
-        tm.assert_series_equal(result, expected)
+        msg = r"None of \[Index\(\[0\], dtype='int(32|64)'\)\] are in the \[index\]"
+        with pytest.raises(KeyError, match=msg):
+            ser[key]
 
     @pytest.mark.parametrize("box", [list, np.array, Index])
     @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64])
@@ -635,11 +630,6 @@ def test_getitem_preserve_name(datetime_series):
     result = datetime_series[datetime_series > 0]
     assert result.name == datetime_series.name
 
-    msg = "Series.__getitem__ treating keys as positions is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = datetime_series[[0, 2, 4]]
-    assert result.name == datetime_series.name
-
     result = datetime_series[5:10]
     assert result.name == datetime_series.name
 
@@ -667,21 +657,16 @@ def test_getitem_missing(datetime_series):
 
 
 def test_getitem_fancy(string_series, object_series):
-    msg = "Series.__getitem__ treating keys as positions is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        slice1 = string_series[[1, 2, 3]]
-        slice2 = object_series[[1, 2, 3]]
-    assert string_series.index[2] == slice1.index[1]
-    assert object_series.index[2] == slice2.index[1]
-    assert string_series.iloc[2] == slice1.iloc[1]
-    assert object_series.iloc[2] == slice2.iloc[1]
+    msg = r"None of \[Index\(\[1, 2, 3\], dtype='int(32|64)'\)\] are in the \[index\]"
+    with pytest.raises(KeyError, match=msg):
+        string_series[[1, 2, 3]]
+    with pytest.raises(KeyError, match=msg):
+        object_series[[1, 2, 3]]
 
 
 def test_getitem_box_float64(datetime_series):
-    msg = "Series.__getitem__ treating keys as positions is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        value = datetime_series[5]
-    assert isinstance(value, np.float64)
+    with pytest.raises(KeyError, match="^5$"):
+        datetime_series[5]
 
 
 def test_getitem_unordered_dup():
@@ -712,13 +697,11 @@ def test_slice_can_reorder_not_uniquely_indexed():
 
 @pytest.mark.parametrize("index_vals", ["aabcd", "aadcb"])
 def test_duplicated_index_getitem_positional_indexer(index_vals):
-    # GH 11747
+    # GH 11747; as of 3.0, integer keys are always treated as labels
     s = Series(range(5), index=list(index_vals))
 
-    msg = "Series.__getitem__ treating keys as positions is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = s[3]
-    assert result == 3
+    with pytest.raises(KeyError, match="^3$"):
+        s[3]
 
 
 class TestGetitemDeprecatedIndexers:
diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py
index a629d18131306..5002b6d20da09 100644
--- a/pandas/tests/series/indexing/test_indexing.py
+++ b/pandas/tests/series/indexing/test_indexing.py
@@ -32,27 +32,16 @@ def test_basic_indexing():
         np.random.default_rng(2).standard_normal(5), index=["a", "b", "a", "a", "b"]
     )
 
-    warn_msg = "Series.__[sg]etitem__ treating keys as positions is deprecated"
-    msg = "index 5 is out of bounds for axis 0 with size 5"
-    with pytest.raises(IndexError, match=msg):
-        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-            s[5]
-    with pytest.raises(IndexError, match=msg):
-        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-            s[5] = 0
+    with pytest.raises(KeyError, match="^5$"):
+        s[5]
 
     with pytest.raises(KeyError, match=r"^'c'$"):
         s["c"]
 
     s = s.sort_index()
 
-    with pytest.raises(IndexError, match=msg):
-        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-            s[5]
-    msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
-    with pytest.raises(IndexError, match=msg):
-        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-            s[5] = 0
+    with pytest.raises(KeyError, match="^5$"):
+        s[5]
 
 
 def test_getitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
@@ -153,9 +142,7 @@ def test_series_box_timestamp():
     assert isinstance(ser.iloc[4], Timestamp)
 
     ser = Series(rng, index=rng)
-    msg = "Series.__getitem__ treating keys as positions is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        assert isinstance(ser[0], Timestamp)
+    assert isinstance(ser[rng[0]], Timestamp)
     assert isinstance(ser.at[rng[1]], Timestamp)
     assert isinstance(ser.iat[2], Timestamp)
     assert isinstance(ser.loc[rng[3]], Timestamp)
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
index 7a2a4892f61fb..b94e6b6f0c6c8 100644
--- a/pandas/tests/series/indexing/test_setitem.py
+++ b/pandas/tests/series/indexing/test_setitem.py
@@ -181,14 +181,12 @@ def test_object_series_setitem_dt64array_exact_match(self):
 
 class TestSetitemScalarIndexer:
     def test_setitem_negative_out_of_bounds(self):
+        # As of 3.0, int keys are treated as labels, so this becomes
+        #  setitem-with-expansion
         ser = Series(["a"] * 10, index=["a"] * 10)
-
-        # string index falls back to positional
-        msg = "index -11|-1 is out of bounds for axis 0 with size 10"
-        warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
-        with pytest.raises(IndexError, match=msg):
-            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-                ser[-11] = "foo"
+        ser[-11] = "foo"
+        exp = Series(["a"] * 10 + ["foo"], index=["a"] * 10 + [-11])
+        tm.assert_series_equal(ser, exp)
 
     @pytest.mark.parametrize("indexer", [tm.loc, tm.at])
     @pytest.mark.parametrize("ser_index", [0, 1])
@@ -1749,24 +1747,24 @@ def test_setitem_bool_int_float_consistency(indexer_sli):
 def test_setitem_positional_with_casting():
     # GH#45070 case where in __setitem__ we get a KeyError, then when
     #  we fallback we *also* get a ValueError if we try to set inplace.
+    # As of 3.0 we always treat int keys as labels, so this becomes
+    #  setitem-with-expansion
     ser = Series([1, 2, 3], index=["a", "b", "c"])
 
-    warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-        ser[0] = "X"
-    expected = Series(["X", 2, 3], index=["a", "b", "c"], dtype=object)
+    ser[0] = "X"
+    expected = Series([1, 2, 3, "X"], index=["a", "b", "c", 0], dtype=object)
     tm.assert_series_equal(ser, expected)
 
 
 def test_setitem_positional_float_into_int_coerces():
     # Case where we hit a KeyError and then trying to set in-place incorrectly
-    #  casts a float to an int
+    #  casts a float to an int;
+    # As of 3.0 we always treat int keys as labels, so this becomes
+    #  setitem-with-expansion
     ser = Series([1, 2, 3], index=["a", "b", "c"])
 
-    warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-        ser[0] = 1.5
-    expected = Series([1.5, 2, 3], index=["a", "b", "c"])
+    ser[0] = 1.5
+    expected = Series([1, 2, 3, 1.5], index=["a", "b", "c", 0])
     tm.assert_series_equal(ser, expected)
 
 

From a052307e2deb36a3548b58de8888765fb4b7bed0 Mon Sep 17 00:00:00 2001
From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com>
Date: Mon, 29 Apr 2024 16:15:54 -0400
Subject: [PATCH 073/100] Fix PR07,RT03,SA01 errors for Index.union,
 Index.symmetric_difference (#58457)

* Fix PR07,RT03,SA01 errors for Index.union, Index.symmetric_difference

* Update pandas/core/indexes/base.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 ci/code_checks.sh           |  2 --
 pandas/core/indexes/base.py | 21 +++++++++++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 1724fae98a6e5..8b1bccdaa8d1b 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -106,9 +106,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.ravel PR01,RT03" \
         -i "pandas.Index.slice_indexer PR07,RT03,SA01" \
         -i "pandas.Index.str PR01,SA01" \
-        -i "pandas.Index.symmetric_difference PR07,RT03,SA01" \
         -i "pandas.Index.take PR01,PR07" \
-        -i "pandas.Index.union PR07,RT03,SA01" \
         -i "pandas.Index.view GL08" \
         -i "pandas.Int16Dtype SA01" \
         -i "pandas.Int32Dtype SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index f0ac8604ccd60..212d0bcef8f43 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2872,6 +2872,8 @@ def union(self, other, sort=None):
         Parameters
         ----------
         other : Index or array-like
+            Index or an array-like object containing elements to form the union
+            with the original Index.
         sort : bool or None, default None
             Whether to sort the resulting Index.
 
@@ -2888,6 +2890,14 @@ def union(self, other, sort=None):
         Returns
         -------
         Index
+            Returns a new Index object with all unique elements from both the original
+            Index and the `other` Index.
+
+        See Also
+        --------
+        Index.unique : Return unique values in the index.
+        Index.intersection : Form the intersection of two Index objects.
+        Index.difference : Return a new Index with elements of index not in `other`.
 
         Examples
         --------
@@ -3312,7 +3322,10 @@ def symmetric_difference(self, other, result_name=None, sort=None):
         Parameters
         ----------
         other : Index or array-like
+            Index or an array-like object with elements to compute the symmetric
+            difference with the original Index.
         result_name : str
+            A string representing the name of the resulting Index, if desired.
         sort : bool or None, default None
             Whether to sort the resulting index. By default, the
             values are attempted to be sorted, but any TypeError from
@@ -3326,6 +3339,14 @@ def symmetric_difference(self, other, result_name=None, sort=None):
         Returns
         -------
         Index
+            Returns a new Index object containing elements that appear in either the
+            original Index or the `other` Index, but not both.
+
+        See Also
+        --------
+        Index.difference : Return a new Index with elements of index not in other.
+        Index.union : Form the union of two Index objects.
+        Index.intersection : Form the intersection of two Index objects.
 
         Notes
         -----

From f3f3853cd7ed92108cfd53adaad6dd631d48fc72 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 30 Apr 2024 00:16:41 +0200
Subject: [PATCH 074/100] BUG: astype not casting values for dictionary dtype
 correctly (#58479)

* BUG: astype not casting values for dictionary dtype correctly

* Fixup
---
 doc/source/whatsnew/v3.0.0.rst       | 1 +
 pandas/core/arrays/arrow/array.py    | 2 ++
 pandas/tests/extension/test_arrow.py | 8 ++++++++
 3 files changed, 11 insertions(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 66dafecffeb01..59926c0751d32 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -390,6 +390,7 @@ Numeric
 
 Conversion
 ^^^^^^^^^^
+- Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`)
 - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
 - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
 - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`)
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 1154130b9bed3..0240433cdb683 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -525,6 +525,8 @@ def _box_pa_array(
         if pa_type is not None and pa_array.type != pa_type:
             if pa.types.is_dictionary(pa_type):
                 pa_array = pa_array.dictionary_encode()
+                if pa_array.type != pa_type:
+                    pa_array = pa_array.cast(pa_type)
             else:
                 try:
                     pa_array = pa_array.cast(pa_type)
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 79440b55dd5dd..7d31fe6085c3a 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3498,6 +3498,14 @@ def test_to_numpy_timestamp_to_int():
     tm.assert_numpy_array_equal(result, expected)
 
 
+@pytest.mark.parametrize("arrow_type", [pa.large_string(), pa.string()])
+def test_cast_dictionary_different_value_dtype(arrow_type):
+    df = pd.DataFrame({"a": ["x", "y"]}, dtype="string[pyarrow]")
+    data_type = ArrowDtype(pa.dictionary(pa.int32(), arrow_type))
+    result = df.astype({"a": data_type})
+    assert result.dtypes.iloc[0] == data_type
+
+
 def test_map_numeric_na_action():
     ser = pd.Series([32, 40, None], dtype="int64[pyarrow]")
     result = ser.map(lambda x: 42, na_action="ignore")

From 7cdee7a15670b3273e45425619b493c7d74c3719 Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Mon, 29 Apr 2024 20:25:50 -0400
Subject: [PATCH 075/100] DOC: fixing RT03 errors for Index: duplicated and
 nunique (#58432)

* DOC: fixing RT03 errors for Index: duplicated and nunique

* deleting it lines from code_checks

* fixing EXPECTED TO FAIL, BUT NOT FAILING error

* fixing code_checks issue

* fixed Expected to fail error
---
 ci/code_checks.sh           | 3 ---
 pandas/core/base.py         | 1 +
 pandas/core/indexes/base.py | 1 +
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 8b1bccdaa8d1b..45831f6030794 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -93,7 +93,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index PR07" \
         -i "pandas.Index.append PR07,RT03,SA01" \
         -i "pandas.Index.difference PR07,RT03,SA01" \
-        -i "pandas.Index.duplicated RT03" \
         -i "pandas.Index.get_indexer PR07,SA01" \
         -i "pandas.Index.get_indexer_for PR01,SA01" \
         -i "pandas.Index.get_indexer_non_unique PR07,SA01" \
@@ -101,7 +100,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.identical PR01,SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
         -i "pandas.Index.names GL08" \
-        -i "pandas.Index.nunique RT03" \
         -i "pandas.Index.putmask PR01,RT03" \
         -i "pandas.Index.ravel PR01,RT03" \
         -i "pandas.Index.slice_indexer PR07,RT03,SA01" \
@@ -256,7 +254,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.mode SA01" \
         -i "pandas.Series.mul PR07" \
         -i "pandas.Series.ne PR07,SA01" \
-        -i "pandas.Series.nunique RT03" \
         -i "pandas.Series.pad PR01,SA01" \
         -i "pandas.Series.plot PR02,SA01" \
         -i "pandas.Series.pop RT03,SA01" \
diff --git a/pandas/core/base.py b/pandas/core/base.py
index e54fac3da72a6..87e87538ca1d9 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1062,6 +1062,7 @@ def nunique(self, dropna: bool = True) -> int:
         Returns
         -------
         int
+            An integer indicating the number of unique elements in the object.
 
         See Also
         --------
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 212d0bcef8f43..73ba02c515344 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2767,6 +2767,7 @@ def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
         Returns
         -------
         np.ndarray[bool]
+            A numpy array of boolean values indicating duplicate index values.
 
         See Also
         --------

From 46bd88f795ad4ff51fbd97b5e29c1b216524c72d Mon Sep 17 00:00:00 2001
From: rohanjain101 <38412262+rohanjain101@users.noreply.github.com>
Date: Mon, 29 Apr 2024 20:26:37 -0400
Subject: [PATCH 076/100] preserve index in list accessor (#58438)

* preserve index in list accessor

* gh reference

* explode fix

* cleanup

* improve test

* Update v3.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* f

---------

Co-authored-by: Rohan Jain <rohanjain@microsoft.com>
Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst                |  1 +
 pandas/core/arrays/arrow/accessors.py         | 22 ++++++++++------
 .../series/accessors/test_list_accessor.py    | 25 ++++++++++++++++---
 3 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 59926c0751d32..afe63b6785524 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -483,6 +483,7 @@ Other
 - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
 - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
 - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
+- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
 
 .. ***DO NOT USE THIS SECTION***
 
diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py
index 19ec253e81ef2..d8f948a37d206 100644
--- a/pandas/core/arrays/arrow/accessors.py
+++ b/pandas/core/arrays/arrow/accessors.py
@@ -110,7 +110,9 @@ def len(self) -> Series:
         from pandas import Series
 
         value_lengths = pc.list_value_length(self._pa_array)
-        return Series(value_lengths, dtype=ArrowDtype(value_lengths.type))
+        return Series(
+            value_lengths, dtype=ArrowDtype(value_lengths.type), index=self._data.index
+        )
 
     def __getitem__(self, key: int | slice) -> Series:
         """
@@ -149,7 +151,9 @@ def __getitem__(self, key: int | slice) -> Series:
             # if key < 0:
             #     key = pc.add(key, pc.list_value_length(self._pa_array))
             element = pc.list_element(self._pa_array, key)
-            return Series(element, dtype=ArrowDtype(element.type))
+            return Series(
+                element, dtype=ArrowDtype(element.type), index=self._data.index
+            )
         elif isinstance(key, slice):
             if pa_version_under11p0:
                 raise NotImplementedError(
@@ -167,7 +171,7 @@ def __getitem__(self, key: int | slice) -> Series:
             if step is None:
                 step = 1
             sliced = pc.list_slice(self._pa_array, start, stop, step)
-            return Series(sliced, dtype=ArrowDtype(sliced.type))
+            return Series(sliced, dtype=ArrowDtype(sliced.type), index=self._data.index)
         else:
             raise ValueError(f"key must be an int or slice, got {type(key).__name__}")
 
@@ -195,15 +199,17 @@ def flatten(self) -> Series:
         ... )
         >>> s.list.flatten()
         0    1
-        1    2
-        2    3
-        3    3
+        0    2
+        0    3
+        1    3
         dtype: int64[pyarrow]
         """
         from pandas import Series
 
-        flattened = pc.list_flatten(self._pa_array)
-        return Series(flattened, dtype=ArrowDtype(flattened.type))
+        counts = pa.compute.list_value_length(self._pa_array)
+        flattened = pa.compute.list_flatten(self._pa_array)
+        index = self._data.index.repeat(counts.fill_null(pa.scalar(0, counts.type)))
+        return Series(flattened, dtype=ArrowDtype(flattened.type), index=index)
 
 
 class StructAccessor(ArrowAccessor):
diff --git a/pandas/tests/series/accessors/test_list_accessor.py b/pandas/tests/series/accessors/test_list_accessor.py
index 1c60567c1a530..c153e800cb534 100644
--- a/pandas/tests/series/accessors/test_list_accessor.py
+++ b/pandas/tests/series/accessors/test_list_accessor.py
@@ -31,10 +31,23 @@ def test_list_getitem(list_dtype):
     tm.assert_series_equal(actual, expected)
 
 
+def test_list_getitem_index():
+    # GH 58425
+    ser = Series(
+        [[1, 2, 3], [4, None, 5], None],
+        dtype=ArrowDtype(pa.list_(pa.int64())),
+        index=[1, 3, 7],
+    )
+    actual = ser.list[1]
+    expected = Series([2, None, None], dtype="int64[pyarrow]", index=[1, 3, 7])
+    tm.assert_series_equal(actual, expected)
+
+
 def test_list_getitem_slice():
     ser = Series(
         [[1, 2, 3], [4, None, 5], None],
         dtype=ArrowDtype(pa.list_(pa.int64())),
+        index=[1, 3, 7],
     )
     if pa_version_under11p0:
         with pytest.raises(
@@ -44,7 +57,9 @@ def test_list_getitem_slice():
     else:
         actual = ser.list[1:None:None]
         expected = Series(
-            [[2, 3], [None, 5], None], dtype=ArrowDtype(pa.list_(pa.int64()))
+            [[2, 3], [None, 5], None],
+            dtype=ArrowDtype(pa.list_(pa.int64())),
+            index=[1, 3, 7],
         )
         tm.assert_series_equal(actual, expected)
 
@@ -61,11 +76,15 @@ def test_list_len():
 
 def test_list_flatten():
     ser = Series(
-        [[1, 2, 3], [4, None], None],
+        [[1, 2, 3], None, [4, None], [], [7, 8]],
         dtype=ArrowDtype(pa.list_(pa.int64())),
     )
     actual = ser.list.flatten()
-    expected = Series([1, 2, 3, 4, None], dtype=ArrowDtype(pa.int64()))
+    expected = Series(
+        [1, 2, 3, 4, None, 7, 8],
+        dtype=ArrowDtype(pa.int64()),
+        index=[0, 0, 0, 2, 2, 4, 4],
+    )
     tm.assert_series_equal(actual, expected)
 
 

From 78a2ef2f43a40e13f51c223b64d2325bd9e7716e Mon Sep 17 00:00:00 2001
From: KeiOshima <ko2091@nyu.edu>
Date: Mon, 29 Apr 2024 20:27:15 -0400
Subject: [PATCH 077/100] DOC: fixing PR01 and SA01 issue for Index: Identical
 (#58442)

* DOC: fixing PR01 and SA01 issue for Index: Identical

* fixing EXpected to fail issue
---
 ci/code_checks.sh           |  1 -
 pandas/core/indexes/base.py | 11 +++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 45831f6030794..161047197ff6f 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -97,7 +97,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.get_indexer_for PR01,SA01" \
         -i "pandas.Index.get_indexer_non_unique PR07,SA01" \
         -i "pandas.Index.get_loc PR07,RT03,SA01" \
-        -i "pandas.Index.identical PR01,SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
         -i "pandas.Index.names GL08" \
         -i "pandas.Index.putmask PR01,RT03" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 73ba02c515344..2bf0aca31449e 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5344,12 +5344,23 @@ def identical(self, other) -> bool:
         """
         Similar to equals, but checks that object attributes and types are also equal.
 
+        Parameters
+        ----------
+        other : Index
+            The Index object you want to compare with the current Index object.
+
         Returns
         -------
         bool
             If two Index objects have equal elements and same type True,
             otherwise False.
 
+        See Also
+        --------
+        Index.equals: Determine if two Index objects are equal.
+        Index.has_duplicates: Check if the Index has duplicate values.
+        Index.is_unique: Return if the index has unique values.
+
         Examples
         --------
         >>> idx1 = pd.Index(["1", "2", "3"])

From f2909854ab6b2b7912ed68df5c5f0dd8a8fd3f3a Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Tue, 30 Apr 2024 22:01:16 +0530
Subject: [PATCH 078/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.take (#58489)

* DOC: add PR01,PR07 for pandas.Index.take

* DOC: remove PR01,PR07 for pandas.Index.take
---
 ci/code_checks.sh           |  1 -
 pandas/core/indexes/base.py | 12 ++++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 161047197ff6f..1bdd2d5e8aa33 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -103,7 +103,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.ravel PR01,RT03" \
         -i "pandas.Index.slice_indexer PR07,RT03,SA01" \
         -i "pandas.Index.str PR01,SA01" \
-        -i "pandas.Index.take PR01,PR07" \
         -i "pandas.Index.view GL08" \
         -i "pandas.Int16Dtype SA01" \
         -i "pandas.Int32Dtype SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 2bf0aca31449e..c7b009bc02dbe 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1120,9 +1120,21 @@ def astype(self, dtype, copy: bool = True):
         axis : int, optional
             The axis over which to select values, always 0.
         allow_fill : bool, default True
+            How to handle negative values in `indices`.
+
+            * False: negative values in `indices` indicate positional indices
+              from the right. This is similar to
+              :func:`numpy.take`.
+
+            * True: negative values in `indices` indicate
+              missing values. These values are set to `fill_value`. Any
+              other negative values raise a ``ValueError``.
+
         fill_value : scalar, default None
             If allow_fill=True and fill_value is not None, indices specified by
             -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.
+        **kwargs
+            Required for compatibility with numpy.
 
         Returns
         -------

From 2e7fa91a30d28f92fb31ee891fd2a74f57e99f78 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Tue, 30 Apr 2024 22:07:35 +0530
Subject: [PATCH 079/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.slice_indexer (#58490)

* DOC: add PR07,RT03 in pandas.Index.slice_indexer

* DOC: add SA01 in pandas.Index.slice_indexer

* DOC: remove pandas.Index.slice_indexer
---
 ci/code_checks.sh           | 1 -
 pandas/core/indexes/base.py | 9 ++++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 1bdd2d5e8aa33..6b6ca25178720 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -101,7 +101,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.names GL08" \
         -i "pandas.Index.putmask PR01,RT03" \
         -i "pandas.Index.ravel PR01,RT03" \
-        -i "pandas.Index.slice_indexer PR07,RT03,SA01" \
         -i "pandas.Index.str PR01,SA01" \
         -i "pandas.Index.view GL08" \
         -i "pandas.Int16Dtype SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index c7b009bc02dbe..c83dd3be13424 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6342,19 +6342,26 @@ def slice_indexer(
         end : label, default None
             If None, defaults to the end.
         step : int, default None
+            If None, defaults to 1.
 
         Returns
         -------
         slice
+            A slice object.
 
         Raises
         ------
         KeyError : If key does not exist, or key is not unique and index is
             not ordered.
 
+        See Also
+        --------
+        Index.slice_locs : Computes slice locations for input labels.
+        Index.get_slice_bound : Retrieves slice bound that corresponds to given label.
+
         Notes
         -----
-        This function assumes that the data is sorted, so use at your own peril
+        This function assumes that the data is sorted, so use at your own peril.
 
         Examples
         --------

From c150511159f7ef8dc4df8d45a99b1c49ea948dea Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Tue, 30 Apr 2024 22:08:17 +0530
Subject: [PATCH 080/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DataFrame.__dataframe__ (#58491)

* DOC: add SA01 in pandas.DataFrame.__dataframe__

* DOC: remove SA01 in pandas.DataFrame.__dataframe__
---
 ci/code_checks.sh    | 1 -
 pandas/core/frame.py | 5 +++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 6b6ca25178720..d1ba0d24b4b7f 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -70,7 +70,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         --format=actions \
         -i ES01 `# For now it is ok if docstrings are missing the extended summary` \
         -i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
-        -i "pandas.DataFrame.__dataframe__ SA01" \
         -i "pandas.DataFrame.at_time PR01" \
         -i "pandas.DataFrame.kurt RT03,SA01" \
         -i "pandas.DataFrame.kurtosis RT03,SA01" \
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9fbbc2c08efaa..b7eba737829ec 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -931,6 +931,11 @@ def __dataframe__(
         DataFrame interchange object
             The object which consuming library can use to ingress the dataframe.
 
+        See Also
+        --------
+        DataFrame.from_records : Constructor from tuples, also record arrays.
+        DataFrame.from_dict : From dicts of Series, arrays, or dicts.
+
         Notes
         -----
         Details on the interchange protocol:

From fdcdcb84cfa675b756c7f4c42d3eb466c49bc098 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Tue, 30 Apr 2024 22:09:10 +0530
Subject: [PATCH 081/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DataFrame.kurt and pandas.DataFrame.kurtosis (#58493)

DOC: fix ruff issues
---
 ci/code_checks.sh    |  2 --
 pandas/core/frame.py | 80 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index d1ba0d24b4b7f..8f6c5e0beee0b 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -71,8 +71,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i ES01 `# For now it is ok if docstrings are missing the extended summary` \
         -i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
         -i "pandas.DataFrame.at_time PR01" \
-        -i "pandas.DataFrame.kurt RT03,SA01" \
-        -i "pandas.DataFrame.kurtosis RT03,SA01" \
         -i "pandas.DataFrame.max RT03" \
         -i "pandas.DataFrame.mean RT03,SA01" \
         -i "pandas.DataFrame.median RT03,SA01" \
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b7eba737829ec..653b07b6e27ed 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -12069,7 +12069,6 @@ def kurt(
     ) -> Series | Any: ...
 
     @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt")
-    @doc(make_doc("kurt", ndim=2))
     def kurt(
         self,
         axis: Axis | None = 0,
@@ -12077,6 +12076,85 @@ def kurt(
         numeric_only: bool = False,
         **kwargs,
     ) -> Series | Any:
+        """
+        Return unbiased kurtosis over requested axis.
+
+        Kurtosis obtained using Fisher's definition of
+        kurtosis (kurtosis of normal == 0.0). Normalized by N-1.
+
+        Parameters
+        ----------
+        axis : {index (0), columns (1)}
+            Axis for the function to be applied on.
+            For `Series` this parameter is unused and defaults to 0.
+
+            For DataFrames, specifying ``axis=None`` will apply the aggregation
+            across both axes.
+
+            .. versionadded:: 2.0.0
+
+        skipna : bool, default True
+            Exclude NA/null values when computing the result.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+
+        **kwargs
+            Additional keyword arguments to be passed to the function.
+
+        Returns
+        -------
+        Series or scalar
+            Unbiased kurtosis over requested axis.
+
+        See Also
+        --------
+        DataFrame.kurtosis : Returns unbiased kurtosis over requested axis.
+
+        Examples
+        --------
+        >>> s = pd.Series([1, 2, 2, 3], index=["cat", "dog", "dog", "mouse"])
+        >>> s
+        cat    1
+        dog    2
+        dog    2
+        mouse  3
+        dtype: int64
+        >>> s.kurt()
+        1.5
+
+        With a DataFrame
+
+        >>> df = pd.DataFrame(
+        ...     {"a": [1, 2, 2, 3], "b": [3, 4, 4, 4]},
+        ...     index=["cat", "dog", "dog", "mouse"],
+        ... )
+        >>> df
+               a   b
+          cat  1   3
+          dog  2   4
+          dog  2   4
+        mouse  3   4
+        >>> df.kurt()
+        a   1.5
+        b   4.0
+        dtype: float64
+
+        With axis=None
+
+        >>> df.kurt(axis=None).round(6)
+        -0.988693
+
+        Using axis=1
+
+        >>> df = pd.DataFrame(
+        ...     {"a": [1, 2], "b": [3, 4], "c": [3, 4], "d": [1, 2]},
+        ...     index=["cat", "dog"],
+        ... )
+        >>> df.kurt(axis=1)
+        cat   -6.0
+        dog   -6.0
+        dtype: float64
+        """
         result = super().kurt(
             axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
         )

From 53b7f24258b878107e11f2b20ac4d9184ba72b49 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Tue, 30 Apr 2024 23:24:43 +0530
Subject: [PATCH 082/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.view (#58486)

* DOC: add GL08 for pandas.Index.view

* DOC: remove GL08 for pandas.Index.view

* DOC: fix examples in docstring

* DOC: fix examples in docstring

* DOC: fix examples in docstring

* DOC: fix examples in docstring

* DOC: fix examples in docstring
---
 ci/code_checks.sh           |  1 -
 pandas/core/indexes/base.py | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 8f6c5e0beee0b..3ecca97b5dccd 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -99,7 +99,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.putmask PR01,RT03" \
         -i "pandas.Index.ravel PR01,RT03" \
         -i "pandas.Index.str PR01,SA01" \
-        -i "pandas.Index.view GL08" \
         -i "pandas.Int16Dtype SA01" \
         -i "pandas.Int32Dtype SA01" \
         -i "pandas.Int64Dtype SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index c83dd3be13424..baa8a7493a030 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1013,6 +1013,42 @@ def ravel(self, order: str_t = "C") -> Self:
         return self[:]
 
     def view(self, cls=None):
+        """
+        Return a view on self.
+
+        Parameters
+        ----------
+        cls : data-type or ndarray sub-class, optional
+            Data-type descriptor of the returned view, e.g., float32 or int16.
+            Omitting it results in the view having the same data-type as `self`.
+            This argument can also be specified as an ndarray sub-class,
+            e.g., np.int64 or np.float32 which then specifies the type of
+            the returned object.
+
+        Returns
+        -------
+        numpy.ndarray
+            A new view of the same data in memory.
+
+        See Also
+        --------
+        numpy.ndarray.view : Returns a new view of array with the same data.
+
+        Examples
+        --------
+        >>> s = pd.Series([1, 2, 3], index=["1", "2", "3"])
+        >>> s.index.view("object")
+        array(['1', '2', '3'], dtype=object)
+
+        >>> s = pd.Series([1, 2, 3], index=[-1, 0, 1])
+        >>> s.index.view(np.int64)
+        array([-1,  0,  1])
+        >>> s.index.view(np.float32)
+        array([   nan,    nan, 0.e+00, 0.e+00, 1.e-45, 0.e+00], dtype=float32)
+        >>> s.index.view(np.uint64)
+        array([18446744073709551615,                    0,                    1],
+          dtype=uint64)
+        """
         # we need to see if we are subclassing an
         # index type here
         if cls is not None:

From c9bc4809528998313a609ab16168ca237bc186b6 Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Tue, 30 Apr 2024 13:55:25 -0400
Subject: [PATCH 083/100] Remove deprecated plot_date calls (#58484)

* Remove deprecated plot_date calls

These were deprecated in Matplotlib 3.9.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 pandas/tests/plotting/test_datetimelike.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py
index 6b709522bab70..4b4eeada58366 100644
--- a/pandas/tests/plotting/test_datetimelike.py
+++ b/pandas/tests/plotting/test_datetimelike.py
@@ -1432,13 +1432,19 @@ def test_mpl_nopandas(self):
         values1 = np.arange(10.0, 11.0, 0.5)
         values2 = np.arange(11.0, 12.0, 0.5)
 
-        kw = {"fmt": "-", "lw": 4}
-
         _, ax = mpl.pyplot.subplots()
-        ax.plot_date([x.toordinal() for x in dates], values1, **kw)
-        ax.plot_date([x.toordinal() for x in dates], values2, **kw)
-
-        line1, line2 = ax.get_lines()
+        (
+            line1,
+            line2,
+        ) = ax.plot(
+            [x.toordinal() for x in dates],
+            values1,
+            "-",
+            [x.toordinal() for x in dates],
+            values2,
+            "-",
+            linewidth=4,
+        )
 
         exp = np.array([x.toordinal() for x in dates], dtype=np.float64)
         tm.assert_numpy_array_equal(line1.get_xydata()[:, 0], exp)

From cb8b213cbde4b677cc79e781e1c7f535e0724fe9 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Tue, 30 Apr 2024 23:25:58 +0530
Subject: [PATCH 084/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DataFrame.at_time (#58492)

* DOC: add SA01PR01 in pandas.DataFrame.at_time

* DOC: remove PR01 in pandas.DataFrame.at_time

* DOC: remove pandas.Series.at_time
---
 ci/code_checks.sh      | 2 --
 pandas/core/generic.py | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 3ecca97b5dccd..af432dcd64c82 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -70,7 +70,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         --format=actions \
         -i ES01 `# For now it is ok if docstrings are missing the extended summary` \
         -i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
-        -i "pandas.DataFrame.at_time PR01" \
         -i "pandas.DataFrame.max RT03" \
         -i "pandas.DataFrame.mean RT03,SA01" \
         -i "pandas.DataFrame.median RT03,SA01" \
@@ -187,7 +186,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series SA01" \
         -i "pandas.Series.__iter__ RT03,SA01" \
         -i "pandas.Series.add PR07" \
-        -i "pandas.Series.at_time PR01" \
         -i "pandas.Series.backfill PR01,SA01" \
         -i "pandas.Series.case_when RT03" \
         -i "pandas.Series.cat PR07,SA01" \
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 121f49cb7d1cf..24727bb9d83c1 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8532,6 +8532,8 @@ def at_time(self, time, asof: bool = False, axis: Axis | None = None) -> Self:
         ----------
         time : datetime.time or str
             The values to select.
+        asof : bool, default False
+            This parameter is currently not supported.
         axis : {0 or 'index', 1 or 'columns'}, default 0
             For `Series` this parameter is unused and defaults to 0.
 

From 66cfd806144f001f460679f9322c3c0b7d335685 Mon Sep 17 00:00:00 2001
From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com>
Date: Tue, 30 Apr 2024 13:57:25 -0400
Subject: [PATCH 085/100] Fix PR07,RT03,SA01 errors for Index.append,
 Index.difference (#58453)

* Fix errors for Index.append

* Fixed errors for Index.difference
---
 ci/code_checks.sh           |  2 --
 pandas/core/indexes/base.py | 18 ++++++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index af432dcd64c82..f49bfb1581332 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -87,8 +87,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
-        -i "pandas.Index.append PR07,RT03,SA01" \
-        -i "pandas.Index.difference PR07,RT03,SA01" \
         -i "pandas.Index.get_indexer PR07,SA01" \
         -i "pandas.Index.get_indexer_for PR01,SA01" \
         -i "pandas.Index.get_indexer_non_unique PR07,SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index baa8a7493a030..054f522e7a37b 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3297,6 +3297,8 @@ def difference(self, other, sort=None):
         Parameters
         ----------
         other : Index or array-like
+            Index object or an array-like object containing elements to be compared
+            with the elements of the original Index.
         sort : bool or None, default None
             Whether to sort the resulting index. By default, the
             values are attempted to be sorted, but any TypeError from
@@ -3310,6 +3312,14 @@ def difference(self, other, sort=None):
         Returns
         -------
         Index
+            Returns a new Index object containing elements that are in the original
+            Index but not in the `other` Index.
+
+        See Also
+        --------
+        Index.symmetric_difference : Compute the symmetric difference of two Index
+            objects.
+        Index.intersection : Form the intersection of two Index objects.
 
         Examples
         --------
@@ -5192,10 +5202,18 @@ def append(self, other: Index | Sequence[Index]) -> Index:
         Parameters
         ----------
         other : Index or list/tuple of indices
+            Single Index or a collection of indices, which can be either a list or a
+            tuple.
 
         Returns
         -------
         Index
+            Returns a new Index object resulting from appending the provided other
+            indices to the original Index.
+
+        See Also
+        --------
+        Index.insert : Make new Index inserting new item at location.
 
         Examples
         --------

From 0f9adf86858a428ff7fc63d3b48f6dbc8321ba52 Mon Sep 17 00:00:00 2001
From: Khor Chean Wei <cheanwei.KCW@gmail.com>
Date: Wed, 1 May 2024 01:58:11 +0800
Subject: [PATCH 086/100] ENH: Allow parameter  min_periods in
 DataFrame.corrwith() (#58231)

* Testing

* Testing

* enhance test case

* add test

* testing

* add

* add test

* enhance

* add

* add

* add

* add

* add

* add

* enhance

* enhance

* enhance

* Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* test

* Update test_cov_corr.py

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 doc/source/whatsnew/v3.0.0.rst              |  1 +
 pandas/core/frame.py                        |  9 +++++++-
 pandas/tests/frame/methods/test_cov_corr.py | 25 +++++++++++++++++++++
 pandas/tests/groupby/test_api.py            |  2 ++
 4 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index afe63b6785524..1fc2f1041e2ea 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -39,6 +39,7 @@ Other enhancements
 - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
 - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
 - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
+- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as an optional argument, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
 - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
 - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 653b07b6e27ed..3d2a6093464a9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -11132,6 +11132,7 @@ def corrwith(
         drop: bool = False,
         method: CorrelationMethod = "pearson",
         numeric_only: bool = False,
+        min_periods: int | None = None,
     ) -> Series:
         """
         Compute pairwise correlation.
@@ -11162,6 +11163,9 @@ def corrwith(
         numeric_only : bool, default False
             Include only `float`, `int` or `boolean` data.
 
+        min_periods : int, optional
+            Minimum number of observations needed to have a valid result.
+
             .. versionadded:: 1.5.0
 
             .. versionchanged:: 2.0.0
@@ -11205,7 +11209,10 @@ def corrwith(
         this = self._get_numeric_data() if numeric_only else self
 
         if isinstance(other, Series):
-            return this.apply(lambda x: other.corr(x, method=method), axis=axis)
+            return this.apply(
+                lambda x: other.corr(x, method=method, min_periods=min_periods),
+                axis=axis,
+            )
 
         if numeric_only:
             other = other._get_numeric_data()
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index 4d2d83d25e8da..53aa44f264c7a 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -461,3 +461,28 @@ def test_corrwith_spearman_with_tied_data(self):
         result = df_bool.corrwith(ser_bool)
         expected = Series([0.57735, 0.57735], index=["A", "B"])
         tm.assert_series_equal(result, expected)
+
+    def test_corrwith_min_periods_method(self):
+        # GH#9490
+        pytest.importorskip("scipy")
+        df1 = DataFrame(
+            {
+                "A": [1, np.nan, 7, 8],
+                "B": [False, True, True, False],
+                "C": [10, 4, 9, 3],
+            }
+        )
+        df2 = df1[["B", "C"]]
+        result = (df1 + 1).corrwith(df2.B, method="spearman", min_periods=2)
+        expected = Series([0.0, 1.0, 0.0], index=["A", "B", "C"])
+        tm.assert_series_equal(result, expected)
+
+    def test_corrwith_min_periods_boolean(self):
+        # GH#9490
+        df_bool = DataFrame(
+            {"A": [True, True, False, False], "B": [True, False, False, True]}
+        )
+        ser_bool = Series([True, True, False, True])
+        result = df_bool.corrwith(ser_bool, min_periods=3)
+        expected = Series([0.57735, 0.57735], index=["A", "B"])
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py
index d2cfa530e7c65..33b39bad4ab81 100644
--- a/pandas/tests/groupby/test_api.py
+++ b/pandas/tests/groupby/test_api.py
@@ -192,6 +192,8 @@ def test_frame_consistency(groupby_func):
         exclude_expected = {"numeric_only"}
     elif groupby_func in ("quantile",):
         exclude_expected = {"method", "axis"}
+    elif groupby_func in ["corrwith"]:
+        exclude_expected = {"min_periods"}
     if groupby_func not in ["pct_change", "size"]:
         exclude_expected |= {"axis"}
 

From 086b047242e8f2a1a2a8d5f7851cecb528eb4785 Mon Sep 17 00:00:00 2001
From: Gianluca Ficarelli <26835404+GianlucaFicarelli@users.noreply.github.com>
Date: Tue, 30 Apr 2024 20:25:26 +0200
Subject: [PATCH 087/100] PERF: MultiIndex._engine use smaller dtypes (#58411)

* PERF: MultiIndex._engine use smaller dtypes

* Move offsets downcasting to MultiIndex._engine

* Remove unused import uint64_t
---
 doc/source/whatsnew/v3.0.0.rst              |   1 +
 pandas/_libs/index.pyi                      |   4 +-
 pandas/_libs/index.pyx                      |  48 +++++++--
 pandas/core/indexes/multi.py                | 110 +++++++++-----------
 pandas/tests/indexes/multi/test_indexing.py |  41 +++++---
 5 files changed, 114 insertions(+), 90 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 1fc2f1041e2ea..ce9022bdc2967 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -336,6 +336,7 @@ Performance improvements
 - Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
 - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
 - Performance improvement in :meth:`Index.to_frame` returning a :class:`RangeIndex` columns of a :class:`Index` when possible. (:issue:`58018`)
+- Performance improvement in :meth:`MultiIndex._engine` to use smaller dtypes if possible (:issue:`58411`)
 - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
 - Performance improvement in :meth:`MultiIndex.memory_usage` to ignore the index engine when it isn't already cached. (:issue:`58385`)
 - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi
index 12a5bf245977e..bf6d8ba8973d3 100644
--- a/pandas/_libs/index.pyi
+++ b/pandas/_libs/index.pyi
@@ -74,13 +74,13 @@ class MaskedBoolEngine(MaskedUInt8Engine): ...
 
 class BaseMultiIndexCodesEngine:
     levels: list[np.ndarray]
-    offsets: np.ndarray  # ndarray[uint64_t, ndim=1]
+    offsets: np.ndarray  # np.ndarray[..., ndim=1]
 
     def __init__(
         self,
         levels: list[Index],  # all entries hashable
         labels: list[np.ndarray],  # all entries integer-dtyped
-        offsets: np.ndarray,  # np.ndarray[np.uint64, ndim=1]
+        offsets: np.ndarray,  # np.ndarray[..., ndim=1]
     ) -> None: ...
     def get_indexer(self, target: npt.NDArray[np.object_]) -> npt.NDArray[np.intp]: ...
     def _extract_level_codes(self, target: MultiIndex) -> np.ndarray: ...
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index a700074d46ba8..f1be8d97c71eb 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -9,7 +9,6 @@ from numpy cimport (
     intp_t,
     ndarray,
     uint8_t,
-    uint64_t,
 )
 
 cnp.import_array()
@@ -699,8 +698,7 @@ cdef class BaseMultiIndexCodesEngine:
     Keys are located by first locating each component against the respective
     level, then locating (the integer representation of) codes.
     """
-    def __init__(self, object levels, object labels,
-                 ndarray[uint64_t, ndim=1] offsets):
+    def __init__(self, object levels, object labels, ndarray offsets):
         """
         Parameters
         ----------
@@ -708,7 +706,7 @@ cdef class BaseMultiIndexCodesEngine:
             Levels of the MultiIndex.
         labels : list-like of numpy arrays of integer dtype
             Labels of the MultiIndex.
-        offsets : numpy array of uint64 dtype
+        offsets : numpy array of int dtype
             Pre-calculated offsets, one for each level of the index.
         """
         self.levels = levels
@@ -718,8 +716,9 @@ cdef class BaseMultiIndexCodesEngine:
         # with positive integers (-1 for NaN becomes 1). This enables us to
         # differentiate between values that are missing in other and matching
         # NaNs. We will set values that are not found to 0 later:
-        labels_arr = np.array(labels, dtype="int64").T + multiindex_nulls_shift
-        codes = labels_arr.astype("uint64", copy=False)
+        codes = np.array(labels).T
+        codes += multiindex_nulls_shift  # inplace sum optimisation
+
         self.level_has_nans = [-1 in lab for lab in labels]
 
         # Map each codes combination in the index to an integer unambiguously
@@ -731,8 +730,37 @@ cdef class BaseMultiIndexCodesEngine:
         # integers representing labels: we will use its get_loc and get_indexer
         self._base.__init__(self, lab_ints)
 
-    def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
-        raise NotImplementedError("Implemented by subclass")  # pragma: no cover
+    def _codes_to_ints(self, ndarray codes) -> np.ndarray:
+        """
+        Transform combination(s) of uint in one uint or Python integer (each), in a
+        strictly monotonic way (i.e. respecting the lexicographic order of integer
+        combinations).
+
+        Parameters
+        ----------
+        codes : 1- or 2-dimensional array of dtype uint
+            Combinations of integers (one per row)
+
+        Returns
+        -------
+        scalar or 1-dimensional array, of dtype _codes_dtype
+            Integer(s) representing one combination (each).
+        """
+        # To avoid overflows, first make sure we are working with the right dtype:
+        codes = codes.astype(self._codes_dtype, copy=False)
+
+        # Shift the representation of each level by the pre-calculated number of bits:
+        codes <<= self.offsets  # inplace shift optimisation
+
+        # Now sum and OR are in fact interchangeable. This is a simple
+        # composition of the (disjunct) significant bits of each level (i.e.
+        # each column in "codes") in a single positive integer (per row):
+        if codes.ndim == 1:
+            # Single key
+            return np.bitwise_or.reduce(codes)
+
+        # Multiple keys
+        return np.bitwise_or.reduce(codes, axis=1)
 
     def _extract_level_codes(self, target) -> np.ndarray:
         """
@@ -757,7 +785,7 @@ cdef class BaseMultiIndexCodesEngine:
             codes[codes > 0] += 1
             if self.level_has_nans[i]:
                 codes[target.codes[i] == -1] += 1
-        return self._codes_to_ints(np.array(level_codes, dtype="uint64").T)
+        return self._codes_to_ints(np.array(level_codes, dtype=self._codes_dtype).T)
 
     def get_indexer(self, target: np.ndarray) -> np.ndarray:
         """
@@ -788,7 +816,7 @@ cdef class BaseMultiIndexCodesEngine:
             raise KeyError(key)
 
         # Transform indices into single integer:
-        lab_int = self._codes_to_ints(np.array(indices, dtype="uint64"))
+        lab_int = self._codes_to_ints(np.array(indices, dtype=self._codes_dtype))
 
         return self._base.get_loc(self, lab_int)
 
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index c8e16fad00d5b..a5bcf49c5490b 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -123,84 +123,56 @@
 )
 
 
-class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
-    """
-    This class manages a MultiIndex by mapping label combinations to positive
-    integers.
+class MultiIndexUInt64Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
+    """Manages a MultiIndex by mapping label combinations to positive integers.
+
+    The number of possible label combinations must not overflow the 64 bits integers.
     """
 
     _base = libindex.UInt64Engine
+    _codes_dtype = "uint64"
 
-    def _codes_to_ints(self, codes):
-        """
-        Transform combination(s) of uint64 in one uint64 (each), in a strictly
-        monotonic way (i.e. respecting the lexicographic order of integer
-        combinations): see BaseMultiIndexCodesEngine documentation.
 
-        Parameters
-        ----------
-        codes : 1- or 2-dimensional array of dtype uint64
-            Combinations of integers (one per row)
+class MultiIndexUInt32Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt32Engine):
+    """Manages a MultiIndex by mapping label combinations to positive integers.
 
-        Returns
-        -------
-        scalar or 1-dimensional array, of dtype uint64
-            Integer(s) representing one combination (each).
-        """
-        # Shift the representation of each level by the pre-calculated number
-        # of bits:
-        codes <<= self.offsets
+    The number of possible label combinations must not overflow the 32 bits integers.
+    """
 
-        # Now sum and OR are in fact interchangeable. This is a simple
-        # composition of the (disjunct) significant bits of each level (i.e.
-        # each column in "codes") in a single positive integer:
-        if codes.ndim == 1:
-            # Single key
-            return np.bitwise_or.reduce(codes)
+    _base = libindex.UInt32Engine
+    _codes_dtype = "uint32"
 
-        # Multiple keys
-        return np.bitwise_or.reduce(codes, axis=1)
 
+class MultiIndexUInt16Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt16Engine):
+    """Manages a MultiIndex by mapping label combinations to positive integers.
 
-class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
-    """
-    This class manages those (extreme) cases in which the number of possible
-    label combinations overflows the 64 bits integers, and uses an ObjectEngine
-    containing Python integers.
+    The number of possible label combinations must not overflow the 16 bits integers.
     """
 
-    _base = libindex.ObjectEngine
+    _base = libindex.UInt16Engine
+    _codes_dtype = "uint16"
 
-    def _codes_to_ints(self, codes):
-        """
-        Transform combination(s) of uint64 in one Python integer (each), in a
-        strictly monotonic way (i.e. respecting the lexicographic order of
-        integer combinations): see BaseMultiIndexCodesEngine documentation.
 
-        Parameters
-        ----------
-        codes : 1- or 2-dimensional array of dtype uint64
-            Combinations of integers (one per row)
+class MultiIndexUInt8Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt8Engine):
+    """Manages a MultiIndex by mapping label combinations to positive integers.
 
-        Returns
-        -------
-        int, or 1-dimensional array of dtype object
-            Integer(s) representing one combination (each).
-        """
-        # Shift the representation of each level by the pre-calculated number
-        # of bits. Since this can overflow uint64, first make sure we are
-        # working with Python integers:
-        codes = codes.astype("object") << self.offsets
+    The number of possible label combinations must not overflow the 8 bits integers.
+    """
 
-        # Now sum and OR are in fact interchangeable. This is a simple
-        # composition of the (disjunct) significant bits of each level (i.e.
-        # each column in "codes") in a single positive integer (per row):
-        if codes.ndim == 1:
-            # Single key
-            return np.bitwise_or.reduce(codes)
+    _base = libindex.UInt8Engine
+    _codes_dtype = "uint8"
 
-        # Multiple keys
-        return np.bitwise_or.reduce(codes, axis=1)
+
+class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
+    """Manages a MultiIndex by mapping label combinations to positive integers.
+
+    This class manages those (extreme) cases in which the number of possible
+    label combinations overflows the 64 bits integers, and uses an ObjectEngine
+    containing Python integers.
+    """
+
+    _base = libindex.ObjectEngine
+    _codes_dtype = "object"
 
 
 def names_compat(meth: F) -> F:
@@ -1229,13 +1201,25 @@ def _engine(self):
         # equivalent to sorting lexicographically the codes themselves. Notice
         # that each level needs to be shifted by the number of bits needed to
         # represent the _previous_ ones:
-        offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")
+        offsets = np.concatenate([lev_bits[1:], [0]])
+        # Downcast the type if possible, to prevent upcasting when shifting codes:
+        offsets = offsets.astype(np.min_scalar_type(int(offsets[0])))
 
         # Check the total number of bits needed for our representation:
         if lev_bits[0] > 64:
             # The levels would overflow a 64 bit uint - use Python integers:
             return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
-        return MultiIndexUIntEngine(self.levels, self.codes, offsets)
+        if lev_bits[0] > 32:
+            # The levels would overflow a 32 bit uint - use uint64
+            return MultiIndexUInt64Engine(self.levels, self.codes, offsets)
+        if lev_bits[0] > 16:
+            # The levels would overflow a 16 bit uint - use uint32
+            return MultiIndexUInt32Engine(self.levels, self.codes, offsets)
+        if lev_bits[0] > 8:
+            # The levels would overflow a 8 bit uint - use uint16
+            return MultiIndexUInt16Engine(self.levels, self.codes, offsets)
+        # The levels fit in an 8 bit uint - use uint8
+        return MultiIndexUInt8Engine(self.levels, self.codes, offsets)
 
     # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
     # type "Type[MultiIndex]" in supertype "Index"
diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py
index 18d64999de496..f08a7625e7f8a 100644
--- a/pandas/tests/indexes/multi/test_indexing.py
+++ b/pandas/tests/indexes/multi/test_indexing.py
@@ -919,30 +919,41 @@ def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_id
     assert result == expected
 
 
-def test_pyint_engine():
+@pytest.mark.parametrize(
+    "N, expected_dtype",
+    [
+        (1, "uint8"),  # 2*4*N = 8
+        (2, "uint16"),  # 2*4*N = 16
+        (4, "uint32"),  # 2*4*N = 32
+        (8, "uint64"),  # 2*4*N = 64
+        (10, "object"),  # 2*4*N = 80
+    ],
+)
+def test_pyint_engine(N, expected_dtype):
     # GH#18519 : when combinations of codes cannot be represented in 64
     # bits, the index underlying the MultiIndex engine works with Python
     # integers, rather than uint64.
-    N = 5
     keys = [
         tuple(arr)
         for arr in [
-            [0] * 10 * N,
-            [1] * 10 * N,
-            [2] * 10 * N,
-            [np.nan] * N + [2] * 9 * N,
-            [0] * N + [2] * 9 * N,
-            [np.nan] * N + [2] * 8 * N + [0] * N,
+            [0] * 4 * N,
+            [1] * 4 * N,
+            [np.nan] * N + [0] * 3 * N,
+            [0] * N + [1] * 3 * N,
+            [np.nan] * N + [1] * 2 * N + [0] * N,
         ]
     ]
-    # Each level contains 4 elements (including NaN), so it is represented
-    # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a
-    # 64 bit engine and truncating the first levels, the fourth and fifth
-    # keys would collide; if truncating the last levels, the fifth and
-    # sixth; if rotating bits rather than shifting, the third and fifth.
+    # Each level contains 3 elements (NaN, 0, 1), and it's represented
+    # in 2 bits to store 4 possible values (0=notfound, 1=NaN, 2=0, 3=1), for
+    # a total of 2*N*4 = 80 > 64 bits where N=10 and the number of levels is N*4.
+    # If we were using a 64 bit engine and truncating the first levels, the
+    # fourth and fifth keys would collide; if truncating the last levels, the
+    # fifth and sixth; if rotating bits rather than shifting, the third and fifth.
+
+    index = MultiIndex.from_tuples(keys)
+    assert index._engine.values.dtype == expected_dtype
 
     for idx, key_value in enumerate(keys):
-        index = MultiIndex.from_tuples(keys)
         assert index.get_loc(key_value) == idx
 
         expected = np.arange(idx + 1, dtype=np.intp)
@@ -952,7 +963,7 @@ def test_pyint_engine():
     # With missing key:
     idces = range(len(keys))
     expected = np.array([-1] + list(idces), dtype=np.intp)
-    missing = tuple([0, 1] * 5 * N)
+    missing = tuple([0, 1, 0, 1] * N)
     result = index.get_indexer([missing] + [keys[i] for i in idces])
     tm.assert_numpy_array_equal(result, expected)
 

From 59f6a3373751bd6b8e257066e33fdc3c618030ea Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Tue, 30 Apr 2024 22:02:25 +0200
Subject: [PATCH 088/100] BUG: hashing read only object categories raises
 (#58481)

---
 pandas/_libs/hashing.pyx                      | 3 ++-
 pandas/tests/arrays/categorical/test_algos.py | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
index a9bf784d5f973..a1fd70529efa7 100644
--- a/pandas/_libs/hashing.pyx
+++ b/pandas/_libs/hashing.pyx
@@ -11,6 +11,7 @@ import numpy as np
 
 from numpy cimport (
     import_array,
+    ndarray,
     uint8_t,
     uint64_t,
 )
@@ -22,7 +23,7 @@ from pandas._libs.util cimport is_nan
 
 @cython.boundscheck(False)
 def hash_object_array(
-    object[:] arr, str key, str encoding="utf8"
+    ndarray[object, ndim=1] arr, str key, str encoding="utf8"
 ) -> np.ndarray[np.uint64]:
     """
     Parameters
diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py
index 69c3364c7e98e..a7d0becc30dd9 100644
--- a/pandas/tests/arrays/categorical/test_algos.py
+++ b/pandas/tests/arrays/categorical/test_algos.py
@@ -86,3 +86,11 @@ def test_diff():
     df = ser.to_frame(name="A")
     with pytest.raises(TypeError, match=msg):
         df.diff()
+
+
+def test_hash_read_only_categorical():
+    # GH#58481
+    idx = pd.Index(pd.Index(["a", "b", "c"], dtype="object").values)
+    cat = pd.CategoricalDtype(idx)
+    arr = pd.Series(["a", "b"], dtype=cat).values
+    assert hash(arr.dtype) == hash(arr.dtype)

From b804d9efda0392a9817f3350e6a8deb2d3c801a5 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Wed, 1 May 2024 22:22:21 +0530
Subject: [PATCH 089/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.get_indexer (#58506)

* DOC: add PR07,SA01 in pandas.Index.get_indexer

* DOC: remove PR07,SA01 in pandas.Index.get_indexer

* DOC: remove PR07,SA01 in pandas.IntervalIndex.get_indexer

* DOC: remove PR07,SA01 in pandas.MultiIndex.get_indexer
---
 ci/code_checks.sh           | 3 ---
 pandas/core/indexes/base.py | 7 +++++++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index f49bfb1581332..91335719cf303 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -87,7 +87,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
-        -i "pandas.Index.get_indexer PR07,SA01" \
         -i "pandas.Index.get_indexer_for PR01,SA01" \
         -i "pandas.Index.get_indexer_non_unique PR07,SA01" \
         -i "pandas.Index.get_loc PR07,RT03,SA01" \
@@ -109,7 +108,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.IntervalDtype.subtype SA01" \
         -i "pandas.IntervalIndex.closed SA01" \
         -i "pandas.IntervalIndex.contains RT03" \
-        -i "pandas.IntervalIndex.get_indexer PR07,SA01" \
         -i "pandas.IntervalIndex.get_loc PR07,RT03,SA01" \
         -i "pandas.IntervalIndex.is_non_overlapping_monotonic SA01" \
         -i "pandas.IntervalIndex.left GL08" \
@@ -123,7 +121,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.MultiIndex.copy PR07,RT03,SA01" \
         -i "pandas.MultiIndex.drop PR07,RT03,SA01" \
         -i "pandas.MultiIndex.dtypes SA01" \
-        -i "pandas.MultiIndex.get_indexer PR07,SA01" \
         -i "pandas.MultiIndex.get_level_values SA01" \
         -i "pandas.MultiIndex.get_loc PR07" \
         -i "pandas.MultiIndex.get_loc_level PR07" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 054f522e7a37b..46da27e216986 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3543,6 +3543,7 @@ def get_indexer(
         Parameters
         ----------
         target : Index
+            An iterable containing the values to be used for computing indexer.
         method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
             * default: exact matches only.
             * pad / ffill: find the PREVIOUS index value if no exact match.
@@ -3570,6 +3571,12 @@ def get_indexer(
             positions matches the corresponding target values. Missing values
             in the target are marked by -1.
 
+        See Also
+        --------
+        Index.get_indexer_for : Returns an indexer even when non-unique.
+        Index.get_indexer_non_unique : Returns indexer and masks for new index given
+            the current index.
+
         Notes
         -----
         Returns -1 for unmatched values, for further explanation see the

From 66daafc734873d0dac917f616ccc7e045becf153 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Wed, 1 May 2024 22:24:50 +0530
Subject: [PATCH 090/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.get_indexer_non_unique (#58508)

* DOC: remove PR07,SA01 in pandas.Index.get_indexer_non_unique

* DOC: remove PR07,SA01 in pandas.Index.get_indexer_non_unique
---
 ci/code_checks.sh           | 1 -
 pandas/core/indexes/base.py | 7 +++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 91335719cf303..8081efd008147 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -88,7 +88,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
         -i "pandas.Index.get_indexer_for PR01,SA01" \
-        -i "pandas.Index.get_indexer_non_unique PR07,SA01" \
         -i "pandas.Index.get_loc PR07,RT03,SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
         -i "pandas.Index.names GL08" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 46da27e216986..93be22ca7d5f8 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5850,6 +5850,7 @@ def _should_fallback_to_positional(self) -> bool:
         Parameters
         ----------
         target : %(target_klass)s
+            An iterable containing the values to be used for computing indexer.
 
         Returns
         -------
@@ -5861,6 +5862,12 @@ def _should_fallback_to_positional(self) -> bool:
             An indexer into the target of the values not found.
             These correspond to the -1 in the indexer array.
 
+        See Also
+        --------
+        Index.get_indexer : Computes indexer and mask for new index given
+            the current index.
+        Index.get_indexer_for : Returns an indexer even when non-unique.
+
         Examples
         --------
         >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])

From 7257a89b5e4c6be3eb92c596f4a85956a91de24a Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Wed, 1 May 2024 22:28:41 +0530
Subject: [PATCH 091/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.putmask (#58510)

* DOC: add PR01,RT03 in pandas.Index.putmask

* DOC: remove PR01,RT03 in pandas.Index.putmask
---
 ci/code_checks.sh           |  1 -
 pandas/core/indexes/base.py | 10 ++++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 8081efd008147..a3db2559315d0 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -91,7 +91,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.get_loc PR07,RT03,SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
         -i "pandas.Index.names GL08" \
-        -i "pandas.Index.putmask PR01,RT03" \
         -i "pandas.Index.ravel PR01,RT03" \
         -i "pandas.Index.str PR01,SA01" \
         -i "pandas.Int16Dtype SA01" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 93be22ca7d5f8..3952503581bba 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5260,9 +5260,19 @@ def putmask(self, mask, value) -> Index:
         """
         Return a new Index of the values set with the mask.
 
+        Parameters
+        ----------
+        mask : np.ndarray[bool]
+            Array of booleans denoting the positions at which values in
+            the original data are replaced by ``value``.
+        value : scalar
+            Scalar value to use to fill holes (e.g. 0).
+            This value cannot be list-like.
+
         Returns
         -------
         Index
+            A new Index of the values set with the mask.
 
         See Also
         --------

From 439526c9d434c520a3a977178949285042b6d773 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Wed, 1 May 2024 22:33:12 +0530
Subject: [PATCH 092/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DataFrame.prod (#58512)

* DOC: add RT03 in pandas.DataFrame.prod

* DOC: remove RT03 in pandas.DataFrame.prod

* DOC: remove RT03 in pandas.DataFrame.product

* DOC: add RT03 in pandas.DataFrame.prod
---
 ci/code_checks.sh    |  2 --
 pandas/core/frame.py | 68 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index a3db2559315d0..10e36be9c3efc 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -75,8 +75,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.median RT03,SA01" \
         -i "pandas.DataFrame.min RT03" \
         -i "pandas.DataFrame.plot PR02,SA01" \
-        -i "pandas.DataFrame.prod RT03" \
-        -i "pandas.DataFrame.product RT03" \
         -i "pandas.DataFrame.sem PR01,RT03,SA01" \
         -i "pandas.DataFrame.skew RT03,SA01" \
         -i "pandas.DataFrame.sparse PR01" \
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 3d2a6093464a9..8bb0608e0bcd5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -11730,7 +11730,6 @@ def sum(
         return result
 
     @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod")
-    @doc(make_doc("prod", ndim=2))
     def prod(
         self,
         axis: Axis | None = 0,
@@ -11739,6 +11738,73 @@ def prod(
         min_count: int = 0,
         **kwargs,
     ) -> Series:
+        """
+        Return the product of the values over the requested axis.
+
+        Parameters
+        ----------
+        axis : {index (0), columns (1)}
+            Axis for the function to be applied on.
+            For `Series` this parameter is unused and defaults to 0.
+
+            .. warning::
+
+                The behavior of DataFrame.prod with ``axis=None`` is deprecated,
+                in a future version this will reduce over both axes and return a scalar
+                To retain the old behavior, pass axis=0 (or do not pass axis).
+
+            .. versionadded:: 2.0.0
+
+        skipna : bool, default True
+            Exclude NA/null values when computing the result.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns. Not implemented for Series.
+
+        min_count : int, default 0
+            The required number of valid values to perform the operation. If fewer than
+            ``min_count`` non-NA values are present the result will be NA.
+        **kwargs
+            Additional keyword arguments to be passed to the function.
+
+        Returns
+        -------
+        Series or scalar
+            The product of the values over the requested axis.
+
+        See Also
+        --------
+        Series.sum : Return the sum.
+        Series.min : Return the minimum.
+        Series.max : Return the maximum.
+        Series.idxmin : Return the index of the minimum.
+        Series.idxmax : Return the index of the maximum.
+        DataFrame.sum : Return the sum over the requested axis.
+        DataFrame.min : Return the minimum over the requested axis.
+        DataFrame.max : Return the maximum over the requested axis.
+        DataFrame.idxmin : Return the index of the minimum over the requested axis.
+        DataFrame.idxmax : Return the index of the maximum over the requested axis.
+
+        Examples
+        --------
+        By default, the product of an empty or all-NA Series is ``1``
+
+        >>> pd.Series([], dtype="float64").prod()
+        1.0
+
+        This can be controlled with the ``min_count`` parameter
+
+        >>> pd.Series([], dtype="float64").prod(min_count=1)
+        nan
+
+        Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
+        empty series identically.
+
+        >>> pd.Series([np.nan]).prod()
+        1.0
+
+        >>> pd.Series([np.nan]).prod(min_count=1)
+        nan
+        """
         result = super().prod(
             axis=axis,
             skipna=skipna,

From 8cc036ccf1c568e9077a29539edfa15028ab5ec1 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Wed, 1 May 2024 22:33:53 +0530
Subject: [PATCH 093/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DataFrame.skew (#58514)

DOC: remove RT03,SA01 in pandas.DataFrame.skew
---
 ci/code_checks.sh    |  1 -
 pandas/core/frame.py | 75 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 10e36be9c3efc..7e4c7cb527a62 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -76,7 +76,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.min RT03" \
         -i "pandas.DataFrame.plot PR02,SA01" \
         -i "pandas.DataFrame.sem PR01,RT03,SA01" \
-        -i "pandas.DataFrame.skew RT03,SA01" \
         -i "pandas.DataFrame.sparse PR01" \
         -i "pandas.DataFrame.std PR01,RT03,SA01" \
         -i "pandas.DataFrame.sum RT03" \
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8bb0608e0bcd5..88e4d695b8328 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -12095,7 +12095,6 @@ def skew(
     ) -> Series | Any: ...
 
     @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew")
-    @doc(make_doc("skew", ndim=2))
     def skew(
         self,
         axis: Axis | None = 0,
@@ -12103,6 +12102,80 @@ def skew(
         numeric_only: bool = False,
         **kwargs,
     ) -> Series | Any:
+        """
+        Return unbiased skew over requested axis.
+
+        Normalized by N-1.
+
+        Parameters
+        ----------
+        axis : {index (0), columns (1)}
+            Axis for the function to be applied on.
+            For `Series` this parameter is unused and defaults to 0.
+
+            For DataFrames, specifying ``axis=None`` will apply the aggregation
+            across both axes.
+
+            .. versionadded:: 2.0.0
+
+        skipna : bool, default True
+            Exclude NA/null values when computing the result.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+
+        **kwargs
+            Additional keyword arguments to be passed to the function.
+
+        Returns
+        -------
+        Series or scalar
+            Unbiased skew over requested axis.
+
+        See Also
+        --------
+        DataFrame.kurt : Returns unbiased kurtosis over requested axis.
+
+        Examples
+        --------
+        >>> s = pd.Series([1, 2, 3])
+        >>> s.skew()
+        0.0
+
+        With a DataFrame
+
+        >>> df = pd.DataFrame(
+        ...     {"a": [1, 2, 3], "b": [2, 3, 4], "c": [1, 3, 5]},
+        ...     index=["tiger", "zebra", "cow"],
+        ... )
+        >>> df
+                a   b   c
+        tiger   1   2   1
+        zebra   2   3   3
+        cow     3   4   5
+        >>> df.skew()
+        a   0.0
+        b   0.0
+        c   0.0
+        dtype: float64
+
+        Using axis=1
+
+        >>> df.skew(axis=1)
+        tiger   1.732051
+        zebra  -1.732051
+        cow     0.000000
+        dtype: float64
+
+        In this case, `numeric_only` should be set to `True` to avoid
+        getting an error.
+
+        >>> df = pd.DataFrame(
+        ...     {"a": [1, 2, 3], "b": ["T", "Z", "X"]}, index=["tiger", "zebra", "cow"]
+        ... )
+        >>> df.skew(numeric_only=True)
+        a   0.0
+        dtype: float64
+        """
         result = super().skew(
             axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
         )

From d27670976c862e1039d954caad0b6388014b694a Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Thu, 2 May 2024 01:02:39 +0530
Subject: [PATCH 094/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.get_indexer_for (#58507)

* DOC: add PR01,SA01 in pandas.Index.get_indexer_for

* DOC: remove PR01,SA01 in pandas.Index.get_indexer_for
---
 ci/code_checks.sh           |  1 -
 pandas/core/indexes/base.py | 12 ++++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 7e4c7cb527a62..da0d98eff5e46 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
-        -i "pandas.Index.get_indexer_for PR01,SA01" \
         -i "pandas.Index.get_loc PR07,RT03,SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
         -i "pandas.Index.names GL08" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 3952503581bba..048362a28dfd7 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -5953,11 +5953,23 @@ def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
         This dispatches to get_indexer or get_indexer_non_unique
         as appropriate.
 
+        Parameters
+        ----------
+        target : Index
+            An iterable containing the values to be used for computing the indexer.
+
         Returns
         -------
         np.ndarray[np.intp]
             List of indices.
 
+        See Also
+        --------
+        Index.get_indexer : Computes indexer and mask for new index given
+            the current index.
+        Index.get_indexer_non_unique : Returns indexer and masks for new index given
+            the current index.
+
         Examples
         --------
         >>> idx = pd.Index([np.nan, "var1", np.nan])

From 33baa453e5a33cf704c9a9fb7e677e438f5fa1dc Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Thu, 2 May 2024 01:08:51 +0530
Subject: [PATCH 095/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DataFrame.sparse (#58515)

* DOC: remove PR01 in pandas.DataFrame.sparse

* DOC: remove PR01 in pandas.DataFrame.sparse
---
 ci/code_checks.sh                     | 1 -
 pandas/core/arrays/sparse/accessor.py | 5 +++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index da0d98eff5e46..8364314ca55be 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -76,7 +76,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.min RT03" \
         -i "pandas.DataFrame.plot PR02,SA01" \
         -i "pandas.DataFrame.sem PR01,RT03,SA01" \
-        -i "pandas.DataFrame.sparse PR01" \
         -i "pandas.DataFrame.std PR01,RT03,SA01" \
         -i "pandas.DataFrame.sum RT03" \
         -i "pandas.DataFrame.swaplevel SA01" \
diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
index 1f82285e3e40e..6a1c25711acb0 100644
--- a/pandas/core/arrays/sparse/accessor.py
+++ b/pandas/core/arrays/sparse/accessor.py
@@ -243,6 +243,11 @@ class SparseFrameAccessor(BaseAccessor, PandasDelegate):
     """
     DataFrame accessor for sparse data.
 
+    Parameters
+    ----------
+    data : scipy.sparse.spmatrix
+        Must be convertible to csc format.
+
     See Also
     --------
     DataFrame.sparse.density : Ratio of non-sparse points to total (dense) data points.

From 7320430af7d1e19fbb4eb9e447ef270848495729 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Thu, 2 May 2024 01:09:28 +0530
Subject: [PATCH 096/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Int (#58511)

* DOC: add SA01 for Int16Dtype,Int32Dtype,Int64Dtype,Int8Dtype

* DOC: remove SA01 for Int16Dtype,Int32Dtype,Int64Dtype,Int8Dtype

* DOC: remove SA01 for Int16Dtype,Int32Dtype,Int64Dtype,Int8Dtype

* DOC: change description to n-bit nullable integer type
---
 ci/code_checks.sh             | 8 --------
 pandas/core/arrays/integer.py | 7 +++++++
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 8364314ca55be..cde9f9dd43280 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -88,10 +88,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.names GL08" \
         -i "pandas.Index.ravel PR01,RT03" \
         -i "pandas.Index.str PR01,SA01" \
-        -i "pandas.Int16Dtype SA01" \
-        -i "pandas.Int32Dtype SA01" \
-        -i "pandas.Int64Dtype SA01" \
-        -i "pandas.Int8Dtype SA01" \
         -i "pandas.Interval PR02" \
         -i "pandas.Interval.closed SA01" \
         -i "pandas.Interval.left SA01" \
@@ -391,10 +387,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timestamp.weekday SA01" \
         -i "pandas.Timestamp.weekofyear SA01" \
         -i "pandas.Timestamp.year GL08" \
-        -i "pandas.UInt16Dtype SA01" \
-        -i "pandas.UInt32Dtype SA01" \
-        -i "pandas.UInt64Dtype SA01" \
-        -i "pandas.UInt8Dtype SA01" \
         -i "pandas.api.extensions.ExtensionArray SA01" \
         -i "pandas.api.extensions.ExtensionArray._accumulate RT03,SA01" \
         -i "pandas.api.extensions.ExtensionArray._concat_same_type PR07,SA01" \
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 21a9b09227663..f85fbd062b0c3 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -144,6 +144,13 @@ class IntegerArray(NumericArray):
 -------
 None
 
+See Also
+--------
+Int8Dtype : 8-bit nullable integer type.
+Int16Dtype : 16-bit nullable integer type.
+Int32Dtype : 32-bit nullable integer type.
+Int64Dtype : 64-bit nullable integer type.
+
 Examples
 --------
 For Int8Dtype:

From f6932cb8c538e89d231bbd10b4b422f8b3d41f39 Mon Sep 17 00:00:00 2001
From: iangainey <109095667+iangainey@users.noreply.github.com>
Date: Wed, 1 May 2024 17:01:14 -0400
Subject: [PATCH 097/100] REF: Read excel parse refactor (#58497)

---
 pandas/io/excel/_base.py | 304 +++++++++++++++++++++++----------------
 1 file changed, 178 insertions(+), 126 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 2b35cfa044ae9..6063ac098a4dc 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -780,143 +780,195 @@ def parse(
                 output[asheetname] = DataFrame()
                 continue
 
-            is_list_header = False
-            is_len_one_list_header = False
-            if is_list_like(header):
-                assert isinstance(header, Sequence)
-                is_list_header = True
-                if len(header) == 1:
-                    is_len_one_list_header = True
-
-            if is_len_one_list_header:
-                header = cast(Sequence[int], header)[0]
-
-            # forward fill and pull out names for MultiIndex column
-            header_names = None
-            if header is not None and is_list_like(header):
-                assert isinstance(header, Sequence)
-
-                header_names = []
-                control_row = [True] * len(data[0])
-
-                for row in header:
-                    if is_integer(skiprows):
-                        assert isinstance(skiprows, int)
-                        row += skiprows
-
-                    if row > len(data) - 1:
-                        raise ValueError(
-                            f"header index {row} exceeds maximum index "
-                            f"{len(data) - 1} of data.",
-                        )
-
-                    data[row], control_row = fill_mi_header(data[row], control_row)
-
-                    if index_col is not None:
-                        header_name, _ = pop_header_name(data[row], index_col)
-                        header_names.append(header_name)
-
-            # If there is a MultiIndex header and an index then there is also
-            # a row containing just the index name(s)
-            has_index_names = False
-            if is_list_header and not is_len_one_list_header and index_col is not None:
-                index_col_list: Sequence[int]
-                if isinstance(index_col, int):
-                    index_col_list = [index_col]
-                else:
-                    assert isinstance(index_col, Sequence)
-                    index_col_list = index_col
-
-                # We have to handle mi without names. If any of the entries in the data
-                # columns are not empty, this is a regular row
-                assert isinstance(header, Sequence)
-                if len(header) < len(data):
-                    potential_index_names = data[len(header)]
-                    potential_data = [
-                        x
-                        for i, x in enumerate(potential_index_names)
-                        if not control_row[i] and i not in index_col_list
-                    ]
-                    has_index_names = all(x == "" or x is None for x in potential_data)
-
-            if is_list_like(index_col):
-                # Forward fill values for MultiIndex index.
-                if header is None:
-                    offset = 0
-                elif isinstance(header, int):
-                    offset = 1 + header
-                else:
-                    offset = 1 + max(header)
+            output = self._parse_sheet(
+                data=data,
+                output=output,
+                asheetname=asheetname,
+                header=header,
+                names=names,
+                index_col=index_col,
+                usecols=usecols,
+                dtype=dtype,
+                skiprows=skiprows,
+                nrows=nrows,
+                true_values=true_values,
+                false_values=false_values,
+                na_values=na_values,
+                parse_dates=parse_dates,
+                date_parser=date_parser,
+                date_format=date_format,
+                thousands=thousands,
+                decimal=decimal,
+                comment=comment,
+                skipfooter=skipfooter,
+                dtype_backend=dtype_backend,
+                **kwds,
+            )
 
-                # GH34673: if MultiIndex names present and not defined in the header,
-                # offset needs to be incremented so that forward filling starts
-                # from the first MI value instead of the name
-                if has_index_names:
-                    offset += 1
+        if last_sheetname is None:
+            raise ValueError("Sheet name is an empty list")
 
-                # Check if we have an empty dataset
-                # before trying to collect data.
-                if offset < len(data):
-                    assert isinstance(index_col, Sequence)
+        if ret_dict:
+            return output
+        else:
+            return output[last_sheetname]
 
-                    for col in index_col:
-                        last = data[offset][col]
+    def _parse_sheet(
+        self,
+        data: list,
+        output: dict,
+        asheetname: str | int | None = None,
+        header: int | Sequence[int] | None = 0,
+        names: SequenceNotStr[Hashable] | range | None = None,
+        index_col: int | Sequence[int] | None = None,
+        usecols=None,
+        dtype: DtypeArg | None = None,
+        skiprows: Sequence[int] | int | Callable[[int], object] | None = None,
+        nrows: int | None = None,
+        true_values: Iterable[Hashable] | None = None,
+        false_values: Iterable[Hashable] | None = None,
+        na_values=None,
+        parse_dates: list | dict | bool = False,
+        date_parser: Callable | lib.NoDefault = lib.no_default,
+        date_format: dict[Hashable, str] | str | None = None,
+        thousands: str | None = None,
+        decimal: str = ".",
+        comment: str | None = None,
+        skipfooter: int = 0,
+        dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
+        **kwds,
+    ):
+        is_list_header = False
+        is_len_one_list_header = False
+        if is_list_like(header):
+            assert isinstance(header, Sequence)
+            is_list_header = True
+            if len(header) == 1:
+                is_len_one_list_header = True
+
+        if is_len_one_list_header:
+            header = cast(Sequence[int], header)[0]
+
+        # forward fill and pull out names for MultiIndex column
+        header_names = None
+        if header is not None and is_list_like(header):
+            assert isinstance(header, Sequence)
+
+            header_names = []
+            control_row = [True] * len(data[0])
+
+            for row in header:
+                if is_integer(skiprows):
+                    assert isinstance(skiprows, int)
+                    row += skiprows
+
+                if row > len(data) - 1:
+                    raise ValueError(
+                        f"header index {row} exceeds maximum index "
+                        f"{len(data) - 1} of data.",
+                    )
 
-                        for row in range(offset + 1, len(data)):
-                            if data[row][col] == "" or data[row][col] is None:
-                                data[row][col] = last
-                            else:
-                                last = data[row][col]
+                data[row], control_row = fill_mi_header(data[row], control_row)
 
-            # GH 12292 : error when read one empty column from excel file
-            try:
-                parser = TextParser(
-                    data,
-                    names=names,
-                    header=header,
-                    index_col=index_col,
-                    has_index_names=has_index_names,
-                    dtype=dtype,
-                    true_values=true_values,
-                    false_values=false_values,
-                    skiprows=skiprows,
-                    nrows=nrows,
-                    na_values=na_values,
-                    skip_blank_lines=False,  # GH 39808
-                    parse_dates=parse_dates,
-                    date_parser=date_parser,
-                    date_format=date_format,
-                    thousands=thousands,
-                    decimal=decimal,
-                    comment=comment,
-                    skipfooter=skipfooter,
-                    usecols=usecols,
-                    dtype_backend=dtype_backend,
-                    **kwds,
-                )
+                if index_col is not None:
+                    header_name, _ = pop_header_name(data[row], index_col)
+                    header_names.append(header_name)
 
-                output[asheetname] = parser.read(nrows=nrows)
+        # If there is a MultiIndex header and an index then there is also
+        # a row containing just the index name(s)
+        has_index_names = False
+        if is_list_header and not is_len_one_list_header and index_col is not None:
+            index_col_list: Sequence[int]
+            if isinstance(index_col, int):
+                index_col_list = [index_col]
+            else:
+                assert isinstance(index_col, Sequence)
+                index_col_list = index_col
+
+            # We have to handle mi without names. If any of the entries in the data
+            # columns are not empty, this is a regular row
+            assert isinstance(header, Sequence)
+            if len(header) < len(data):
+                potential_index_names = data[len(header)]
+                potential_data = [
+                    x
+                    for i, x in enumerate(potential_index_names)
+                    if not control_row[i] and i not in index_col_list
+                ]
+                has_index_names = all(x == "" or x is None for x in potential_data)
+
+        if is_list_like(index_col):
+            # Forward fill values for MultiIndex index.
+            if header is None:
+                offset = 0
+            elif isinstance(header, int):
+                offset = 1 + header
+            else:
+                offset = 1 + max(header)
+
+            # GH34673: if MultiIndex names present and not defined in the header,
+            # offset needs to be incremented so that forward filling starts
+            # from the first MI value instead of the name
+            if has_index_names:
+                offset += 1
+
+            # Check if we have an empty dataset
+            # before trying to collect data.
+            if offset < len(data):
+                assert isinstance(index_col, Sequence)
+
+                for col in index_col:
+                    last = data[offset][col]
+
+                    for row in range(offset + 1, len(data)):
+                        if data[row][col] == "" or data[row][col] is None:
+                            data[row][col] = last
+                        else:
+                            last = data[row][col]
+
+        # GH 12292 : error when read one empty column from excel file
+        try:
+            parser = TextParser(
+                data,
+                names=names,
+                header=header,
+                index_col=index_col,
+                has_index_names=has_index_names,
+                dtype=dtype,
+                true_values=true_values,
+                false_values=false_values,
+                skiprows=skiprows,
+                nrows=nrows,
+                na_values=na_values,
+                skip_blank_lines=False,  # GH 39808
+                parse_dates=parse_dates,
+                date_parser=date_parser,
+                date_format=date_format,
+                thousands=thousands,
+                decimal=decimal,
+                comment=comment,
+                skipfooter=skipfooter,
+                usecols=usecols,
+                dtype_backend=dtype_backend,
+                **kwds,
+            )
 
-                if header_names:
-                    output[asheetname].columns = output[asheetname].columns.set_names(
-                        header_names
-                    )
+            output[asheetname] = parser.read(nrows=nrows)
 
-            except EmptyDataError:
-                # No Data, return an empty DataFrame
-                output[asheetname] = DataFrame()
+            if header_names:
+                output[asheetname].columns = output[asheetname].columns.set_names(
+                    header_names
+                )
 
-            except Exception as err:
-                err.args = (f"{err.args[0]} (sheet: {asheetname})", *err.args[1:])
-                raise err
+        except EmptyDataError:
+            # No Data, return an empty DataFrame
+            output[asheetname] = DataFrame()
 
-        if last_sheetname is None:
-            raise ValueError("Sheet name is an empty list")
+        except Exception as err:
+            err.args = (f"{err.args[0]} (sheet: {asheetname})", *err.args[1:])
+            raise err
 
-        if ret_dict:
-            return output
-        else:
-            return output[last_sheetname]
+        return output
 
 
 @doc(storage_options=_shared_docs["storage_options"])

From 564d0d9e04970538951e307911f0af2c44414841 Mon Sep 17 00:00:00 2001
From: undermyumbrella1 <120079323+undermyumbrella1@users.noreply.github.com>
Date: Thu, 2 May 2024 05:05:26 +0800
Subject: [PATCH 098/100] BUG: as_index=False can return a MultiIndex in
 groupby.apply (#58369)

---
 doc/source/whatsnew/v3.0.0.rst                    | 1 +
 pandas/core/groupby/groupby.py                    | 5 +----
 pandas/tests/groupby/methods/test_value_counts.py | 9 +++------
 pandas/tests/groupby/test_apply.py                | 2 +-
 pandas/tests/groupby/test_apply_mutate.py         | 4 +---
 pandas/tests/groupby/test_groupby.py              | 9 ++++-----
 6 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index ce9022bdc2967..9e7349a061295 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -449,6 +449,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
 - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
+- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
 
 
 Reshaping
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 79d9f49a3b355..f44ef8c4dbbfa 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1202,10 +1202,7 @@ def _concat_objects(
                     sort=False,
                 )
             else:
-                # GH5610, returns a MI, with the first level being a
-                # range index
-                keys = RangeIndex(len(values))
-                result = concat(values, axis=0, keys=keys)
+                result = concat(values, axis=0)
 
         elif not not_indexed_same:
             result = concat(values, axis=0)
diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
index be52b4a591c26..0f136b06c782a 100644
--- a/pandas/tests/groupby/methods/test_value_counts.py
+++ b/pandas/tests/groupby/methods/test_value_counts.py
@@ -329,13 +329,10 @@ def test_against_frame_and_seriesgroupby(
         else:
             name = "proportion" if normalize else "count"
             expected = expected.reset_index().rename({0: name}, axis=1)
-            if groupby == "column":
-                expected = expected.rename({"level_0": "country"}, axis=1)
-                expected["country"] = np.where(expected["country"], "US", "FR")
-            elif groupby == "function":
-                expected["level_0"] = expected["level_0"] == 1
+            if groupby in ["array", "function"] and (not as_index and frame):
+                expected.insert(loc=0, column="level_0", value=result["level_0"])
             else:
-                expected["level_0"] = np.where(expected["level_0"], "US", "FR")
+                expected.insert(loc=0, column="country", value=result["country"])
             tm.assert_frame_equal(result, expected)
     else:
         # compare against SeriesGroupBy value_counts
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 1a2589fe94ea5..e27c782c1bdcf 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -315,7 +315,7 @@ def test_groupby_as_index_apply():
 
     # apply doesn't maintain the original ordering
     # changed in GH5610 as the as_index=False returns a MI here
-    exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (2, 4)])
+    exp_not_as_apply = Index([0, 2, 1, 4])
     tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
     exp_as_apply = MultiIndex.from_tuples(tp, names=["user_id", None])
 
diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py
index e5028884e992b..fa20efad4da77 100644
--- a/pandas/tests/groupby/test_apply_mutate.py
+++ b/pandas/tests/groupby/test_apply_mutate.py
@@ -90,9 +90,7 @@ def fn(x):
         result = df.groupby(["col1"], as_index=False).apply(fn)
     expected = pd.Series(
         [1, 2, 0, 4, 5, 0],
-        index=pd.MultiIndex.from_tuples(
-            [(0, 0), (0, 1), (0, 2), (1, 3), (1, 4), (1, 5)]
-        ),
+        index=range(6),
         name="col2",
     )
     tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 54d7895691f3f..d50fea459552a 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -113,8 +113,9 @@ def f(x, q=None, axis=0):
         expected_seq = df_grouped.quantile([0.4, 0.8])
         if not as_index:
             # apply treats the op as a transform; .quantile knows it's a reduction
-            apply_result = apply_result.reset_index()
-            apply_result["level_0"] = [1, 1, 2, 2]
+            apply_result.index = range(4)
+            apply_result.insert(loc=0, column="level_0", value=[1, 1, 2, 2])
+            apply_result.insert(loc=1, column="level_1", value=[0.4, 0.8, 0.4, 0.8])
         tm.assert_frame_equal(apply_result, expected_seq, check_names=False)
 
         agg_result = df_grouped.agg(f, q=80)
@@ -519,9 +520,7 @@ def test_as_index_select_column():
     result = df.groupby("A", as_index=False, group_keys=True)["B"].apply(
         lambda x: x.cumsum()
     )
-    expected = Series(
-        [2, 6, 6], name="B", index=MultiIndex.from_tuples([(0, 0), (0, 1), (1, 2)])
-    )
+    expected = Series([2, 6, 6], name="B", index=range(3))
     tm.assert_series_equal(result, expected)
 
 

From 9250bf7829adeac62461dd7aed4d1b2cb790a35d Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Thu, 2 May 2024 04:07:09 +0530
Subject: [PATCH 099/100] DOC: Enforce Numpy Docstring Validation for
 pandas.DataFrame.sem (#58513)

* DOC: add PR01,RT03,SA01 in pandas.DataFrame.sem

* DOC: remove PR01,RT03,SA01 in pandas.DataFrame.sem
---
 ci/code_checks.sh    |  1 -
 pandas/core/frame.py | 71 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index cde9f9dd43280..43c80cf80d487 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -75,7 +75,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.median RT03,SA01" \
         -i "pandas.DataFrame.min RT03" \
         -i "pandas.DataFrame.plot PR02,SA01" \
-        -i "pandas.DataFrame.sem PR01,RT03,SA01" \
         -i "pandas.DataFrame.std PR01,RT03,SA01" \
         -i "pandas.DataFrame.sum RT03" \
         -i "pandas.DataFrame.swaplevel SA01" \
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 88e4d695b8328..96943eb71c7bd 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -11945,7 +11945,6 @@ def sem(
     ) -> Series | Any: ...
 
     @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem")
-    @doc(make_doc("sem", ndim=2))
     def sem(
         self,
         axis: Axis | None = 0,
@@ -11954,6 +11953,76 @@ def sem(
         numeric_only: bool = False,
         **kwargs,
     ) -> Series | Any:
+        """
+        Return unbiased standard error of the mean over requested axis.
+
+        Normalized by N-1 by default. This can be changed using the ddof argument.
+
+        Parameters
+        ----------
+        axis : {index (0), columns (1)}
+            For `Series` this parameter is unused and defaults to 0.
+
+            .. warning::
+
+                The behavior of DataFrame.sem with ``axis=None`` is deprecated,
+                in a future version this will reduce over both axes and return a scalar.
+                To retain the old behavior, pass axis=0 (or do not pass axis).
+
+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
+        ddof : int, default 1
+            Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
+            where N represents the number of elements.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns. Not implemented for Series.
+        **kwargs :
+            Additional keywords passed.
+
+        Returns
+        -------
+        Series or DataFrame (if level specified)
+            Unbiased standard error of the mean over requested axis.
+
+        See Also
+        --------
+        DataFrame.var : Return unbiased variance over requested axis.
+        DataFrame.std : Returns sample standard deviation over requested axis.
+
+        Examples
+        --------
+        >>> s = pd.Series([1, 2, 3])
+        >>> s.sem().round(6)
+        0.57735
+
+        With a DataFrame
+
+        >>> df = pd.DataFrame({"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"])
+        >>> df
+               a   b
+        tiger  1   2
+        zebra  2   3
+        >>> df.sem()
+        a   0.5
+        b   0.5
+        dtype: float64
+
+        Using axis=1
+
+        >>> df.sem(axis=1)
+        tiger   0.5
+        zebra   0.5
+        dtype: float64
+
+        In this case, `numeric_only` should be set to `True`
+        to avoid getting an error.
+
+        >>> df = pd.DataFrame({"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"])
+        >>> df.sem(numeric_only=True)
+        a   0.5
+        dtype: float64
+        """
         result = super().sem(
             axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs
         )

From 9110f7cdac46c69212512635a7fdc99963540c30 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma <tuhinsharma121@gmail.com>
Date: Thu, 2 May 2024 04:07:33 +0530
Subject: [PATCH 100/100] DOC: Enforce Numpy Docstring Validation for
 pandas.Index.get_loc (#58509)

* DOC: add PR07,RT03,SA01 in pandas.Index.get_loc

* DOC: remove PR07,RT03,SA01 in pandas.Index.get_loc
---
 ci/code_checks.sh           |  1 -
 pandas/core/indexes/base.py | 12 ++++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 43c80cf80d487..996f361e9440f 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -82,7 +82,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
         -i "pandas.Grouper PR02" \
         -i "pandas.Index PR07" \
-        -i "pandas.Index.get_loc PR07,RT03,SA01" \
         -i "pandas.Index.join PR07,RT03,SA01" \
         -i "pandas.Index.names GL08" \
         -i "pandas.Index.ravel PR01,RT03" \
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 048362a28dfd7..e93db22906b39 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3490,10 +3490,22 @@ def get_loc(self, key):
         Parameters
         ----------
         key : label
+            The key whose location to return, if it is present in the index.
 
         Returns
         -------
         int if unique index, slice if monotonic index, else mask
+            Integer location, slice or boolean mask.
+
+        See Also
+        --------
+        Index.get_slice_bound : Calculate slice bound that corresponds to
+            given label.
+        Index.get_indexer : Computes indexer and mask for new index given
+            the current index.
+        Index.get_indexer_non_unique : Returns indexer and masks for new index given
+            the current index.
+        Index.get_indexer_for : Returns an indexer even when non-unique.
 
         Examples
         --------