From 93a67dbbb6f8adad0061eaa21a046ba44e297957 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 29 May 2019 11:17:24 +0100 Subject: [PATCH 01/51] CI: unary operator expected error in log files (#26547) --- ci/setup_env.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/setup_env.sh b/ci/setup_env.sh index e2667558a63d7..8f73bb228e2bd 100755 --- a/ci/setup_env.sh +++ b/ci/setup_env.sh @@ -118,12 +118,12 @@ echo "conda list" conda list # Install DB for Linux -if [ ${TRAVIS_OS_NAME} == "linux" ]; then +if [ "${TRAVIS_OS_NAME}" == "linux" ]; then echo "installing dbs" mysql -e 'create database pandas_nosetest;' psql -c 'create database pandas_nosetest;' -U postgres else - echo "not using dbs on non-linux" + echo "not using dbs on non-linux Travis builds or Azure Pipelines" fi echo "done" From a91da0c94e541217865cdf52b9f6ea694f0493d3 Mon Sep 17 00:00:00 2001 From: Vaibhav Vishal Date: Wed, 29 May 2019 18:10:39 +0530 Subject: [PATCH 02/51] Fix type annotations in pandas.core.indexes.period (#26518) --- mypy.ini | 3 --- pandas/core/indexes/period.py | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/mypy.ini b/mypy.ini index 3df8fd13a2a75..eea6a3b551677 100644 --- a/mypy.ini +++ b/mypy.ini @@ -7,6 +7,3 @@ ignore_errors=True [mypy-pandas.core.indexes.datetimelike] ignore_errors=True - -[mypy-pandas.core.indexes.period] -ignore_errors=True diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 044951ceda502..64272431cf703 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -80,7 +80,7 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin): Parameters ---------- - data : array-like (1-dimensional), optional + data : array-like (1d integer np.ndarray or PeriodArray), optional Optional period-like data to construct index with copy : bool Make a copy of input ndarray @@ -168,7 +168,7 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin): _is_numeric_dtype = False _infer_as_myclass = True - _data = None # type: PeriodArray + _data = None _engine_type = libindex.PeriodEngine From 5488636266bcd78282c66d551b452ab38da17bd0 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 29 May 2019 18:02:00 -0700 Subject: [PATCH 03/51] Fixturize Test Excel (#26543) --- pandas/tests/io/test_excel.py | 614 +++++++++++++++------------------- 1 file changed, 272 insertions(+), 342 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 100de227aa97c..6db3d1d4ab34d 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -22,7 +22,7 @@ from pandas.io.common import URLError from pandas.io.excel import ( ExcelFile, ExcelWriter, _OpenpyxlWriter, _XlsxWriter, _XlwtWriter, - read_excel, register_writer) + register_writer) from pandas.io.formats.excel import ExcelFormatter from pandas.io.parsers import read_csv @@ -53,7 +53,6 @@ class SharedItems: @pytest.fixture(autouse=True) def setup_method(self, datapath): - self.dirpath = datapath("io", "data") self.frame = _frame.copy() self.frame2 = _frame2.copy() self.tsframe = _tsframe.copy() @@ -65,135 +64,85 @@ class ReadingTestsBase(SharedItems): # This is based on ExcelWriterBase @pytest.fixture(autouse=True, params=['xlrd', None]) - def set_engine(self, request): - func_name = "get_exceldf" - old_func = getattr(self, func_name) - new_func = partial(old_func, engine=request.param) - setattr(self, func_name, new_func) - yield - setattr(self, func_name, old_func) - - def 
get_csv_refdf(self, basename): + def cd_and_set_engine(self, request, datapath, monkeypatch): """ - Obtain the reference data from read_csv with the Python engine. - - Parameters - ---------- - - basename : str - File base name, excluding file extension. - - Returns - ------- - - dfref : DataFrame + Change directory and set engine for read_excel calls. """ - pref = os.path.join(self.dirpath, basename + '.csv') - dfref = read_csv(pref, index_col=0, parse_dates=True, engine='python') - return dfref + func = partial(pd.read_excel, engine=request.param) + monkeypatch.chdir(datapath("io", "data")) + monkeypatch.setattr(pd, 'read_excel', func) - def get_excelfile(self, basename, ext): + @pytest.fixture + def df_ref(self): """ - Return test data ExcelFile instance. - - Parameters - ---------- - - basename : str - File base name, excluding file extension. - - Returns - ------- - - excel : io.excel.ExcelFile - """ - return ExcelFile(os.path.join(self.dirpath, basename + ext)) - - def get_exceldf(self, basename, ext, *args, **kwds): - """ - Return test data DataFrame. - - Parameters - ---------- - - basename : str - File base name, excluding file extension. - - Returns - ------- - - df : DataFrame + Obtain the reference data from read_csv with the Python engine. """ - pth = os.path.join(self.dirpath, basename + ext) - return read_excel(pth, *args, **kwds) + df_ref = read_csv('test1.csv', index_col=0, + parse_dates=True, engine='python') + return df_ref @td.skip_if_no("xlrd", "1.0.1") # see gh-22682 - def test_usecols_int(self, ext): - - df_ref = self.get_csv_refdf("test1") + def test_usecols_int(self, ext, df_ref): df_ref = df_ref.reindex(columns=["A", "B", "C"]) # usecols as int with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): with ignore_xlrd_time_clock_warning(): - df1 = self.get_exceldf("test1", ext, "Sheet1", - index_col=0, usecols=3) + df1 = pd.read_excel("test1" + ext, "Sheet1", + index_col=0, usecols=3) # usecols as int with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): with ignore_xlrd_time_clock_warning(): - df2 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1], - index_col=0, usecols=3) + df2 = pd.read_excel("test1" + ext, "Sheet2", skiprows=[1], + index_col=0, usecols=3) # TODO add index to xls file) tm.assert_frame_equal(df1, df_ref, check_names=False) tm.assert_frame_equal(df2, df_ref, check_names=False) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 - def test_usecols_list(self, ext): + def test_usecols_list(self, ext, df_ref): - dfref = self.get_csv_refdf('test1') - dfref = dfref.reindex(columns=['B', 'C']) - df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, - usecols=[0, 2, 3]) - df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, usecols=[0, 2, 3]) + df_ref = df_ref.reindex(columns=['B', 'C']) + df1 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0, + usecols=[0, 2, 3]) + df2 = pd.read_excel('test1' + ext, 'Sheet2', skiprows=[1], + index_col=0, usecols=[0, 2, 3]) # TODO add index to xls file) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 - def test_usecols_str(self, ext): + def test_usecols_str(self, ext, df_ref): - dfref = self.get_csv_refdf('test1') - - df1 = dfref.reindex(columns=['A', 'B', 'C']) - df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, - usecols='A:D') - df3 = 
self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, usecols='A:D') + df1 = df_ref.reindex(columns=['A', 'B', 'C']) + df2 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0, + usecols='A:D') + df3 = pd.read_excel('test1' + ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A:D') # TODO add index to xls, read xls ignores index name ? tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) - df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, - usecols='A,C,D') - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, usecols='A,C,D') + df1 = df_ref.reindex(columns=['B', 'C']) + df2 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0, + usecols='A,C,D') + df3 = pd.read_excel('test1' + ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A,C,D') # TODO add index to xls file tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) - df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, - usecols='A,C:D') - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, usecols='A,C:D') + df1 = df_ref.reindex(columns=['B', 'C']) + df2 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0, + usecols='A,C:D') + df3 = pd.read_excel('test1' + ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A,C:D') tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) @@ -202,50 +151,52 @@ def test_usecols_str(self, ext): [1, 0, 3], [1, 3, 0], [3, 0, 1], [3, 1, 0], ]) - def test_usecols_diff_positional_int_columns_order(self, ext, usecols): - expected = self.get_csv_refdf("test1")[["A", "C"]] - result = self.get_exceldf("test1", ext, "Sheet1", - index_col=0, usecols=usecols) + def test_usecols_diff_positional_int_columns_order( + self, ext, usecols, df_ref): + expected = df_ref[["A", "C"]] + result = pd.read_excel("test1" + ext, "Sheet1", + index_col=0, usecols=usecols) tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.parametrize("usecols", [ ["B", "D"], ["D", "B"] ]) - def test_usecols_diff_positional_str_columns_order(self, ext, usecols): - expected = self.get_csv_refdf("test1")[["B", "D"]] + def test_usecols_diff_positional_str_columns_order( + self, ext, usecols, df_ref): + expected = df_ref[["B", "D"]] expected.index = range(len(expected)) - result = self.get_exceldf("test1", ext, "Sheet1", usecols=usecols) + result = pd.read_excel("test1" + ext, "Sheet1", usecols=usecols) tm.assert_frame_equal(result, expected, check_names=False) - def test_read_excel_without_slicing(self, ext): - expected = self.get_csv_refdf("test1") - result = self.get_exceldf("test1", ext, "Sheet1", index_col=0) + def test_read_excel_without_slicing(self, ext, df_ref): + expected = df_ref + result = pd.read_excel("test1" + ext, "Sheet1", index_col=0) tm.assert_frame_equal(result, expected, check_names=False) - def test_usecols_excel_range_str(self, ext): - expected = self.get_csv_refdf("test1")[["C", "D"]] - result = self.get_exceldf("test1", ext, "Sheet1", - index_col=0, usecols="A,D:E") + def test_usecols_excel_range_str(self, ext, df_ref): + expected = df_ref[["C", "D"]] + result = pd.read_excel("test1" + ext, "Sheet1", + index_col=0, usecols="A,D:E") tm.assert_frame_equal(result, expected, check_names=False) def test_usecols_excel_range_str_invalid(self, ext): msg = "Invalid column name: E1" with pytest.raises(ValueError, 
match=msg): - self.get_exceldf("test1", ext, "Sheet1", usecols="D:E1") + pd.read_excel("test1" + ext, "Sheet1", usecols="D:E1") def test_index_col_label_error(self, ext): msg = "list indices must be integers.*, not str" with pytest.raises(TypeError, match=msg): - self.get_exceldf("test1", ext, "Sheet1", index_col=["A"], - usecols=["A", "C"]) + pd.read_excel("test1" + ext, "Sheet1", index_col=["A"], + usecols=["A", "C"]) def test_index_col_empty(self, ext): # see gh-9208 - result = self.get_exceldf("test1", ext, "Sheet3", - index_col=["A", "B", "C"]) + result = pd.read_excel("test1" + ext, "Sheet3", + index_col=["A", "B", "C"]) expected = DataFrame(columns=["D", "E", "F"], index=MultiIndex(levels=[[]] * 3, codes=[[]] * 3, @@ -255,8 +206,7 @@ def test_index_col_empty(self, ext): @pytest.mark.parametrize("index_col", [None, 2]) def test_index_col_with_unnamed(self, ext, index_col): # see gh-18792 - result = self.get_exceldf("test1", ext, "Sheet4", - index_col=index_col) + result = pd.read_excel("test1" + ext, "Sheet4", index_col=index_col) expected = DataFrame([["i1", "a", "x"], ["i2", "b", "y"]], columns=["Unnamed: 0", "col1", "col2"]) if index_col: @@ -269,54 +219,54 @@ def test_usecols_pass_non_existent_column(self, ext): "columns expected but not found: " + r"\['E'\]") with pytest.raises(ValueError, match=msg): - self.get_exceldf("test1", ext, usecols=["E"]) + pd.read_excel("test1" + ext, usecols=["E"]) def test_usecols_wrong_type(self, ext): msg = ("'usecols' must either be list-like of " "all strings, all unicode, all integers or a callable.") with pytest.raises(ValueError, match=msg): - self.get_exceldf("test1", ext, usecols=["E1", 0]) + pd.read_excel("test1" + ext, usecols=["E1", 0]) def test_excel_stop_iterator(self, ext): - parsed = self.get_exceldf('test2', ext, 'Sheet1') + parsed = pd.read_excel('test2' + ext, 'Sheet1') expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1']) tm.assert_frame_equal(parsed, expected) def test_excel_cell_error_na(self, ext): - parsed = self.get_exceldf('test3', ext, 'Sheet1') + parsed = pd.read_excel('test3' + ext, 'Sheet1') expected = DataFrame([[np.nan]], columns=['Test']) tm.assert_frame_equal(parsed, expected) def test_excel_passes_na(self, ext): - excel = self.get_excelfile('test4', ext) + excel = ExcelFile('test4' + ext) - parsed = read_excel(excel, 'Sheet1', keep_default_na=False, - na_values=['apple']) + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, + na_values=['apple']) expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) - parsed = read_excel(excel, 'Sheet1', keep_default_na=True, - na_values=['apple']) + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, + na_values=['apple']) expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) # 13967 - excel = self.get_excelfile('test5', ext) + excel = ExcelFile('test5' + ext) - parsed = read_excel(excel, 'Sheet1', keep_default_na=False, - na_values=['apple']) + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, + na_values=['apple']) expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) - parsed = read_excel(excel, 'Sheet1', keep_default_na=True, - na_values=['apple']) + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, + na_values=['apple']) expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], 
columns=['Test']) tm.assert_frame_equal(parsed, expected) @@ -325,34 +275,33 @@ def test_excel_passes_na(self, ext): @pytest.mark.parametrize('arg', ['sheet', 'sheetname', 'parse_cols']) def test_unexpected_kwargs_raises(self, ext, arg): # gh-17964 - excel = self.get_excelfile('test1', ext) + excel = ExcelFile('test1' + ext) kwarg = {arg: 'Sheet1'} msg = "unexpected keyword argument `{}`".format(arg) with pytest.raises(TypeError, match=msg): - read_excel(excel, **kwarg) + pd.read_excel(excel, **kwarg) @td.skip_if_no('xlrd', '1.0.1') # GH-22682 - def test_excel_table_sheet_by_index(self, ext): + def test_excel_table_sheet_by_index(self, ext, df_ref): - excel = self.get_excelfile('test1', ext) - dfref = self.get_csv_refdf('test1') + excel = ExcelFile('test1' + ext) - df1 = read_excel(excel, 0, index_col=0) - df2 = read_excel(excel, 1, skiprows=[1], index_col=0) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) + df1 = pd.read_excel(excel, 0, index_col=0) + df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) df1 = excel.parse(0, index_col=0) df2 = excel.parse(1, skiprows=[1], index_col=0) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) - df3 = read_excel(excel, 0, index_col=0, skipfooter=1) + df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1) tm.assert_frame_equal(df3, df1.iloc[:-1]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df4 = read_excel(excel, 0, index_col=0, skip_footer=1) + df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1) tm.assert_frame_equal(df3, df4) df3 = excel.parse(0, index_col=0, skipfooter=1) @@ -360,21 +309,18 @@ def test_excel_table_sheet_by_index(self, ext): import xlrd with pytest.raises(xlrd.XLRDError): - read_excel(excel, 'asdf') - - def test_excel_table(self, ext): + pd.read_excel(excel, 'asdf') - dfref = self.get_csv_refdf('test1') + def test_excel_table(self, ext, df_ref): - df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0) - df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0) + df1 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0) + df2 = pd.read_excel('test1' + ext, 'Sheet2', skiprows=[1], + index_col=0) # TODO add index to file - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) - df3 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, - skipfooter=1) + df3 = pd.read_excel('test1' + ext, 'Sheet1', index_col=0, skipfooter=1) tm.assert_frame_equal(df3, df1.iloc[:-1]) def test_reader_special_dtypes(self, ext): @@ -393,32 +339,32 @@ def test_reader_special_dtypes(self, ext): basename = 'test_types' # should read in correctly and infer types - actual = self.get_exceldf(basename, ext, 'Sheet1') + actual = pd.read_excel(basename + ext, 'Sheet1') tm.assert_frame_equal(actual, expected) # if not coercing number, then int comes in as float float_expected = expected.copy() float_expected["IntCol"] = float_expected["IntCol"].astype(float) float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0 - actual = self.get_exceldf(basename, ext, 'Sheet1', convert_float=False) + actual 
= pd.read_excel(basename + ext, 'Sheet1', convert_float=False) tm.assert_frame_equal(actual, float_expected) # check setting Index (assuming xls and xlsx are the same here) for icol, name in enumerate(expected.columns): - actual = self.get_exceldf(basename, ext, 'Sheet1', index_col=icol) + actual = pd.read_excel(basename + ext, 'Sheet1', index_col=icol) exp = expected.set_index(name) tm.assert_frame_equal(actual, exp) # convert_float and converters should be different but both accepted expected["StrCol"] = expected["StrCol"].apply(str) - actual = self.get_exceldf( - basename, ext, 'Sheet1', converters={"StrCol": str}) + actual = pd.read_excel(basename + ext, 'Sheet1', + converters={"StrCol": str}) tm.assert_frame_equal(actual, expected) no_convert_float = float_expected.copy() no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str) - actual = self.get_exceldf(basename, ext, 'Sheet1', convert_float=False, - converters={"StrCol": str}) + actual = pd.read_excel(basename + ext, 'Sheet1', convert_float=False, + converters={"StrCol": str}) tm.assert_frame_equal(actual, no_convert_float) # GH8212 - support for converters and missing values @@ -441,14 +387,13 @@ def test_reader_converters(self, ext): # should read in correctly and set types of single cells (not array # dtypes) - actual = self.get_exceldf(basename, ext, 'Sheet1', - converters=converters) + actual = pd.read_excel(basename + ext, 'Sheet1', converters=converters) tm.assert_frame_equal(actual, expected) def test_reader_dtype(self, ext): # GH 8212 basename = 'testdtype' - actual = self.get_exceldf(basename, ext) + actual = pd.read_excel(basename + ext) expected = DataFrame({ 'a': [1, 2, 3, 4], @@ -459,10 +404,10 @@ def test_reader_dtype(self, ext): tm.assert_frame_equal(actual, expected) - actual = self.get_exceldf(basename, ext, - dtype={'a': 'float64', - 'b': 'float32', - 'c': str}) + actual = pd.read_excel(basename + ext, + dtype={'a': 'float64', + 'b': 'float32', + 'c': str}) expected['a'] = expected['a'].astype('float64') expected['b'] = expected['b'].astype('float32') @@ -470,7 +415,7 @@ def test_reader_dtype(self, ext): tm.assert_frame_equal(actual, expected) with pytest.raises(ValueError): - self.get_exceldf(basename, ext, dtype={'d': 'int64'}) + pd.read_excel(basename + ext, dtype={'d': 'int64'}) @pytest.mark.parametrize("dtype,expected", [ (None, @@ -496,7 +441,7 @@ def test_reader_dtype_str(self, ext, dtype, expected): # see gh-20377 basename = "testdtype" - actual = self.get_exceldf(basename, ext, dtype=dtype) + actual = pd.read_excel(basename + ext, dtype=dtype) tm.assert_frame_equal(actual, expected) def test_reading_all_sheets(self, ext): @@ -504,7 +449,7 @@ def test_reading_all_sheets(self, ext): # Ensure a dict is returned. # See PR #9450 basename = 'test_multisheet' - dfs = self.get_exceldf(basename, ext, sheet_name=None) + dfs = pd.read_excel(basename + ext, sheet_name=None) # ensure this is not alphabetical to test order preservation expected_keys = ['Charlie', 'Alpha', 'Beta'] tm.assert_contains_all(expected_keys, dfs.keys()) @@ -521,7 +466,7 @@ def test_reading_multiple_specific_sheets(self, ext): basename = 'test_multisheet' # Explicitly request duplicates. Only the set should be returned. 
expected_keys = [2, 'Charlie', 'Charlie'] - dfs = self.get_exceldf(basename, ext, sheet_name=expected_keys) + dfs = pd.read_excel(basename + ext, sheet_name=expected_keys) expected_keys = list(set(expected_keys)) tm.assert_contains_all(expected_keys, dfs.keys()) assert len(expected_keys) == len(dfs.keys()) @@ -531,18 +476,18 @@ def test_reading_all_sheets_with_blank(self, ext): # In the case where some sheets are blank. # Issue #11711 basename = 'blank_with_header' - dfs = self.get_exceldf(basename, ext, sheet_name=None) + dfs = pd.read_excel(basename + ext, sheet_name=None) expected_keys = ['Sheet1', 'Sheet2', 'Sheet3'] tm.assert_contains_all(expected_keys, dfs.keys()) # GH6403 def test_read_excel_blank(self, ext): - actual = self.get_exceldf('blank', ext, 'Sheet1') + actual = pd.read_excel('blank' + ext, 'Sheet1') tm.assert_frame_equal(actual, DataFrame()) def test_read_excel_blank_with_header(self, ext): expected = DataFrame(columns=['col_1', 'col_2']) - actual = self.get_exceldf('blank_with_header', ext, 'Sheet1') + actual = pd.read_excel('blank_with_header' + ext, 'Sheet1') tm.assert_frame_equal(actual, expected) def test_date_conversion_overflow(self, ext): @@ -552,11 +497,11 @@ def test_date_conversion_overflow(self, ext): [1e+20, 'Timothy Brown']], columns=['DateColWithBigInt', 'StringCol']) - result = self.get_exceldf('testdateoverflow', ext) + result = pd.read_excel('testdateoverflow' + ext) tm.assert_frame_equal(result, expected) @td.skip_if_no("xlrd", "1.0.1") # see gh-22682 - def test_sheet_name_and_sheetname(self, ext): + def test_sheet_name_and_sheetname(self, ext, df_ref): # gh-10559: Minor improvement: Change "sheet_name" to "sheetname" # gh-10969: DOC: Consistent var names (sheetname vs sheet_name) # gh-12604: CLN GH10559 Rename sheetname variable to sheet_name @@ -565,14 +510,13 @@ def test_sheet_name_and_sheetname(self, ext): filename = "test1" sheet_name = "Sheet1" - df_ref = self.get_csv_refdf(filename) - df1 = self.get_exceldf(filename, ext, - sheet_name=sheet_name, index_col=0) # doc + df1 = pd.read_excel(filename + ext, + sheet_name=sheet_name, index_col=0) # doc with ignore_xlrd_time_clock_warning(): - df2 = self.get_exceldf(filename, ext, index_col=0, - sheet_name=sheet_name) + df2 = pd.read_excel(filename + ext, index_col=0, + sheet_name=sheet_name) - excel = self.get_excelfile(filename, ext) + excel = ExcelFile(filename + ext) df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc df2_parse = excel.parse(index_col=0, sheet_name=sheet_name) @@ -584,55 +528,53 @@ def test_sheet_name_and_sheetname(self, ext): def test_excel_read_buffer(self, ext): - pth = os.path.join(self.dirpath, 'test1' + ext) - expected = read_excel(pth, 'Sheet1', index_col=0) + pth = 'test1' + ext + expected = pd.read_excel(pth, 'Sheet1', index_col=0) with open(pth, 'rb') as f: - actual = read_excel(f, 'Sheet1', index_col=0) + actual = pd.read_excel(f, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) with open(pth, 'rb') as f: xls = ExcelFile(f) - actual = read_excel(xls, 'Sheet1', index_col=0) + actual = pd.read_excel(xls, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) def test_bad_engine_raises(self, ext): bad_engine = 'foo' with pytest.raises(ValueError, match="Unknown engine: foo"): - read_excel('', engine=bad_engine) + pd.read_excel('', engine=bad_engine) @tm.network def test_read_from_http_url(self, ext): url = ('https://raw.github.com/pandas-dev/pandas/master/' 'pandas/tests/io/data/test1' + ext) - url_table = read_excel(url) - local_table = 
self.get_exceldf('test1', ext) + url_table = pd.read_excel(url) + local_table = pd.read_excel('test1' + ext) tm.assert_frame_equal(url_table, local_table) @td.skip_if_not_us_locale def test_read_from_s3_url(self, ext, s3_resource): # Bucket "pandas-test" created in tests/io/conftest.py - file_name = os.path.join(self.dirpath, 'test1' + ext) - - with open(file_name, "rb") as f: + with open('test1' + ext, "rb") as f: s3_resource.Bucket("pandas-test").put_object(Key="test1" + ext, Body=f) url = ('s3://pandas-test/test1' + ext) - url_table = read_excel(url) - local_table = self.get_exceldf('test1', ext) + url_table = pd.read_excel(url) + local_table = pd.read_excel('test1' + ext) tm.assert_frame_equal(url_table, local_table) @pytest.mark.slow # ignore warning from old xlrd @pytest.mark.filterwarnings("ignore:This metho:PendingDeprecationWarning") - def test_read_from_file_url(self, ext): + def test_read_from_file_url(self, ext, datapath): # FILE - localtable = os.path.join(self.dirpath, 'test1' + ext) - local_table = read_excel(localtable) + localtable = os.path.join(datapath("io", "data"), 'test1' + ext) + local_table = pd.read_excel(localtable) try: - url_table = read_excel('file://localhost/' + localtable) + url_table = pd.read_excel('file://localhost/' + localtable) except URLError: # fails on some systems import platform @@ -646,11 +588,11 @@ def test_read_from_pathlib_path(self, ext): # GH12655 from pathlib import Path - str_path = os.path.join(self.dirpath, 'test1' + ext) - expected = read_excel(str_path, 'Sheet1', index_col=0) + str_path = 'test1' + ext + expected = pd.read_excel(str_path, 'Sheet1', index_col=0) - path_obj = Path(self.dirpath, 'test1' + ext) - actual = read_excel(path_obj, 'Sheet1', index_col=0) + path_obj = Path('test1' + ext) + actual = pd.read_excel(path_obj, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) @@ -660,22 +602,20 @@ def test_read_from_py_localpath(self, ext): # GH12655 from py.path import local as LocalPath - str_path = os.path.join(self.dirpath, 'test1' + ext) - expected = read_excel(str_path, 'Sheet1', index_col=0) + str_path = os.path.join('test1' + ext) + expected = pd.read_excel(str_path, 'Sheet1', index_col=0) - abs_dir = os.path.abspath(self.dirpath) - path_obj = LocalPath(abs_dir).join('test1' + ext) - actual = read_excel(path_obj, 'Sheet1', index_col=0) + path_obj = LocalPath().join('test1' + ext) + actual = pd.read_excel(path_obj, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) def test_reader_closes_file(self, ext): - pth = os.path.join(self.dirpath, 'test1' + ext) - f = open(pth, 'rb') + f = open('test1' + ext, 'rb') with ExcelFile(f) as xlsx: # parses okay - read_excel(xlsx, 'Sheet1', index_col=0) + pd.read_excel(xlsx, 'Sheet1', index_col=0) assert f.closed @@ -694,16 +634,16 @@ def test_reader_seconds(self, ext): time(16, 37, 0, 900000), time(18, 20, 54)]}) - actual = self.get_exceldf('times_1900', ext, 'Sheet1') + actual = pd.read_excel('times_1900' + ext, 'Sheet1') tm.assert_frame_equal(actual, expected) - actual = self.get_exceldf('times_1904', ext, 'Sheet1') + actual = pd.read_excel('times_1904' + ext, 'Sheet1') tm.assert_frame_equal(actual, expected) def test_read_excel_multiindex(self, ext): # see gh-4679 mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]]) - mi_file = os.path.join(self.dirpath, "testmultiindex" + ext) + mi_file = "testmultiindex" + ext # "mi_column" sheet expected = DataFrame([[1, 2.5, pd.Timestamp("2015-01-01"), True], @@ -712,34 +652,37 @@ def test_read_excel_multiindex(self, 
ext): [4, 5.5, pd.Timestamp("2015-01-04"), True]], columns=mi) - actual = read_excel(mi_file, "mi_column", header=[0, 1], index_col=0) + actual = pd.read_excel( + mi_file, "mi_column", header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) # "mi_index" sheet expected.index = mi expected.columns = ["a", "b", "c", "d"] - actual = read_excel(mi_file, "mi_index", index_col=[0, 1]) + actual = pd.read_excel(mi_file, "mi_index", index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) # "both" sheet expected.columns = mi - actual = read_excel(mi_file, "both", index_col=[0, 1], header=[0, 1]) + actual = pd.read_excel( + mi_file, "both", index_col=[0, 1], header=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) # "mi_index_name" sheet expected.columns = ["a", "b", "c", "d"] expected.index = mi.set_names(["ilvl1", "ilvl2"]) - actual = read_excel(mi_file, "mi_index_name", index_col=[0, 1]) + actual = pd.read_excel( + mi_file, "mi_index_name", index_col=[0, 1]) tm.assert_frame_equal(actual, expected) # "mi_column_name" sheet expected.index = list(range(4)) expected.columns = mi.set_names(["c1", "c2"]) - actual = read_excel(mi_file, "mi_column_name", - header=[0, 1], index_col=0) + actual = pd.read_excel(mi_file, "mi_column_name", + header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) # see gh-11317 @@ -747,29 +690,29 @@ def test_read_excel_multiindex(self, ext): expected.columns = mi.set_levels( [1, 2], level=1).set_names(["c1", "c2"]) - actual = read_excel(mi_file, "name_with_int", - index_col=0, header=[0, 1]) + actual = pd.read_excel(mi_file, "name_with_int", + index_col=0, header=[0, 1]) tm.assert_frame_equal(actual, expected) # "both_name" sheet expected.columns = mi.set_names(["c1", "c2"]) expected.index = mi.set_names(["ilvl1", "ilvl2"]) - actual = read_excel(mi_file, "both_name", - index_col=[0, 1], header=[0, 1]) + actual = pd.read_excel(mi_file, "both_name", + index_col=[0, 1], header=[0, 1]) tm.assert_frame_equal(actual, expected) # "both_skiprows" sheet - actual = read_excel(mi_file, "both_name_skiprows", index_col=[0, 1], - header=[0, 1], skiprows=2) + actual = pd.read_excel(mi_file, "both_name_skiprows", index_col=[0, 1], + header=[0, 1], skiprows=2) tm.assert_frame_equal(actual, expected) def test_read_excel_multiindex_header_only(self, ext): # see gh-11733. # # Don't try to parse a header name if there isn't one. 
- mi_file = os.path.join(self.dirpath, "testmultiindex" + ext) - result = read_excel(mi_file, "index_col_none", header=[0, 1]) + mi_file = "testmultiindex" + ext + result = pd.read_excel(mi_file, "index_col_none", header=[0, 1]) exp_columns = MultiIndex.from_product([("A", "B"), ("key", "val")]) expected = DataFrame([[1, 2, 3, 4]] * 2, columns=exp_columns) @@ -778,7 +721,6 @@ def test_read_excel_multiindex_header_only(self, ext): def test_excel_old_index_format(self, ext): # see gh-4679 filename = "test_index_name_pre17" + ext - in_file = os.path.join(self.dirpath, filename) # We detect headers to determine if index names exist, so # that "index" name in the "names" version of the data will @@ -801,12 +743,12 @@ def test_excel_old_index_format(self, ext): expected = pd.DataFrame(data, index=si, columns=columns) - actual = pd.read_excel(in_file, "single_names", index_col=0) + actual = pd.read_excel(filename, "single_names", index_col=0) tm.assert_frame_equal(actual, expected) expected.index = mi - actual = pd.read_excel(in_file, "multi_names", index_col=[0, 1]) + actual = pd.read_excel(filename, "multi_names", index_col=[0, 1]) tm.assert_frame_equal(actual, expected) # The analogous versions of the "names" version data @@ -828,31 +770,28 @@ def test_excel_old_index_format(self, ext): expected = pd.DataFrame(data, index=si, columns=columns) - actual = pd.read_excel(in_file, "single_no_names", index_col=0) + actual = pd.read_excel(filename, "single_no_names", index_col=0) tm.assert_frame_equal(actual, expected) expected.index = mi - actual = pd.read_excel(in_file, "multi_no_names", index_col=[0, 1]) + actual = pd.read_excel(filename, "multi_no_names", index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) def test_read_excel_bool_header_arg(self, ext): # GH 6114 for arg in [True, False]: with pytest.raises(TypeError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), - header=arg) + pd.read_excel('test1' + ext, header=arg) def test_read_excel_chunksize(self, ext): # GH 8011 with pytest.raises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), - chunksize=100) + pd.read_excel('test1' + ext, chunksize=100) def test_read_excel_skiprows_list(self, ext): # GH 4903 - actual = pd.read_excel(os.path.join(self.dirpath, - 'testskiprows' + ext), + actual = pd.read_excel('testskiprows' + ext, 'skiprows_list', skiprows=[0, 2]) expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], [2, 3.5, pd.Timestamp('2015-01-02'), False], @@ -861,41 +800,35 @@ def test_read_excel_skiprows_list(self, ext): columns=['a', 'b', 'c', 'd']) tm.assert_frame_equal(actual, expected) - actual = pd.read_excel(os.path.join(self.dirpath, - 'testskiprows' + ext), + actual = pd.read_excel('testskiprows' + ext, 'skiprows_list', skiprows=np.array([0, 2])) tm.assert_frame_equal(actual, expected) def test_read_excel_nrows(self, ext): # GH 16645 num_rows_to_pull = 5 - actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), - nrows=num_rows_to_pull) - expected = pd.read_excel(os.path.join(self.dirpath, - 'test1' + ext)) + actual = pd.read_excel('test1' + ext, nrows=num_rows_to_pull) + expected = pd.read_excel('test1' + ext) expected = expected[:num_rows_to_pull] tm.assert_frame_equal(actual, expected) def test_read_excel_nrows_greater_than_nrows_in_file(self, ext): # GH 16645 - expected = pd.read_excel(os.path.join(self.dirpath, - 'test1' + ext)) + expected = pd.read_excel('test1' + ext) num_records_in_file = len(expected) num_rows_to_pull = 
num_records_in_file + 10 - actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), - nrows=num_rows_to_pull) + actual = pd.read_excel('test1' + ext, nrows=num_rows_to_pull) tm.assert_frame_equal(actual, expected) def test_read_excel_nrows_non_integer_parameter(self, ext): # GH 16645 msg = "'nrows' must be an integer >=0" with pytest.raises(ValueError, match=msg): - pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), - nrows='5') + pd.read_excel('test1' + ext, nrows='5') def test_read_excel_squeeze(self, ext): # GH 12157 - f = os.path.join(self.dirpath, 'test_squeeze' + ext) + f = 'test_squeeze' + ext actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True) expected = pd.Series([2, 3, 4], [4, 5, 6], name='b') @@ -934,7 +867,7 @@ def test_read_one_empty_col_no_header(self, ext, header, expected): with ensure_clean(ext) as path: df.to_excel(path, filename, index=False, header=False) - result = read_excel(path, filename, usecols=[0], header=header) + result = pd.read_excel(path, filename, usecols=[0], header=header) tm.assert_frame_equal(result, expected) @@ -955,7 +888,7 @@ def test_read_one_empty_col_with_header(self, ext, header, expected): with ensure_clean(ext) as path: df.to_excel(path, 'with_header', index=False, header=True) - result = read_excel(path, filename, usecols=[0], header=header) + result = pd.read_excel(path, filename, usecols=[0], header=header) tm.assert_frame_equal(result, expected) @@ -976,10 +909,10 @@ def test_set_column_names_in_parameter(self, ext): refdf.columns = ['A', 'B'] with ExcelFile(pth) as reader: - xlsdf_no_head = read_excel(reader, 'Data_no_head', - header=None, names=['A', 'B']) - xlsdf_with_head = read_excel(reader, 'Data_with_head', - index_col=None, names=['A', 'B']) + xlsdf_no_head = pd.read_excel(reader, 'Data_no_head', + header=None, names=['A', 'B']) + xlsdf_with_head = pd.read_excel( + reader, 'Data_with_head', index_col=None, names=['A', 'B']) tm.assert_frame_equal(xlsdf_no_head, refdf) tm.assert_frame_equal(xlsdf_with_head, refdf) @@ -1005,7 +938,7 @@ def tdf(col_sheet_name): for sheetname, df in dfs.items(): df.to_excel(ew, sheetname) - dfs_returned = read_excel(pth, sheet_name=sheets, index_col=0) + dfs_returned = pd.read_excel(pth, sheet_name=sheets, index_col=0) for s in sheets: tm.assert_frame_equal(dfs[s], dfs_returned[s]) @@ -1101,15 +1034,15 @@ def test_read_excel_parse_dates(self, ext): with ensure_clean(ext) as pth: df2.to_excel(pth) - res = read_excel(pth, index_col=0) + res = pd.read_excel(pth, index_col=0) tm.assert_frame_equal(df2, res) - res = read_excel(pth, parse_dates=["date_strings"], index_col=0) + res = pd.read_excel(pth, parse_dates=["date_strings"], index_col=0) tm.assert_frame_equal(df, res) date_parser = lambda x: pd.datetime.strptime(x, "%m/%d/%Y") - res = read_excel(pth, parse_dates=["date_strings"], - date_parser=date_parser, index_col=0) + res = pd.read_excel(pth, parse_dates=["date_strings"], + date_parser=date_parser, index_col=0) tm.assert_frame_equal(df, res) @@ -1134,11 +1067,11 @@ def test_read_xlrd_book(self, ext): book = xlrd.open_workbook(pth) with ExcelFile(book, engine=engine) as xl: - result = read_excel(xl, sheet_name, index_col=0) + result = pd.read_excel(xl, sheet_name, index_col=0) tm.assert_frame_equal(df, result) - result = read_excel(book, sheet_name=sheet_name, - engine=engine, index_col=0) + result = pd.read_excel(book, sheet_name=sheet_name, + engine=engine, index_col=0) tm.assert_frame_equal(df, result) @@ -1192,12 +1125,12 @@ def test_excel_sheet_by_name_raise(self, 
*_): gt.to_excel(self.path) xl = ExcelFile(self.path) - df = read_excel(xl, 0, index_col=0) + df = pd.read_excel(xl, 0, index_col=0) tm.assert_frame_equal(gt, df) with pytest.raises(xlrd.XLRDError): - read_excel(xl, "0") + pd.read_excel(xl, "0") def test_excel_writer_context_manager(self, *_): with ExcelWriter(self.path) as writer: @@ -1205,8 +1138,8 @@ def test_excel_writer_context_manager(self, *_): self.frame2.to_excel(writer, "Data2") with ExcelFile(self.path) as reader: - found_df = read_excel(reader, "Data1", index_col=0) - found_df2 = read_excel(reader, "Data2", index_col=0) + found_df = pd.read_excel(reader, "Data1", index_col=0) + found_df2 = pd.read_excel(reader, "Data2", index_col=0) tm.assert_frame_equal(found_df, self.frame) tm.assert_frame_equal(found_df2, self.frame2) @@ -1221,47 +1154,49 @@ def test_roundtrip(self, merge_cells, engine, ext): # test roundtrip self.frame.to_excel(self.path, 'test1') - recons = read_excel(self.path, 'test1', index_col=0) + recons = pd.read_excel(self.path, 'test1', index_col=0) tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(self.path, 'test1', index=False) - recons = read_excel(self.path, 'test1', index_col=None) + recons = pd.read_excel(self.path, 'test1', index_col=None) recons.index = self.frame.index tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(self.path, 'test1', na_rep='NA') - recons = read_excel(self.path, 'test1', index_col=0, na_values=['NA']) + recons = pd.read_excel( + self.path, 'test1', index_col=0, na_values=['NA']) tm.assert_frame_equal(self.frame, recons) # GH 3611 self.frame.to_excel(self.path, 'test1', na_rep='88') - recons = read_excel(self.path, 'test1', index_col=0, na_values=['88']) + recons = pd.read_excel( + self.path, 'test1', index_col=0, na_values=['88']) tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(self.path, 'test1', na_rep='88') - recons = read_excel(self.path, 'test1', index_col=0, - na_values=[88, 88.0]) + recons = pd.read_excel( + self.path, 'test1', index_col=0, na_values=[88, 88.0]) tm.assert_frame_equal(self.frame, recons) # GH 6573 self.frame.to_excel(self.path, 'Sheet1') - recons = read_excel(self.path, index_col=0) + recons = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(self.path, '0') - recons = read_excel(self.path, index_col=0) + recons = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(self.frame, recons) # GH 8825 Pandas Series should provide to_excel method s = self.frame["A"] s.to_excel(self.path) - recons = read_excel(self.path, index_col=0) + recons = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(s.to_frame(), recons) def test_mixed(self, merge_cells, engine, ext): self.mixed_frame.to_excel(self.path, 'test1') reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', index_col=0) + recons = pd.read_excel(reader, 'test1', index_col=0) tm.assert_frame_equal(self.mixed_frame, recons) def test_ts_frame(self, *_): @@ -1270,7 +1205,7 @@ def test_ts_frame(self, *_): df.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(df, recons) def test_basics_with_nan(self, merge_cells, engine, ext): @@ -1290,18 +1225,18 @@ def test_int_types(self, merge_cells, engine, ext, np_type): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", 
index_col=0) int_frame = frame.astype(np.int64) tm.assert_frame_equal(int_frame, recons) - recons2 = read_excel(self.path, "test1", index_col=0) + recons2 = pd.read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(int_frame, recons2) # Test with convert_float=False comes back as float. float_frame = frame.astype(float) - recons = read_excel(self.path, "test1", - convert_float=False, index_col=0) + recons = pd.read_excel(self.path, "test1", + convert_float=False, index_col=0) tm.assert_frame_equal(recons, float_frame, check_index_type=False, check_column_type=False) @@ -1314,7 +1249,7 @@ def test_float_types(self, merge_cells, engine, ext, np_type): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0).astype(np_type) + recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) tm.assert_frame_equal(frame, recons, check_dtype=False) @@ -1325,7 +1260,7 @@ def test_bool_types(self, merge_cells, engine, ext, np_type): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0).astype(np_type) + recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) tm.assert_frame_equal(frame, recons) @@ -1334,7 +1269,7 @@ def test_inf_roundtrip(self, *_): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(frame, recons) @@ -1352,9 +1287,9 @@ def test_sheets(self, merge_cells, engine, ext): self.tsframe.to_excel(writer, 'test2') writer.save() reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', index_col=0) + recons = pd.read_excel(reader, 'test1', index_col=0) tm.assert_frame_equal(self.frame, recons) - recons = read_excel(reader, 'test2', index_col=0) + recons = pd.read_excel(reader, 'test2', index_col=0) tm.assert_frame_equal(self.tsframe, recons) assert 2 == len(reader.sheet_names) assert 'test1' == reader.sheet_names[0] @@ -1372,7 +1307,7 @@ def test_colaliases(self, merge_cells, engine, ext): col_aliases = Index(['AA', 'X', 'Y', 'Z']) self.frame2.to_excel(self.path, 'test1', header=col_aliases) reader = ExcelFile(self.path) - rs = read_excel(reader, 'test1', index_col=0) + rs = pd.read_excel(reader, 'test1', index_col=0) xp = self.frame2.copy() xp.columns = col_aliases tm.assert_frame_equal(xp, rs) @@ -1391,9 +1326,8 @@ def test_roundtrip_indexlabels(self, merge_cells, engine, ext): index_label=['test'], merge_cells=merge_cells) reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) + recons = pd.read_excel( + reader, 'test1', index_col=0).astype(np.int64) frame.index.names = ['test'] assert frame.index.names == recons.index.names @@ -1403,9 +1337,8 @@ def test_roundtrip_indexlabels(self, merge_cells, engine, ext): index_label=['test', 'dummy', 'dummy2'], merge_cells=merge_cells) reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) + recons = pd.read_excel( + reader, 'test1', index_col=0).astype(np.int64) frame.index.names = ['test'] assert frame.index.names == recons.index.names @@ -1415,9 +1348,8 @@ def test_roundtrip_indexlabels(self, merge_cells, engine, ext): index_label='test', merge_cells=merge_cells) reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) + recons = pd.read_excel( + reader, 'test1', index_col=0).astype(np.int64) 
frame.index.names = ['test'] tm.assert_frame_equal(frame, recons.astype(bool)) @@ -1430,7 +1362,7 @@ def test_roundtrip_indexlabels(self, merge_cells, engine, ext): df = df.set_index(['A', 'B']) reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', index_col=[0, 1]) + recons = pd.read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(df, recons, check_less_precise=True) def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): @@ -1440,8 +1372,7 @@ def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): df.to_excel(self.path, merge_cells=merge_cells) xf = ExcelFile(self.path) - result = read_excel(xf, xf.sheet_names[0], - index_col=0) + result = pd.read_excel(xf, xf.sheet_names[0], index_col=0) tm.assert_frame_equal(result, df) assert result.index.name == 'foo' @@ -1454,7 +1385,7 @@ def test_excel_roundtrip_datetime(self, merge_cells, *_): tsf.to_excel(self.path, "test1", merge_cells=merge_cells) reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(self.tsframe, recons) @@ -1488,8 +1419,8 @@ def test_excel_date_datetime_format(self, merge_cells, engine, ext): reader1 = ExcelFile(self.path) reader2 = ExcelFile(filename2) - rs1 = read_excel(reader1, "test1", index_col=0) - rs2 = read_excel(reader2, "test1", index_col=0) + rs1 = pd.read_excel(reader1, "test1", index_col=0) + rs2 = pd.read_excel(reader2, "test1", index_col=0) tm.assert_frame_equal(rs1, rs2) @@ -1511,7 +1442,7 @@ def test_to_excel_interval_no_labels(self, *_): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_interval_labels(self, *_): @@ -1529,7 +1460,7 @@ def test_to_excel_interval_labels(self, *_): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_timedelta(self, *_): @@ -1547,7 +1478,7 @@ def test_to_excel_timedelta(self, *_): frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_periodindex(self, merge_cells, engine, ext): @@ -1557,7 +1488,7 @@ def test_to_excel_periodindex(self, merge_cells, engine, ext): xp.to_excel(self.path, 'sht1') reader = ExcelFile(self.path) - rs = read_excel(reader, 'sht1', index_col=0) + rs = pd.read_excel(reader, 'sht1', index_col=0) tm.assert_frame_equal(xp, rs.to_period('M')) def test_to_excel_multiindex(self, merge_cells, engine, ext): @@ -1573,7 +1504,7 @@ def test_to_excel_multiindex(self, merge_cells, engine, ext): # round trip frame.to_excel(self.path, 'test1', merge_cells=merge_cells) reader = ExcelFile(self.path) - df = read_excel(reader, 'test1', index_col=[0, 1]) + df = pd.read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511 @@ -1584,7 +1515,7 @@ def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): frame = frame.set_index(['A', 'B']) frame.to_excel(self.path, merge_cells=merge_cells) - df = read_excel(self.path, index_col=[0, 1]) + df = pd.read_excel(self.path, index_col=[0, 1]) tm.assert_frame_equal(frame, df) # Test for Issue 11328. 
If column indices are integers, make @@ -1607,8 +1538,7 @@ def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): # round trip frame.to_excel(self.path, 'test1', merge_cells=merge_cells) reader = ExcelFile(self.path) - df = read_excel(reader, 'test1', header=header, - index_col=[0, 1]) + df = pd.read_excel(reader, 'test1', header=header, index_col=[0, 1]) if not merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False) @@ -1624,8 +1554,7 @@ def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): tsframe.index.names = ['time', 'foo'] tsframe.to_excel(self.path, 'test1', merge_cells=merge_cells) reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1', - index_col=[0, 1]) + recons = pd.read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(tsframe, recons) assert recons.index.names == ('time', 'foo') @@ -1647,7 +1576,7 @@ def test_to_excel_multiindex_no_write_index(self, merge_cells, engine, # Read it back in. reader = ExcelFile(self.path) - frame3 = read_excel(reader, 'test1') + frame3 = pd.read_excel(reader, 'test1') # Test that it is the same as the initial frame. tm.assert_frame_equal(frame1, frame3) @@ -1659,7 +1588,7 @@ def test_to_excel_float_format(self, *_): df.to_excel(self.path, "test1", float_format="%.2f") reader = ExcelFile(self.path) - result = read_excel(reader, "test1", index_col=0) + result = pd.read_excel(reader, "test1", index_col=0) expected = DataFrame([[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], @@ -1675,8 +1604,8 @@ def test_to_excel_output_encoding(self, merge_cells, engine, ext): with ensure_clean("__tmp_to_excel_float_format__." + ext) as filename: df.to_excel(filename, sheet_name="TestSheet", encoding="utf8") - result = read_excel(filename, "TestSheet", - encoding="utf8", index_col=0) + result = pd.read_excel(filename, "TestSheet", + encoding="utf8", index_col=0) tm.assert_frame_equal(result, df) def test_to_excel_unicode_filename(self, merge_cells, engine, ext): @@ -1694,7 +1623,7 @@ def test_to_excel_unicode_filename(self, merge_cells, engine, ext): df.to_excel(filename, "test1", float_format="%.2f") reader = ExcelFile(filename) - result = read_excel(reader, "test1", index_col=0) + result = pd.read_excel(reader, "test1", index_col=0) expected = DataFrame([[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], @@ -1812,7 +1741,7 @@ def roundtrip(data, header=True, parser_hdr=0, index=True): merge_cells=merge_cells, index=index) xf = ExcelFile(self.path) - return read_excel(xf, xf.sheet_names[0], header=parser_hdr) + return pd.read_excel(xf, xf.sheet_names[0], header=parser_hdr) # Basic test. parser_header = 0 if use_headers else None @@ -1860,12 +1789,12 @@ def test_duplicated_columns(self, *_): columns=["A", "B", "B.1"]) # By default, we mangle. - result = read_excel(self.path, "test1", index_col=0) + result = pd.read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(result, expected) # Explicitly, we pass in the parameter. 
- result = read_excel(self.path, "test1", index_col=0, - mangle_dupe_cols=True) + result = pd.read_excel(self.path, "test1", index_col=0, + mangle_dupe_cols=True) tm.assert_frame_equal(result, expected) # see gh-11007, gh-10970 @@ -1873,21 +1802,22 @@ def test_duplicated_columns(self, *_): columns=["A", "B", "A", "B"]) df.to_excel(self.path, "test1") - result = read_excel(self.path, "test1", index_col=0) + result = pd.read_excel(self.path, "test1", index_col=0) expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A.1", "B.1"]) tm.assert_frame_equal(result, expected) # see gh-10982 df.to_excel(self.path, "test1", index=False, header=False) - result = read_excel(self.path, "test1", header=None) + result = pd.read_excel(self.path, "test1", header=None) expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) tm.assert_frame_equal(result, expected) msg = "Setting mangle_dupe_cols=False is not supported yet" with pytest.raises(ValueError, match=msg): - read_excel(self.path, "test1", header=None, mangle_dupe_cols=False) + pd.read_excel( + self.path, "test1", header=None, mangle_dupe_cols=False) def test_swapped_columns(self, merge_cells, engine, ext): # Test for issue #5427. @@ -1895,7 +1825,7 @@ def test_swapped_columns(self, merge_cells, engine, ext): 'B': [2, 2, 2]}) write_frame.to_excel(self.path, 'test1', columns=['B', 'A']) - read_frame = read_excel(self.path, 'test1', header=0) + read_frame = pd.read_excel(self.path, 'test1', header=0) tm.assert_series_equal(write_frame['A'], read_frame['A']) tm.assert_series_equal(write_frame['B'], read_frame['B']) @@ -1910,7 +1840,7 @@ def test_invalid_columns(self, *_): write_frame.to_excel(self.path, "test1", columns=["B", "C"]) expected = write_frame.reindex(columns=["B", "C"]) - read_frame = read_excel(self.path, "test1", index_col=0) + read_frame = pd.read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(expected, read_frame) with pytest.raises(KeyError): @@ -1919,7 +1849,7 @@ def test_invalid_columns(self, *_): def test_comment_arg(self, *_): # see gh-18735 # - # Test the comment argument functionality to read_excel. + # Test the comment argument functionality to pd.read_excel. # Create file to read in. df = DataFrame({"A": ["one", "#one", "one"], @@ -1927,18 +1857,18 @@ def test_comment_arg(self, *_): df.to_excel(self.path, "test_c") # Read file without comment arg. 
- result1 = read_excel(self.path, "test_c", index_col=0) + result1 = pd.read_excel(self.path, "test_c", index_col=0) result1.iloc[1, 0] = None result1.iloc[1, 1] = None result1.iloc[2, 1] = None - result2 = read_excel(self.path, "test_c", comment="#", index_col=0) + result2 = pd.read_excel(self.path, "test_c", comment="#", index_col=0) tm.assert_frame_equal(result1, result2) def test_comment_default(self, merge_cells, engine, ext): # Re issue #18735 - # Test the comment argument default to read_excel + # Test the comment argument default to pd.read_excel # Create file to read in df = DataFrame({'A': ['one', '#one', 'one'], @@ -1946,8 +1876,8 @@ def test_comment_default(self, merge_cells, engine, ext): df.to_excel(self.path, 'test_c') # Read file with default and explicit comment=None - result1 = read_excel(self.path, 'test_c') - result2 = read_excel(self.path, 'test_c', comment=None) + result1 = pd.read_excel(self.path, 'test_c') + result2 = pd.read_excel(self.path, 'test_c', comment=None) tm.assert_frame_equal(result1, result2) def test_comment_used(self, *_): @@ -1963,19 +1893,19 @@ def test_comment_used(self, *_): # Test read_frame_comment against manually produced expected output. expected = DataFrame({"A": ["one", None, "one"], "B": ["two", None, None]}) - result = read_excel(self.path, "test_c", comment="#", index_col=0) + result = pd.read_excel(self.path, "test_c", comment="#", index_col=0) tm.assert_frame_equal(result, expected) def test_comment_empty_line(self, merge_cells, engine, ext): # Re issue #18735 - # Test that read_excel ignores commented lines at the end of file + # Test that pd.read_excel ignores commented lines at the end of file df = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']}) df.to_excel(self.path, index=False) # Test that all-comment lines at EoF are ignored expected = DataFrame({'a': [1], 'b': [2]}) - result = read_excel(self.path, comment='#') + result = pd.read_excel(self.path, comment='#') tm.assert_frame_equal(result, expected) def test_datetimes(self, merge_cells, engine, ext): @@ -1995,7 +1925,7 @@ def test_datetimes(self, merge_cells, engine, ext): write_frame = DataFrame({'A': datetimes}) write_frame.to_excel(self.path, 'Sheet1') - read_frame = read_excel(self.path, 'Sheet1', header=0) + read_frame = pd.read_excel(self.path, 'Sheet1', header=0) tm.assert_series_equal(write_frame['A'], read_frame['A']) @@ -2010,7 +1940,7 @@ def test_bytes_io(self, merge_cells, engine, ext): writer.save() bio.seek(0) - reread_df = read_excel(bio, index_col=0) + reread_df = pd.read_excel(bio, index_col=0) tm.assert_frame_equal(df, reread_df) def test_write_lists_dict(self, *_): @@ -2019,7 +1949,7 @@ def test_write_lists_dict(self, *_): "numeric": [1, 2, 3.0], "str": ["apple", "banana", "cherry"]}) df.to_excel(self.path, "Sheet1") - read = read_excel(self.path, "Sheet1", header=0, index_col=0) + read = pd.read_excel(self.path, "Sheet1", header=0, index_col=0) expected = df.copy() expected.mixed = expected.mixed.apply(str) @@ -2033,8 +1963,8 @@ def test_true_and_false_value_options(self, *_): expected = df.replace({"foo": True, "bar": False}) df.to_excel(self.path) - read_frame = read_excel(self.path, true_values=["foo"], - false_values=["bar"], index_col=0) + read_frame = pd.read_excel(self.path, true_values=["foo"], + false_values=["bar"], index_col=0) tm.assert_frame_equal(read_frame, expected) def test_freeze_panes(self, *_): @@ -2042,7 +1972,7 @@ def test_freeze_panes(self, *_): expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"]) expected.to_excel(self.path, 
"Sheet1", freeze_panes=(1, 1)) - result = read_excel(self.path, index_col=0) + result = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(result, expected) def test_path_path_lib(self, merge_cells, engine, ext): From 59df3e07d96d2463b62bd3fc38e11297590ed40d Mon Sep 17 00:00:00 2001 From: Tim Gates <47873678+timgates42@users.noreply.github.com> Date: Thu, 30 May 2019 11:27:47 +1000 Subject: [PATCH 04/51] Issue/26506 Provides correct desciption in docstring that get_indexer methods are not yet supported (#26519) --- pandas/core/indexes/base.py | 3 ++- pandas/core/indexes/interval.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a4544e79e2dfa..8538687ca3e91 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -52,6 +52,7 @@ _index_doc_kwargs = dict(klass='Index', inplace='', target_klass='Index', + raises_section='', unique='Index', duplicated='np.ndarray') _index_shared_docs = dict() @@ -2787,7 +2788,7 @@ def get_loc(self, key, method=None, tolerance=None): Integers from 0 to n - 1 indicating that the index at these positions matches the corresponding target values. Missing values in the target are marked by -1. - + %(raises_section)s Examples -------- >>> index = pd.Index(['c', 'a', 'b']) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 956a6f73dd785..53e1a36c48994 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -8,7 +8,7 @@ from pandas._libs import Timedelta, Timestamp from pandas._libs.interval import Interval, IntervalMixin, IntervalTree -from pandas.util._decorators import Appender, cache_readonly +from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.util._exceptions import rewrite_exception from pandas.core.dtypes.cast import ( @@ -822,7 +822,15 @@ def get_value(self, series, key): loc = self.get_loc(key) return series.iloc[loc] - @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) + @Substitution(**dict(_index_doc_kwargs, + **{'raises_section': textwrap.dedent(""" + Raises + ------ + NotImplementedError + If any method argument other than the default of + None is specified as these are not yet implemented. + """)})) + @Appender(_index_shared_docs['get_indexer']) def get_indexer(self, target, method=None, limit=None, tolerance=None): self._check_method(method) From 072408ea8d654e5bbab270f11fbe61246e76691f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 29 May 2019 20:29:40 -0500 Subject: [PATCH 05/51] ENH: Support nested renaming / selection (#26399) --- doc/source/user_guide/groupby.rst | 74 ++++++++-- doc/source/whatsnew/v0.25.0.rst | 41 ++++++ pandas/__init__.py | 2 +- pandas/core/api.py | 2 +- pandas/core/base.py | 14 +- pandas/core/groupby/__init__.py | 4 +- pandas/core/groupby/generic.py | 128 ++++++++++++++++-- pandas/tests/api/test_api.py | 1 + .../tests/groupby/aggregate/test_aggregate.py | 101 +++++++++++++- pandas/tests/groupby/aggregate/test_other.py | 1 + 10 files changed, 337 insertions(+), 31 deletions(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 4f116a42253e5..2014dbd9865f3 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -568,6 +568,67 @@ For a grouped ``DataFrame``, you can rename in a similar manner: 'mean': 'bar', 'std': 'baz'})) +.. _groupby.aggregate.named: + +Named Aggregation +~~~~~~~~~~~~~~~~~ + +.. 
versionadded:: 0.25.0 + +To support column-specific aggregation *with control over the output column names*, pandas +accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation", where + +- The keywords are the *output* column names +- The values are tuples whose first element is the column to select + and the second element is the aggregation to apply to that column. Pandas + provides the ``pandas.NamedAgg`` namedtuple with the fields ``['column', 'aggfunc']`` + to make it clearer what the arguments are. As usual, the aggregation can + be a callable or a string alias. + +.. ipython:: python + + animals = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'], + 'height': [9.1, 6.0, 9.5, 34.0], + 'weight': [7.9, 7.5, 9.9, 198.0]}) + animals + + animals.groupby("kind").agg( + min_height=pd.NamedAgg(column='height', aggfunc='min'), + max_height=pd.NamedAgg(column='height', aggfunc='max'), + average_weight=pd.NamedAgg(column='height', aggfunc=np.mean), + ) + + +``pandas.NamedAgg`` is just a ``namedtuple``. Plain tuples are allowed as well. + +.. ipython:: python + + animals.groupby("kind").agg( + min_height=('height', 'min'), + max_height=('height', 'max'), + average_weight=('height', np.mean), + ) + + +If your desired output column names are not valid python keywords, construct a dictionary +and unpack the keyword arguments + +.. ipython:: python + + animals.groupby("kind").agg(**{ + 'total weight': pd.NamedAgg(column='weight', aggfunc=sum), + }) + +Additional keyword arguments are not passed through to the aggregation functions. Only pairs +of ``(column, aggfunc)`` should be passed as ``**kwargs``. If your aggregation functions +requires additional arguments, partially apply them with :meth:`functools.partial`. + +.. note:: + + For Python 3.5 and earlier, the order of ``**kwargs`` in a functions was not + preserved. This means that the output column ordering would not be + consistent. To ensure consistent ordering, the keys (and so output columns) + will always be sorted for Python 3.5. Applying different functions to DataFrame columns ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -588,19 +649,6 @@ must be either implemented on GroupBy or available via :ref:`dispatching grouped.agg({'C': 'sum', 'D': 'std'}) -.. note:: - - If you pass a dict to ``aggregate``, the ordering of the output columns is - non-deterministic. If you want to be sure the output columns will be in a specific - order, you can use an ``OrderedDict``. Compare the output of the following two commands: - -.. ipython:: python - - from collections import OrderedDict - - grouped.agg({'D': 'std', 'C': 'mean'}) - grouped.agg(OrderedDict([('D', 'std'), ('C', 'mean')])) - .. _groupby.aggregate.cython: Cython-optimized aggregation functions diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 2c66d3e4db321..96837916f815b 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -19,6 +19,47 @@ These are the changes in pandas 0.25.0. See :ref:`release` for a full changelog including other versions of pandas. +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_0250.enhancements.agg_relabel: + +Groupby Aggregation with Relabeling +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas has added special groupby behavior, known as "named aggregation", for naming the +output columns when applying multiple aggregation functions to specific columns (:issue:`18366`). + +.. 
ipython:: python + + animals = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'], + 'height': [9.1, 6.0, 9.5, 34.0], + 'weight': [7.9, 7.5, 9.9, 198.0]}) + animals + animals.groupby("kind").agg( + min_height=pd.NamedAgg(column='height', aggfunc='min'), + max_height=pd.NamedAgg(column='height', aggfunc='max'), + average_weight=pd.NamedAgg(column='height', aggfunc=np.mean), + ) + +Pass the desired columns names as the ``**kwargs`` to ``.agg``. The values of ``**kwargs`` +should be tuples where the first element is the column selection, and the second element is the +aggregation function to apply. Pandas provides the ``pandas.NamedAgg`` namedtuple to make it clearer +what the arguments to the function are, but plain tuples are accepted as well. + +.. ipython:: python + + animals.groupby("kind").agg( + min_height=('height', 'min'), + max_height=('height', 'max'), + average_weight=('height', np.mean), + ) + +Named aggregation is the recommended replacement for the deprecated "dict-of-dicts" +approach to naming the output of column-specific aggregations (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`). + +See :ref:`_groupby.aggregate.named` for more. + .. _whatsnew_0250.enhancements.other: Other Enhancements diff --git a/pandas/__init__.py b/pandas/__init__.py index 6af6f3093c120..4c494b4a62e39 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -65,7 +65,7 @@ to_numeric, to_datetime, to_timedelta, # misc - np, Grouper, factorize, unique, value_counts, + np, Grouper, factorize, unique, value_counts, NamedAgg, array, Categorical, set_eng_float_format, Series, DataFrame, Panel) diff --git a/pandas/core/api.py b/pandas/core/api.py index b7398e433f28f..0106feabcce74 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -21,7 +21,7 @@ DatetimeTZDtype, ) from pandas.core.arrays import Categorical, array -from pandas.core.groupby import Grouper +from pandas.core.groupby import Grouper, NamedAgg from pandas.io.formats.format import set_eng_float_format from pandas.core.index import (Index, CategoricalIndex, Int64Index, UInt64Index, RangeIndex, Float64Index, diff --git a/pandas/core/base.py b/pandas/core/base.py index 3f59871fb5b38..e4274e48d3227 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -340,11 +340,15 @@ def _aggregate(self, arg, *args, **kwargs): def nested_renaming_depr(level=4): # deprecation of nested renaming # GH 15931 - warnings.warn( - ("using a dict with renaming " - "is deprecated and will be removed in a future " - "version"), - FutureWarning, stacklevel=level) + msg = textwrap.dedent("""\ + using a dict with renaming is deprecated and will be removed + in a future version. + + For column-specific groupby renaming, use named aggregation + + >>> df.groupby(...).agg(name=('column', aggfunc)) + """) + warnings.warn(msg, FutureWarning, stacklevel=level) # if we have a dict of any non-scalars # eg. 
{'A' : ['mean']}, normalize all to diff --git a/pandas/core/groupby/__init__.py b/pandas/core/groupby/__init__.py index ac35f3825e5e8..fe50bd91a4f56 100644 --- a/pandas/core/groupby/__init__.py +++ b/pandas/core/groupby/__init__.py @@ -1,4 +1,4 @@ -from pandas.core.groupby.groupby import GroupBy # noqa: F401 from pandas.core.groupby.generic import ( # noqa: F401 - SeriesGroupBy, DataFrameGroupBy) + DataFrameGroupBy, NamedAgg, SeriesGroupBy) +from pandas.core.groupby.groupby import GroupBy # noqa: F401 from pandas.core.groupby.grouper import Grouper # noqa: F401 diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2f665975f96bd..faa4d868bb65a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -6,15 +6,18 @@ which here returns a DataFrameGroupBy object. """ -from collections import OrderedDict, abc +from collections import OrderedDict, abc, namedtuple import copy from functools import partial from textwrap import dedent +import typing +from typing import Any, Callable, List, Union import warnings import numpy as np from pandas._libs import Timestamp, lib +from pandas.compat import PY36 from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution @@ -41,6 +44,10 @@ from pandas.plotting._core import boxplot_frame_groupby +NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"]) +# TODO(typing) the return value on this callable should be any *scalar*. +AggScalar = Union[str, Callable[..., Any]] + class NDFrameGroupBy(GroupBy): @@ -144,8 +151,18 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, return new_items, new_blocks def aggregate(self, func, *args, **kwargs): - _level = kwargs.pop('_level', None) + + relabeling = func is None and _is_multi_agg_with_relabel(**kwargs) + if relabeling: + func, columns, order = _normalize_keyword_aggregation(kwargs) + + kwargs = {} + elif func is None: + # nicer error message + raise TypeError("Must provide 'func' or tuples of " + "'(column, aggfunc).") + result, how = self._aggregate(func, _level=_level, *args, **kwargs) if how is None: return result @@ -179,6 +196,10 @@ def aggregate(self, func, *args, **kwargs): self._insert_inaxis_grouper_inplace(result) result.index = np.arange(len(result)) + if relabeling: + result = result[order] + result.columns = columns + return result._convert(datetime=True) agg = aggregate @@ -791,11 +812,8 @@ def _aggregate_multiple_funcs(self, arg, _level): # list of functions / function names columns = [] for f in arg: - if isinstance(f, str): - columns.append(f) - else: - # protect against callables without names - columns.append(com.get_callable_name(f)) + columns.append(com.get_callable_name(f) or f) + arg = zip(columns, arg) results = OrderedDict() @@ -1296,6 +1314,26 @@ class DataFrameGroupBy(NDFrameGroupBy): A 1 1 2 0.590716 2 3 4 0.704907 + + To control the output names with different aggregations per column, + pandas supports "named aggregation" + + >>> df.groupby("A").agg( + ... b_min=pd.NamedAgg(column="B", aggfunc="min"), + ... c_sum=pd.NamedAgg(column="C", aggfunc="sum")) + b_min c_sum + A + 1 1 -1.956929 + 2 3 -0.322183 + + - The keywords are the *output* column names + - The values are tuples whose first element is the column to select + and the second element is the aggregation to apply to that column. + Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields + ``['column', 'aggfunc']`` to make it clearer what the arguments are. 
+ As usual, the aggregation can be a callable or a string alias. + + See :ref:`groupby.aggregate.named` for more. """) @Substitution(see_also=_agg_see_also_doc, @@ -1304,7 +1342,7 @@ class DataFrameGroupBy(NDFrameGroupBy): klass='DataFrame', axis='') @Appender(_shared_docs['aggregate']) - def aggregate(self, arg, *args, **kwargs): + def aggregate(self, arg=None, *args, **kwargs): return super().aggregate(arg, *args, **kwargs) agg = aggregate @@ -1577,3 +1615,77 @@ def groupby_series(obj, col=None): return results boxplot = boxplot_frame_groupby + + +def _is_multi_agg_with_relabel(**kwargs): + """ + Check whether the kwargs pass to .agg look like multi-agg with relabling. + + Parameters + ---------- + **kwargs : dict + + Returns + ------- + bool + + Examples + -------- + >>> _is_multi_agg_with_relabel(a='max') + False + >>> _is_multi_agg_with_relabel(a_max=('a', 'max'), + ... a_min=('a', 'min')) + True + >>> _is_multi_agg_with_relabel() + False + """ + return all( + isinstance(v, tuple) and len(v) == 2 + for v in kwargs.values() + ) and kwargs + + +def _normalize_keyword_aggregation(kwargs): + """ + Normalize user-provided "named aggregation" kwargs. + + Transforms from the new ``Dict[str, NamedAgg]`` style kwargs + to the old OrderedDict[str, List[scalar]]]. + + Parameters + ---------- + kwargs : dict + + Returns + ------- + aggspec : dict + The transformed kwargs. + columns : List[str] + The user-provided keys. + order : List[Tuple[str, str]] + Pairs of the input and output column names. + + Examples + -------- + >>> _normalize_keyword_aggregation({'output': ('input', 'sum')}) + (OrderedDict([('input', ['sum'])]), ('output',), [('input', 'sum')]) + """ + if not PY36: + kwargs = OrderedDict(sorted(kwargs.items())) + + # Normalize the aggregation functions as Dict[column, List[func]], + # process normally, then fixup the names. 
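+    # Illustrative walk-through (an editorial sketch, not part of the
+    # original patch): given kwargs == {'b_min': ('B', 'min'),
+    # 'b_max': ('B', 'max')}, the loop below builds
+    #     aggspec == OrderedDict([('B', ['min', 'max'])])
+    #     columns == ('b_min', 'b_max')
+    #     order   == [('B', 'min'), ('B', 'max')]
+    # so downstream code can aggregate per input column, then restore the
+    # user's requested output names and ordering.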
+ # TODO(Py35): When we drop python 3.5, change this to + # defaultdict(list) + aggspec = OrderedDict() # type: typing.OrderedDict[str, List[AggScalar]] + order = [] + columns, pairs = list(zip(*kwargs.items())) + + for name, (column, aggfunc) in zip(columns, pairs): + if column in aggspec: + aggspec[column].append(aggfunc) + else: + aggspec[column] = [aggfunc] + order.append((column, + com.get_callable_name(aggfunc) or aggfunc)) + return aggspec, columns, order diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index c92808200ebea..aa42484bf9513 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -47,6 +47,7 @@ class TestPDApi(Base): 'DatetimeTZDtype', 'Int8Dtype', 'Int16Dtype', 'Int32Dtype', 'Int64Dtype', 'UInt8Dtype', 'UInt16Dtype', 'UInt32Dtype', 'UInt64Dtype', + 'NamedAgg', ] # these are already deprecated; awaiting removal diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 6f54d05680698..9e714a1086037 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -2,12 +2,13 @@ test .agg behavior / note that .apply is tested generally in test_groupby.py """ from collections import OrderedDict +import functools import numpy as np import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, concat +from pandas import DataFrame, Index, MultiIndex, Series, compat, concat from pandas.core.base import SpecificationError from pandas.core.groupby.grouper import Grouping import pandas.util.testing as tm @@ -326,3 +327,101 @@ def test_uint64_type_handling(dtype, how): result = df.groupby('y').agg({'x': how}) result.x = result.x.astype(np.int64) tm.assert_frame_equal(result, expected, check_exact=True) + + +class TestNamedAggregation: + + def test_agg_relabel(self): + df = pd.DataFrame({"group": ['a', 'a', 'b', 'b'], + "A": [0, 1, 2, 3], + "B": [5, 6, 7, 8]}) + result = df.groupby("group").agg( + a_max=("A", "max"), + b_max=("B", "max"), + ) + expected = pd.DataFrame({"a_max": [1, 3], "b_max": [6, 8]}, + index=pd.Index(['a', 'b'], name='group'), + columns=['a_max', 'b_max']) + tm.assert_frame_equal(result, expected) + + # order invariance + p98 = functools.partial(np.percentile, q=98) + result = df.groupby('group').agg( + b_min=("B", "min"), + a_min=("A", min), + a_mean=("A", np.mean), + a_max=("A", "max"), + b_max=("B", "max"), + a_98=("A", p98) + ) + expected = pd.DataFrame({"b_min": [5, 7], + "a_min": [0, 2], + "a_mean": [0.5, 2.5], + "a_max": [1, 3], + "b_max": [6, 8], + "a_98": [0.98, 2.98]}, + index=pd.Index(['a', 'b'], name='group'), + columns=['b_min', 'a_min', 'a_mean', + 'a_max', 'b_max', 'a_98']) + if not compat.PY36: + expected = expected[['a_98', 'a_max', 'a_mean', + 'a_min', 'b_max', 'b_min']] + tm.assert_frame_equal(result, expected) + + def test_agg_relabel_non_identifier(self): + df = pd.DataFrame({"group": ['a', 'a', 'b', 'b'], + "A": [0, 1, 2, 3], + "B": [5, 6, 7, 8]}) + + result = df.groupby("group").agg(**{'my col': ('A', 'max')}) + expected = pd.DataFrame({'my col': [1, 3]}, + index=pd.Index(['a', 'b'], name='group')) + tm.assert_frame_equal(result, expected) + + def test_duplicate_raises(self): + # TODO: we currently raise on multiple lambdas. We could *maybe* + # update com.get_callable_name to append `_i` to each lambda. 
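+        # A hypothetical illustration (not from the original patch): a call
+        # such as
+        #     df.groupby("A").agg(a=("B", lambda x: x.min()),
+        #                         b=("B", lambda x: x.max()))
+        # also raises, because com.get_callable_name returns '<lambda>' for
+        # both functions and the generated names collide.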
+ df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + with pytest.raises(SpecificationError, match="Function names"): + df.groupby("A").agg(a=("A", "min"), b=("A", "min")) + + def test_agg_relabel_with_level(self): + df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}, + index=pd.MultiIndex.from_product([['A', 'B'], + ['a', 'b']])) + result = df.groupby(level=0).agg(aa=('A', 'max'), bb=('A', 'min'), + cc=('B', 'mean')) + expected = pd.DataFrame({ + 'aa': [0, 1], + 'bb': [0, 1], + 'cc': [1.5, 3.5] + }, index=['A', 'B']) + tm.assert_frame_equal(result, expected) + + def test_agg_relabel_other_raises(self): + df = pd.DataFrame({"A": [0, 0, 1], "B": [1, 2, 3]}) + grouped = df.groupby("A") + match = 'Must provide' + with pytest.raises(TypeError, match=match): + grouped.agg(foo=1) + + with pytest.raises(TypeError, match=match): + grouped.agg() + + with pytest.raises(TypeError, match=match): + grouped.agg(a=('B', 'max'), b=(1, 2, 3)) + + def test_missing_raises(self): + df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) + with pytest.raises(KeyError, match="Column 'C' does not exist"): + df.groupby("A").agg(c=('C', 'sum')) + + def test_agg_namedtuple(self): + df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) + result = df.groupby("A").agg( + b=pd.NamedAgg("B", "sum"), + c=pd.NamedAgg(column="B", aggfunc="count") + ) + expected = df.groupby("A").agg(b=("B", "sum"), + c=("B", "count")) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 02d8c09bf2c8f..8168cf06ffdb1 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -217,6 +217,7 @@ def test_agg_dict_renaming_deprecation(): df.groupby('A').agg({'B': {'foo': ['sum', 'max']}, 'C': {'bar': ['count', 'min']}}) assert "using a dict with renaming" in str(w[0].message) + assert "named aggregation" in str(w[0].message) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): df.groupby('A')[['B', 'C']].agg({'ma': 'max'}) From 9e76f4a8d3374cd4f21b2a531b19f58a686136ab Mon Sep 17 00:00:00 2001 From: krsnik93 Date: Thu, 30 May 2019 02:33:49 +0100 Subject: [PATCH 06/51] Fix 'observed' kwarg not doing anything on SeriesGroupBy (#26463) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/groupby/generic.py | 93 ++------ pandas/core/groupby/groupby.py | 76 ++++++- pandas/tests/groupby/test_categorical.py | 264 +++++++++++++++-------- 4 files changed, 268 insertions(+), 166 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 96837916f815b..89a9da4a73b35 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -553,6 +553,7 @@ Groupby/Resample/Rolling - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) - Bug in :func:`pandas.core.groupby.GroupBy.size` when grouping only NA values (:issue:`23050`) +- Bug in :func:`Series.groupby` where ``observed`` kwarg was previously ignored (:issue:`24880`) - Bug in :func:`Series.groupby` where using ``groupby`` with a :class:`MultiIndex` Series with a list of labels equal to the length of the series caused incorrect grouping (:issue:`25704`) - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of 
Python (:issue:`25692`) - Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index faa4d868bb65a..121244cde368a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -28,7 +28,6 @@ from pandas.core.dtypes.missing import isna, notna import pandas.core.algorithms as algorithms -from pandas.core.arrays import Categorical from pandas.core.base import DataError, SpecificationError import pandas.core.common as com from pandas.core.frame import DataFrame @@ -36,7 +35,7 @@ from pandas.core.groupby import base from pandas.core.groupby.groupby import ( GroupBy, _apply_docs, _transform_template) -from pandas.core.index import CategoricalIndex, Index, MultiIndex +from pandas.core.index import Index, MultiIndex import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series @@ -852,9 +851,10 @@ def _wrap_output(self, output, index, names=None): return Series(output, index=index, name=name) def _wrap_aggregated_output(self, output, names=None): - return self._wrap_output(output=output, - index=self.grouper.result_index, - names=names) + result = self._wrap_output(output=output, + index=self.grouper.result_index, + names=names) + return self._reindex_output(result)._convert(datetime=True) def _wrap_transformed_output(self, output, names=None): return self._wrap_output(output=output, @@ -874,13 +874,16 @@ def _get_index(): return index if isinstance(values[0], dict): - # GH #823 + # GH #823 #24880 index = _get_index() - result = DataFrame(values, index=index).stack() + result = self._reindex_output(DataFrame(values, index=index)) + # if self.observed is False, + # keep all-NaN rows created while re-indexing + result = result.stack(dropna=self.observed) result.name = self._selection_name return result - if isinstance(values[0], (Series, dict)): + if isinstance(values[0], Series): return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) elif isinstance(values[0], DataFrame): @@ -888,9 +891,11 @@ def _get_index(): return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) else: - # GH #6265 - return Series(values, index=_get_index(), - name=self._selection_name) + # GH #6265 #24880 + result = Series(data=values, + index=_get_index(), + name=self._selection_name) + return self._reindex_output(result) def _aggregate_named(self, func, *args, **kwargs): result = OrderedDict() @@ -1373,7 +1378,8 @@ def _gotitem(self, key, ndim, subset=None): if subset is None: subset = self.obj[key] return SeriesGroupBy(subset, selection=key, - grouper=self.grouper) + grouper=self.grouper, + observed=self.observed) raise AssertionError("invalid ndim for _gotitem") @@ -1445,69 +1451,6 @@ def _wrap_agged_blocks(self, items, blocks): return self._reindex_output(result)._convert(datetime=True) - def _reindex_output(self, result): - """ - If we have categorical groupers, then we want to make sure that - we have a fully reindex-output to the levels. These may have not - participated in the groupings (e.g. 
may have all been - nan groups); - - This can re-expand the output space - """ - - # we need to re-expand the output space to accomodate all values - # whether observed or not in the cartesian product of our groupes - groupings = self.grouper.groupings - if groupings is None: - return result - elif len(groupings) == 1: - return result - - # if we only care about the observed values - # we are done - elif self.observed: - return result - - # reindexing only applies to a Categorical grouper - elif not any(isinstance(ping.grouper, (Categorical, CategoricalIndex)) - for ping in groupings): - return result - - levels_list = [ping.group_index for ping in groupings] - index, _ = MultiIndex.from_product( - levels_list, names=self.grouper.names).sortlevel() - - if self.as_index: - d = {self.obj._get_axis_name(self.axis): index, 'copy': False} - return result.reindex(**d) - - # GH 13204 - # Here, the categorical in-axis groupers, which need to be fully - # expanded, are columns in `result`. An idea is to do: - # result = result.set_index(self.grouper.names) - # .reindex(index).reset_index() - # but special care has to be taken because of possible not-in-axis - # groupers. - # So, we manually select and drop the in-axis grouper columns, - # reindex `result`, and then reset the in-axis grouper columns. - - # Select in-axis groupers - in_axis_grps = ((i, ping.name) for (i, ping) - in enumerate(groupings) if ping.in_axis) - g_nums, g_names = zip(*in_axis_grps) - - result = result.drop(labels=list(g_names), axis=1) - - # Set a temp index and reindex (possibly expanding) - result = result.set_index(self.grouper.result_index - ).reindex(index, copy=False) - - # Reset in-axis grouper columns - # (using level numbers `g_nums` because level names may not be unique) - result = result.reset_index(level=g_nums) - - return result.reset_index(drop=True) - def _iterate_column_groupbys(self): for i, colname in enumerate(self._selected_obj.columns): yield colname, SeriesGroupBy(self._selected_obj.iloc[:, i], diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index aa04b7505afe4..91bb71a1a8af7 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -36,13 +36,14 @@ class providing the base-class of operations. from pandas.api.types import ( is_datetime64_dtype, is_integer_dtype, is_object_dtype) import pandas.core.algorithms as algorithms +from pandas.core.arrays import Categorical from pandas.core.base import ( DataError, GroupByError, PandasObject, SelectionMixin, SpecificationError) import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby import base -from pandas.core.index import Index, MultiIndex +from pandas.core.index import CategoricalIndex, Index, MultiIndex from pandas.core.series import Series from pandas.core.sorting import get_group_index_sorter @@ -2301,6 +2302,79 @@ def tail(self, n=5): mask = self._cumcount_array(ascending=False) < n return self._selected_obj[mask] + def _reindex_output(self, output): + """ + If we have categorical groupers, then we might want to make sure that + we have a fully re-indexed output to the levels. This means expanding + the output space to accommodate all values in the cartesian product of + our groups, regardless of whether they were observed in the data or + not. This will expand the output space if there are missing groups. 
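+
+        For example (an illustrative sketch, not part of the original
+        patch): grouping on two categorical columns with ``observed=False``
+        and aggregating with ``.mean()`` yields one output row per category
+        combination, with ``NaN`` filled in for combinations never observed
+        in the data.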
+ + The method returns early without modifying the input if the number of + groupings is less than 2, self.observed == True or none of the groupers + are categorical. + + Parameters + ---------- + output: Series or DataFrame + Object resulting from grouping and applying an operation. + + Returns + ------- + Series or DataFrame + Object (potentially) re-indexed to include all possible groups. + """ + groupings = self.grouper.groupings + if groupings is None: + return output + elif len(groupings) == 1: + return output + + # if we only care about the observed values + # we are done + elif self.observed: + return output + + # reindexing only applies to a Categorical grouper + elif not any(isinstance(ping.grouper, (Categorical, CategoricalIndex)) + for ping in groupings): + return output + + levels_list = [ping.group_index for ping in groupings] + index, _ = MultiIndex.from_product( + levels_list, names=self.grouper.names).sortlevel() + + if self.as_index: + d = {self.obj._get_axis_name(self.axis): index, 'copy': False} + return output.reindex(**d) + + # GH 13204 + # Here, the categorical in-axis groupers, which need to be fully + # expanded, are columns in `output`. An idea is to do: + # output = output.set_index(self.grouper.names) + # .reindex(index).reset_index() + # but special care has to be taken because of possible not-in-axis + # groupers. + # So, we manually select and drop the in-axis grouper columns, + # reindex `output`, and then reset the in-axis grouper columns. + + # Select in-axis groupers + in_axis_grps = ((i, ping.name) for (i, ping) + in enumerate(groupings) if ping.in_axis) + g_nums, g_names = zip(*in_axis_grps) + + output = output.drop(labels=list(g_names), axis=1) + + # Set a temp index and reindex (possibly expanding) + output = output.set_index(self.grouper.result_index + ).reindex(index, copy=False) + + # Reset in-axis grouper columns + # (using level numbers `g_nums` because level names may not be unique) + output = output.reset_index(level=g_nums) + + return output.reset_index(drop=True) + GroupBy._add_numeric_operations() diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 112f7629d735a..f24fa0daa5b18 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1,3 +1,4 @@ +from collections import OrderedDict from datetime import datetime import numpy as np @@ -25,7 +26,7 @@ def f(a): ordered=a.ordered) return a - index = pd.MultiIndex.from_product(map(f, args), names=names) + index = MultiIndex.from_product(map(f, args), names=names) return result.reindex(index).sort_index() @@ -189,7 +190,7 @@ def test_level_get_group(observed): # GH15155 df = DataFrame(data=np.arange(2, 22, 2), index=MultiIndex( - levels=[pd.CategoricalIndex(["a", "b"]), range(10)], + levels=[CategoricalIndex(["a", "b"]), range(10)], codes=[[0] * 5 + [1] * 5, range(10)], names=["Index1", "Index2"])) g = df.groupby(level=["Index1"], observed=observed) @@ -197,7 +198,7 @@ def test_level_get_group(observed): # expected should equal test.loc[["a"]] # GH15166 expected = DataFrame(data=np.arange(2, 12, 2), - index=pd.MultiIndex(levels=[pd.CategoricalIndex( + index=MultiIndex(levels=[CategoricalIndex( ["a", "b"]), range(5)], codes=[[0] * 5, range(5)], names=["Index1", "Index2"])) @@ -265,7 +266,7 @@ def test_observed(observed): # multiple groupers with a non-cat gb = df.groupby(['A', 'B', 'C'], observed=observed) - exp_index = pd.MultiIndex.from_arrays( + exp_index = MultiIndex.from_arrays( [cat1, cat2, 
['foo', 'bar'] * 2], names=['A', 'B', 'C']) expected = DataFrame({'values': Series( @@ -280,7 +281,7 @@ def test_observed(observed): tm.assert_frame_equal(result, expected) gb = df.groupby(['A', 'B'], observed=observed) - exp_index = pd.MultiIndex.from_arrays( + exp_index = MultiIndex.from_arrays( [cat1, cat2], names=['A', 'B']) expected = DataFrame({'values': [1, 2, 3, 4]}, @@ -296,25 +297,25 @@ def test_observed(observed): # https://github.com/pandas-dev/pandas/issues/8138 d = {'cat': - pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"], - ordered=True), + Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"], + ordered=True), 'ints': [1, 1, 2, 2], 'val': [10, 20, 30, 40]} - df = pd.DataFrame(d) + df = DataFrame(d) # Grouping on a single column groups_single_key = df.groupby("cat", observed=observed) result = groups_single_key.mean() - exp_index = pd.CategoricalIndex(list('ab'), name="cat", - categories=list('abc'), - ordered=True) + exp_index = CategoricalIndex(list('ab'), name="cat", + categories=list('abc'), + ordered=True) expected = DataFrame({"ints": [1.5, 1.5], "val": [20., 30]}, index=exp_index) if not observed: - index = pd.CategoricalIndex(list('abc'), name="cat", - categories=list('abc'), - ordered=True) + index = CategoricalIndex(list('abc'), name="cat", + categories=list('abc'), + ordered=True) expected = expected.reindex(index) tm.assert_frame_equal(result, expected) @@ -324,9 +325,9 @@ def test_observed(observed): result = groups_double_key.agg('mean') expected = DataFrame( {"val": [10, 30, 20, 40], - "cat": pd.Categorical(['a', 'a', 'b', 'b'], - categories=['a', 'b', 'c'], - ordered=True), + "cat": Categorical(['a', 'a', 'b', 'b'], + categories=['a', 'b', 'c'], + ordered=True), "ints": [1, 2, 1, 2]}).set_index(["cat", "ints"]) if not observed: expected = cartesian_product_for_groupers( @@ -347,7 +348,7 @@ def test_observed(observed): # with as_index d = {'foo': [10, 8, 4, 8, 4, 1, 1], 'bar': [10, 20, 30, 40, 50, 60, 70], 'baz': ['d', 'c', 'e', 'a', 'a', 'd', 'c']} - df = pd.DataFrame(d) + df = DataFrame(d) cat = pd.cut(df['foo'], np.linspace(0, 10, 3)) df['range'] = cat groups = df.groupby(['range', 'baz'], as_index=False, observed=observed) @@ -360,7 +361,7 @@ def test_observed(observed): def test_observed_codes_remap(observed): d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]} - df = pd.DataFrame(d) + df = DataFrame(d) values = pd.cut(df['C1'], [1, 2, 3, 6]) values.name = "cat" groups_double_key = df.groupby([values, 'C2'], observed=observed) @@ -401,8 +402,8 @@ def test_observed_groups(observed): # gh-20583 # test that we have the appropriate groups - cat = pd.Categorical(['a', 'c', 'a'], categories=['a', 'b', 'c']) - df = pd.DataFrame({'cat': cat, 'vals': [1, 2, 3]}) + cat = Categorical(['a', 'c', 'a'], categories=['a', 'b', 'c']) + df = DataFrame({'cat': cat, 'vals': [1, 2, 3]}) g = df.groupby('cat', observed=observed) result = g.groups @@ -419,9 +420,9 @@ def test_observed_groups(observed): def test_observed_groups_with_nan(observed): # GH 24740 - df = pd.DataFrame({'cat': pd.Categorical(['a', np.nan, 'a'], - categories=['a', 'b', 'd']), - 'vals': [1, 2, 3]}) + df = DataFrame({'cat': Categorical(['a', np.nan, 'a'], + categories=['a', 'b', 'd']), + 'vals': [1, 2, 3]}) g = df.groupby('cat', observed=observed) result = g.groups if observed: @@ -435,16 +436,16 @@ def test_observed_groups_with_nan(observed): def test_dataframe_categorical_with_nan(observed): # GH 21151 - s1 = pd.Categorical([np.nan, 'a', np.nan, 'a'], - 
categories=['a', 'b', 'c']) - s2 = pd.Series([1, 2, 3, 4]) - df = pd.DataFrame({'s1': s1, 's2': s2}) + s1 = Categorical([np.nan, 'a', np.nan, 'a'], + categories=['a', 'b', 'c']) + s2 = Series([1, 2, 3, 4]) + df = DataFrame({'s1': s1, 's2': s2}) result = df.groupby('s1', observed=observed).first().reset_index() if observed: - expected = DataFrame({'s1': pd.Categorical(['a'], + expected = DataFrame({'s1': Categorical(['a'], categories=['a', 'b', 'c']), 's2': [2]}) else: - expected = DataFrame({'s1': pd.Categorical(['a', 'b', 'c'], + expected = DataFrame({'s1': Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c']), 's2': [2, np.nan, np.nan]}) tm.assert_frame_equal(result, expected) @@ -459,11 +460,11 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort): # Build a dataframe with cat having one unobserved category ('missing'), # and a Series with identical values - label = pd.Categorical(['d', 'a', 'b', 'a', 'd', 'b'], - categories=['a', 'b', 'missing', 'd'], - ordered=ordered) - val = pd.Series(['d', 'a', 'b', 'a', 'd', 'b']) - df = pd.DataFrame({'label': label, 'val': val}) + label = Categorical(['d', 'a', 'b', 'a', 'd', 'b'], + categories=['a', 'b', 'missing', 'd'], + ordered=ordered) + val = Series(['d', 'a', 'b', 'a', 'd', 'b']) + df = DataFrame({'label': label, 'val': val}) # aggregate on the Categorical result = (df.groupby('label', observed=observed, sort=sort)['val'] @@ -471,8 +472,8 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort): # If ordering works, we expect index labels equal to aggregation results, # except for 'observed=False': label 'missing' has aggregation None - label = pd.Series(result.index.array, dtype='object') - aggr = pd.Series(result.array) + label = Series(result.index.array, dtype='object') + aggr = Series(result.array) if not observed: aggr[aggr.isna()] = 'missing' if not all(label == aggr): @@ -555,9 +556,9 @@ def test_categorical_index(): def test_describe_categorical_columns(): # GH 11558 - cats = pd.CategoricalIndex(['qux', 'foo', 'baz', 'bar'], - categories=['foo', 'bar', 'baz', 'qux'], - ordered=True) + cats = CategoricalIndex(['qux', 'foo', 'baz', 'bar'], + categories=['foo', 'bar', 'baz', 'qux'], + ordered=True) df = DataFrame(np.random.randn(20, 4), columns=cats) result = df.groupby([1, 2, 3, 4] * 5).describe() @@ -567,22 +568,22 @@ def test_describe_categorical_columns(): def test_unstack_categorical(): # GH11558 (example is taken from the original issue) - df = pd.DataFrame({'a': range(10), - 'medium': ['A', 'B'] * 5, - 'artist': list('XYXXY') * 2}) + df = DataFrame({'a': range(10), + 'medium': ['A', 'B'] * 5, + 'artist': list('XYXXY') * 2}) df['medium'] = df['medium'].astype('category') gcat = df.groupby( ['artist', 'medium'], observed=False)['a'].count().unstack() result = gcat.describe() - exp_columns = pd.CategoricalIndex(['A', 'B'], ordered=False, - name='medium') + exp_columns = CategoricalIndex(['A', 'B'], ordered=False, + name='medium') tm.assert_index_equal(result.columns, exp_columns) tm.assert_categorical_equal(result.columns.values, exp_columns.values) result = gcat['A'] + gcat['B'] - expected = pd.Series([6, 4], index=pd.Index(['X', 'Y'], name='artist')) + expected = Series([6, 4], index=Index(['X', 'Y'], name='artist')) tm.assert_series_equal(result, expected) @@ -644,22 +645,22 @@ def test_preserve_categories(): categories = list('abc') # ordered=True - df = DataFrame({'A': pd.Categorical(list('ba'), - categories=categories, - ordered=True)}) - index = 
pd.CategoricalIndex(categories, categories, ordered=True) + df = DataFrame({'A': Categorical(list('ba'), + categories=categories, + ordered=True)}) + index = CategoricalIndex(categories, categories, ordered=True) tm.assert_index_equal( df.groupby('A', sort=True, observed=False).first().index, index) tm.assert_index_equal( df.groupby('A', sort=False, observed=False).first().index, index) # ordered=False - df = DataFrame({'A': pd.Categorical(list('ba'), - categories=categories, - ordered=False)}) - sort_index = pd.CategoricalIndex(categories, categories, ordered=False) - nosort_index = pd.CategoricalIndex(list('bac'), list('bac'), - ordered=False) + df = DataFrame({'A': Categorical(list('ba'), + categories=categories, + ordered=False)}) + sort_index = CategoricalIndex(categories, categories, ordered=False) + nosort_index = CategoricalIndex(list('bac'), list('bac'), + ordered=False) tm.assert_index_equal( df.groupby('A', sort=True, observed=False).first().index, sort_index) @@ -857,94 +858,94 @@ def test_sort_datetimelike(): def test_empty_sum(): # https://github.com/pandas-dev/pandas/issues/18678 - df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'], - categories=['a', 'b', 'c']), - 'B': [1, 2, 1]}) - expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A') + df = DataFrame({"A": Categorical(['a', 'a', 'b'], + categories=['a', 'b', 'c']), + 'B': [1, 2, 1]}) + expected_idx = CategoricalIndex(['a', 'b', 'c'], name='A') # 0 by default result = df.groupby("A", observed=False).B.sum() - expected = pd.Series([3, 1, 0], expected_idx, name='B') + expected = Series([3, 1, 0], expected_idx, name='B') tm.assert_series_equal(result, expected) # min_count=0 result = df.groupby("A", observed=False).B.sum(min_count=0) - expected = pd.Series([3, 1, 0], expected_idx, name='B') + expected = Series([3, 1, 0], expected_idx, name='B') tm.assert_series_equal(result, expected) # min_count=1 result = df.groupby("A", observed=False).B.sum(min_count=1) - expected = pd.Series([3, 1, np.nan], expected_idx, name='B') + expected = Series([3, 1, np.nan], expected_idx, name='B') tm.assert_series_equal(result, expected) # min_count>1 result = df.groupby("A", observed=False).B.sum(min_count=2) - expected = pd.Series([3, np.nan, np.nan], expected_idx, name='B') + expected = Series([3, np.nan, np.nan], expected_idx, name='B') tm.assert_series_equal(result, expected) def test_empty_prod(): # https://github.com/pandas-dev/pandas/issues/18678 - df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'], - categories=['a', 'b', 'c']), - 'B': [1, 2, 1]}) + df = DataFrame({"A": Categorical(['a', 'a', 'b'], + categories=['a', 'b', 'c']), + 'B': [1, 2, 1]}) - expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A') + expected_idx = CategoricalIndex(['a', 'b', 'c'], name='A') # 1 by default result = df.groupby("A", observed=False).B.prod() - expected = pd.Series([2, 1, 1], expected_idx, name='B') + expected = Series([2, 1, 1], expected_idx, name='B') tm.assert_series_equal(result, expected) # min_count=0 result = df.groupby("A", observed=False).B.prod(min_count=0) - expected = pd.Series([2, 1, 1], expected_idx, name='B') + expected = Series([2, 1, 1], expected_idx, name='B') tm.assert_series_equal(result, expected) # min_count=1 result = df.groupby("A", observed=False).B.prod(min_count=1) - expected = pd.Series([2, 1, np.nan], expected_idx, name='B') + expected = Series([2, 1, np.nan], expected_idx, name='B') tm.assert_series_equal(result, expected) def test_groupby_multiindex_categorical_datetime(): # 
https://github.com/pandas-dev/pandas/issues/21390 - df = pd.DataFrame({ - 'key1': pd.Categorical(list('abcbabcba')), - 'key2': pd.Categorical( + df = DataFrame({ + 'key1': Categorical(list('abcbabcba')), + 'key2': Categorical( list(pd.date_range('2018-06-01 00', freq='1T', periods=3)) * 3), 'values': np.arange(9), }) result = df.groupby(['key1', 'key2']).mean() - idx = pd.MultiIndex.from_product( - [pd.Categorical(['a', 'b', 'c']), - pd.Categorical(pd.date_range('2018-06-01 00', freq='1T', periods=3))], + idx = MultiIndex.from_product( + [Categorical(['a', 'b', 'c']), + Categorical(pd.date_range('2018-06-01 00', freq='1T', periods=3))], names=['key1', 'key2']) - expected = pd.DataFrame( + expected = DataFrame( {'values': [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx) assert_frame_equal(result, expected) @pytest.mark.parametrize("as_index, expected", [ - (True, pd.Series( - index=pd.MultiIndex.from_arrays( - [pd.Series([1, 1, 2], dtype='category'), - [1, 2, 2]], names=['a', 'b'] + (True, Series( + index=MultiIndex.from_arrays( + [Series([1, 1, 2], dtype='category'), + [1, 2, 2]], names=['a', 'b'] ), data=[1, 2, 3], name='x' )), - (False, pd.DataFrame({ - 'a': pd.Series([1, 1, 2], dtype='category'), + (False, DataFrame({ + 'a': Series([1, 1, 2], dtype='category'), 'b': [1, 2, 2], 'x': [1, 2, 3] })) ]) def test_groupby_agg_observed_true_single_column(as_index, expected): # GH-23970 - df = pd.DataFrame({ - 'a': pd.Series([1, 1, 2], dtype='category'), + df = DataFrame({ + 'a': Series([1, 1, 2], dtype='category'), 'b': [1, 2, 2], 'x': [1, 2, 3] }) @@ -957,9 +958,92 @@ def test_groupby_agg_observed_true_single_column(as_index, expected): @pytest.mark.parametrize('fill_value', [None, np.nan, pd.NaT]) def test_shift(fill_value): - ct = pd.Categorical(['a', 'b', 'c', 'd'], - categories=['a', 'b', 'c', 'd'], ordered=False) - expected = pd.Categorical([None, 'a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], ordered=False) + ct = Categorical(['a', 'b', 'c', 'd'], + categories=['a', 'b', 'c', 'd'], ordered=False) + expected = Categorical([None, 'a', 'b', 'c'], + categories=['a', 'b', 'c', 'd'], ordered=False) res = ct.shift(1, fill_value=fill_value) assert_equal(res, expected) + + +@pytest.fixture +def df_cat(df): + """ + DataFrame with multiple categorical columns and a column of integers. + Shortened so as not to contain all possible combinations of categories. + Useful for testing `observed` kwarg functionality on GroupBy objects. 
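+
+    (Editorial note, inferred from the expected values in the tests below:
+    the four retained rows cover the (A, B) pairs (foo, one), (foo, two),
+    (bar, one) and (bar, three), so (foo, three) and (bar, two) exist as
+    categories but are never observed.)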
+ + Parameters + ---------- + df: DataFrame + Non-categorical, longer DataFrame from another fixture, used to derive + this one + + Returns + ------- + df_cat: DataFrame + """ + df_cat = df.copy()[:4] # leave out some groups + df_cat['A'] = df_cat['A'].astype('category') + df_cat['B'] = df_cat['B'].astype('category') + df_cat['C'] = Series([1, 2, 3, 4]) + df_cat = df_cat.drop(['D'], axis=1) + return df_cat + + +@pytest.mark.parametrize('operation, kwargs', [ + ('agg', dict(dtype='category')), + ('apply', dict())]) +def test_seriesgroupby_observed_true(df_cat, operation, kwargs): + # GH 24880 + index = MultiIndex.from_frame( + DataFrame({'A': ['foo', 'foo', 'bar', 'bar'], + 'B': ['one', 'two', 'one', 'three'] + }, **kwargs)) + expected = Series(data=[1, 3, 2, 4], index=index, name='C') + grouped = df_cat.groupby(['A', 'B'], observed=True)['C'] + result = getattr(grouped, operation)(sum) + assert_series_equal(result, expected) + + +@pytest.mark.parametrize('operation', ['agg', 'apply']) +@pytest.mark.parametrize('observed', [False, None]) +def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): + # GH 24880 + index, _ = MultiIndex.from_product( + [CategoricalIndex(['bar', 'foo'], ordered=False), + CategoricalIndex(['one', 'three', 'two'], ordered=False)], + names=['A', 'B']).sortlevel() + + expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], + index=index, name='C') + grouped = df_cat.groupby(['A', 'B'], observed=observed)['C'] + result = getattr(grouped, operation)(sum) + assert_series_equal(result, expected) + + +@pytest.mark.parametrize("observed, index, data", [ + (True, MultiIndex.from_tuples( + [('foo', 'one', 'min'), ('foo', 'one', 'max'), + ('foo', 'two', 'min'), ('foo', 'two', 'max'), + ('bar', 'one', 'min'), ('bar', 'one', 'max'), + ('bar', 'three', 'min'), ('bar', 'three', 'max')], + names=['A', 'B', None]), [1, 1, 3, 3, 2, 2, 4, 4]), + (False, MultiIndex.from_product( + [CategoricalIndex(['bar', 'foo'], ordered=False), + CategoricalIndex(['one', 'three', 'two'], ordered=False), + Index(['min', 'max'])], + names=['A', 'B', None]), + [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3]), + (None, MultiIndex.from_product( + [CategoricalIndex(['bar', 'foo'], ordered=False), + CategoricalIndex(['one', 'three', 'two'], ordered=False), + Index(['min', 'max'])], + names=['A', 'B', None]), + [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3])]) +def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): + # GH 24880 + expected = Series(data=data, index=index, name='C') + result = df_cat.groupby(['A', 'B'], observed=observed)['C'].apply( + lambda x: OrderedDict([('min', x.min()), ('max', x.max())])) + assert_series_equal(result, expected) From 8154efb0c1a64295cf54e00025b4ab09bcd02752 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 29 May 2019 18:49:16 -0700 Subject: [PATCH 07/51] Remove Unnecessary Subclasses from test_excel (#26553) --- pandas/io/excel/_base.py | 2 +- pandas/tests/io/test_excel.py | 42 ++++++++++++++++------------------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 3af6be7a371e7..24412b26b021b 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -591,7 +591,7 @@ class ExcelWriter(metaclass=abc.ABCMeta): def __new__(cls, path, engine=None, **kwargs): # only switch class if generic(ExcelWriter) - if issubclass(cls, ExcelWriter): + if cls is ExcelWriter: if engine is None or (isinstance(engine, str) and engine == 'auto'): if 
isinstance(path, str): diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 6db3d1d4ab34d..1421fc94b67f4 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1995,11 +1995,10 @@ def test_path_local_path(self, merge_cells, engine, ext): @td.skip_if_no('openpyxl') -@pytest.mark.parametrize("merge_cells,ext,engine", [ - (None, '.xlsx', 'openpyxl')]) -class TestOpenpyxlTests(_WriterBase): +@pytest.mark.parametrize("ext", ['.xlsx']) +class TestOpenpyxlTests: - def test_to_excel_styleconverter(self, merge_cells, ext, engine): + def test_to_excel_styleconverter(self, ext): from openpyxl import styles hstyle = { @@ -2053,7 +2052,7 @@ def test_to_excel_styleconverter(self, merge_cells, ext, engine): assert kw['number_format'] == number_format assert kw['protection'] == protection - def test_write_cells_merge_styled(self, merge_cells, ext, engine): + def test_write_cells_merge_styled(self, ext): from pandas.io.formats.excel import ExcelCell sheet_name = 'merge_styled' @@ -2087,7 +2086,7 @@ def test_write_cells_merge_styled(self, merge_cells, ext, engine): @pytest.mark.parametrize("mode,expected", [ ('w', ['baz']), ('a', ['foo', 'bar', 'baz'])]) - def test_write_append_mode(self, merge_cells, ext, engine, mode, expected): + def test_write_append_mode(self, ext, mode, expected): import openpyxl df = DataFrame([1], columns=['baz']) @@ -2099,7 +2098,7 @@ def test_write_append_mode(self, merge_cells, ext, engine, mode, expected): wb.worksheets[1]['A1'].value = 'bar' wb.save(f) - writer = ExcelWriter(f, engine=engine, mode=mode) + writer = ExcelWriter(f, engine='openpyxl', mode=mode) df.to_excel(writer, sheet_name='baz', index=False) writer.save() @@ -2112,12 +2111,11 @@ def test_write_append_mode(self, merge_cells, ext, engine, mode, expected): @td.skip_if_no('xlwt') -@pytest.mark.parametrize("merge_cells,ext,engine", [ - (None, '.xls', 'xlwt')]) -class TestXlwtTests(_WriterBase): +@pytest.mark.parametrize("ext,", ['.xls']) +class TestXlwtTests: def test_excel_raise_error_on_multiindex_columns_and_no_index( - self, merge_cells, ext, engine): + self, ext): # MultiIndex as columns is not yet implemented 9794 cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), @@ -2127,8 +2125,7 @@ def test_excel_raise_error_on_multiindex_columns_and_no_index( with ensure_clean(ext) as path: df.to_excel(path, index=False) - def test_excel_multiindex_columns_and_index_true(self, merge_cells, ext, - engine): + def test_excel_multiindex_columns_and_index_true(self, ext): cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), ('2014', 'weight')]) @@ -2136,7 +2133,7 @@ def test_excel_multiindex_columns_and_index_true(self, merge_cells, ext, with ensure_clean(ext) as path: df.to_excel(path, index=True) - def test_excel_multiindex_index(self, merge_cells, ext, engine): + def test_excel_multiindex_index(self, ext): # MultiIndex as index works so assert no error #9794 cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), @@ -2145,7 +2142,7 @@ def test_excel_multiindex_index(self, merge_cells, ext, engine): with ensure_clean(ext) as path: df.to_excel(path, index=False) - def test_to_excel_styleconverter(self, merge_cells, ext, engine): + def test_to_excel_styleconverter(self, ext): import xlwt hstyle = {"font": {"bold": True}, @@ -2164,21 +2161,20 @@ def test_to_excel_styleconverter(self, merge_cells, ext, engine): assert xlwt.Alignment.HORZ_CENTER == xls_style.alignment.horz assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert - def 
test_write_append_mode_raises(self, merge_cells, ext, engine): + def test_write_append_mode_raises(self, ext): msg = "Append mode is not supported with xlwt!" with ensure_clean(ext) as f: with pytest.raises(ValueError, match=msg): - ExcelWriter(f, engine=engine, mode='a') + ExcelWriter(f, engine='xlwt', mode='a') @td.skip_if_no('xlsxwriter') -@pytest.mark.parametrize("merge_cells,ext,engine", [ - (None, '.xlsx', 'xlsxwriter')]) -class TestXlsxWriterTests(_WriterBase): +@pytest.mark.parametrize("ext", ['.xlsx']) +class TestXlsxWriterTests: @td.skip_if_no('openpyxl') - def test_column_format(self, merge_cells, ext, engine): + def test_column_format(self, ext): # Test that column formats are applied to cells. Test for issue #9167. # Applicable to xlsxwriter only. with warnings.catch_warnings(): @@ -2222,12 +2218,12 @@ def test_column_format(self, merge_cells, ext, engine): assert read_num_format == num_format - def test_write_append_mode_raises(self, merge_cells, ext, engine): + def test_write_append_mode_raises(self, ext): msg = "Append mode is not supported with xlsxwriter!" with ensure_clean(ext) as f: with pytest.raises(ValueError, match=msg): - ExcelWriter(f, engine=engine, mode='a') + ExcelWriter(f, engine='xlsxwriter', mode='a') class TestExcelWriterEngineTests: From a60d1bd45a99519fad5024068db956e0aa1cc6a1 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 30 May 2019 14:15:18 +0100 Subject: [PATCH 08/51] DEPR: remove Panel-specific parts of core.indexing (#25567) --- pandas/_libs/indexing.pyx | 4 + pandas/core/indexing.py | 88 ++----------------- pandas/tests/indexing/test_indexing.py | 113 +++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 82 deletions(-) diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx index 6e62978c8477f..308e914b7b5b7 100644 --- a/pandas/_libs/indexing.pyx +++ b/pandas/_libs/indexing.pyx @@ -17,4 +17,8 @@ cdef class _NDFrameIndexerBase: ndim = self._ndim if ndim is None: ndim = self._ndim = self.obj.ndim + if ndim > 2: + msg = ("NDFrameIndexer does not support NDFrame objects with" + " ndim > 2") + raise ValueError(msg) return ndim diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 93e56834b62f6..86158fa9ee529 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -11,7 +11,7 @@ from pandas.core.dtypes.common import ( ensure_platform_int, is_float, is_integer, is_integer_dtype, is_iterator, is_list_like, is_numeric_dtype, is_scalar, is_sequence, is_sparse) -from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.dtypes.missing import _infer_fill_value, isna import pandas.core.common as com @@ -450,10 +450,6 @@ def _setitem_with_indexer(self, indexer, value): self.obj._maybe_update_cacher(clear=True) return self.obj - # set using setitem (Panel and > dims) - elif self.ndim >= 3: - return self.obj.__setitem__(indexer, value) - # set item_labels = self.obj._get_axis(info_axis) @@ -642,9 +638,6 @@ def can_do_equal_len(): elif isinstance(value, ABCDataFrame): value = self._align_frame(indexer, value) - if isinstance(value, ABCPanel): - value = self._align_panel(indexer, value) - # check for chained assignment self.obj._check_is_chained_assignment_possible() @@ -690,7 +683,6 @@ def ravel(i): sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 is_frame = self.obj.ndim == 2 - is_panel = self.obj.ndim >= 3 obj = self.obj # are we a single alignable value on a non-primary @@ -702,11 +694,6 @@ def 
ravel(i): if is_frame: single_aligner = single_aligner and aligners[0] - # panel - elif is_panel: - single_aligner = (single_aligner and - (aligners[1] or aligners[2])) - # we have a frame, with multiple indexers on both axes; and a # series, so need to broadcast (see GH5206) if (sum_aligners == self.ndim and @@ -738,7 +725,7 @@ def ravel(i): return ser.reindex(new_ix)._values # 2 dims - elif single_aligner and is_frame: + elif single_aligner: # reindex along index ax = self.obj.axes[1] @@ -746,30 +733,6 @@ def ravel(i): return ser._values.copy() return ser.reindex(ax)._values - # >2 dims - elif single_aligner: - - broadcast = [] - for n, labels in enumerate(self.obj._get_plane_axes(i)): - - # reindex along the matching dimensions - if len(labels & ser.index): - ser = ser.reindex(labels) - else: - broadcast.append((n, len(labels))) - - # broadcast along other dims - ser = ser._values.copy() - for (axis, l) in broadcast: - shape = [-1] * (len(broadcast) + 1) - shape[axis] = l - ser = np.tile(ser, l).reshape(shape) - - if self.obj.ndim == 3: - ser = ser.T - - return ser - elif is_scalar(indexer): ax = self.obj._get_axis(1) @@ -782,7 +745,6 @@ def ravel(i): def _align_frame(self, indexer, df): is_frame = self.obj.ndim == 2 - is_panel = self.obj.ndim >= 3 if isinstance(indexer, tuple): @@ -802,21 +764,6 @@ def _align_frame(self, indexer, df): else: sindexers.append(i) - # panel - if is_panel: - - # need to conform to the convention - # as we are not selecting on the items axis - # and we have a single indexer - # GH 7763 - if len(sindexers) == 1 and sindexers[0] != 0: - df = df.T - - if idx is None: - idx = df.index - if cols is None: - cols = df.columns - if idx is not None and cols is not None: if df.index.equals(idx) and df.columns.equals(cols): @@ -843,24 +790,8 @@ def _align_frame(self, indexer, df): val = df.reindex(index=ax)._values return val - elif is_scalar(indexer) and is_panel: - idx = self.obj.axes[1] - cols = self.obj.axes[2] - - # by definition we are indexing on the 0th axis - # a passed in dataframe which is actually a transpose - # of what is needed - if idx.equals(df.index) and cols.equals(df.columns): - return df.copy()._values - - return df.reindex(idx, columns=cols)._values - raise ValueError('Incompatible indexer with DataFrame') - def _align_panel(self, indexer, df): - raise NotImplementedError("cannot set using an indexer with a Panel " - "yet!") - def _getitem_tuple(self, tup): try: return self._getitem_lowerdim(tup) @@ -1059,13 +990,6 @@ def _getitem_nested_tuple(self, tup): # has the dim of the obj changed? # GH 7199 if obj.ndim < current_ndim: - - # GH 7516 - # if had a 3 dim and are going to a 2d - # axes are reversed on a DataFrame - if i >= 1 and current_ndim == 3 and obj.ndim == 2: - obj = obj.T - axis -= 1 return obj @@ -1562,8 +1486,8 @@ class _LocIndexer(_LocationIndexer): - A boolean array of the same length as the axis being sliced, e.g. ``[True, False, True]``. - - A ``callable`` function with one argument (the calling Series, DataFrame - or Panel) and that returns valid output for indexing (one of the above) + - A ``callable`` function with one argument (the calling Series or + DataFrame) and that returns valid output for indexing (one of the above) See more at :ref:`Selection by Label ` @@ -1931,8 +1855,8 @@ class _iLocIndexer(_LocationIndexer): - A list or array of integers, e.g. ``[4, 3, 0]``. - A slice object with ints, e.g. ``1:7``. - A boolean array. 
- - A ``callable`` function with one argument (the calling Series, DataFrame - or Panel) and that returns valid output for indexing (one of the above). + - A ``callable`` function with one argument (the calling Series or + DataFrame) and that returns valid output for indexing (one of the above). This is useful in method chains, when you don't have a reference to the calling object, but would like to base your selection on some value. diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 22f6855717e80..a0e3df182b129 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -11,11 +11,14 @@ import pandas as pd from pandas import DataFrame, Index, NaT, Series +from pandas.core.generic import NDFrame from pandas.core.indexing import ( _maybe_numeric_slice, _non_reducing_slice, validate_indices) from pandas.tests.indexing.common import Base, _mklbl import pandas.util.testing as tm +ignore_ix = pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning") + # ------------------------------------------------------------------------ # Indexing test cases @@ -53,6 +56,93 @@ def test_setitem_ndarray_1d(self): with pytest.raises(ValueError): df[2:5] = np.arange(1, 4) * 1j + @pytest.mark.parametrize('index', tm.all_index_generator(5), + ids=lambda x: type(x).__name__) + @pytest.mark.parametrize('obj', [ + lambda i: Series(np.arange(len(i)), index=i), + lambda i: DataFrame( + np.random.randn(len(i), len(i)), index=i, columns=i) + ], ids=['Series', 'DataFrame']) + @pytest.mark.parametrize('idxr, idxr_id', [ + (lambda x: x, 'getitem'), + (lambda x: x.loc, 'loc'), + (lambda x: x.iloc, 'iloc'), + pytest.param(lambda x: x.ix, 'ix', marks=ignore_ix) + ]) + def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): + # GH 25567 + obj = obj(index) + idxr = idxr(obj) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + msg = (r"Buffer has wrong number of dimensions \(expected 1," + r" got 3\)|" + "The truth value of an array with more than one element is" + " ambiguous|" + "Cannot index with multidimensional key|" + r"Wrong number of dimensions. 
values.ndim != ndim \[3 != 1\]|" + "unhashable type: 'numpy.ndarray'" # TypeError + ) + + if (isinstance(obj, Series) and idxr_id == 'getitem' + and index.inferred_type in [ + 'string', 'datetime64', 'period', 'timedelta64', + 'boolean', 'categorical']): + idxr[nd3] + else: + if (isinstance(obj, DataFrame) and idxr_id == 'getitem' + and index.inferred_type == 'boolean'): + error = TypeError + else: + error = ValueError + + with pytest.raises(error, match=msg): + idxr[nd3] + + @pytest.mark.parametrize('index', tm.all_index_generator(5), + ids=lambda x: type(x).__name__) + @pytest.mark.parametrize('obj', [ + lambda i: Series(np.arange(len(i)), index=i), + lambda i: DataFrame( + np.random.randn(len(i), len(i)), index=i, columns=i) + ], ids=['Series', 'DataFrame']) + @pytest.mark.parametrize('idxr, idxr_id', [ + (lambda x: x, 'setitem'), + (lambda x: x.loc, 'loc'), + (lambda x: x.iloc, 'iloc'), + pytest.param(lambda x: x.ix, 'ix', marks=ignore_ix) + ]) + def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): + # GH 25567 + obj = obj(index) + idxr = idxr(obj) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + msg = (r"Buffer has wrong number of dimensions \(expected 1," + r" got 3\)|" + "The truth value of an array with more than one element is" + " ambiguous|" + "Only 1-dimensional input arrays are supported|" + "'pandas._libs.interval.IntervalTree' object has no attribute" + " 'set_value'|" # AttributeError + "unhashable type: 'numpy.ndarray'|" # TypeError + r"^\[\[\[" # pandas.core.indexing.IndexingError + ) + + if ((idxr_id == 'iloc') + or ((isinstance(obj, Series) and idxr_id == 'setitem' + and index.inferred_type in [ + 'floating', 'string', 'datetime64', 'period', 'timedelta64', + 'boolean', 'categorical'])) + or (idxr_id == 'ix' and index.inferred_type in [ + 'string', 'datetime64', 'period', 'boolean'])): + idxr[nd3] = 0 + else: + with pytest.raises( + (ValueError, AttributeError, TypeError, + pd.core.indexing.IndexingError), match=msg): + idxr[nd3] = 0 + def test_inf_upcast(self): # GH 16957 # We should be able to use np.inf as a key @@ -1015,3 +1105,26 @@ def test_extension_array_cross_section_converts(): result = df.iloc[0] tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize('idxr, error, error_message', [ + (lambda x: x, + AttributeError, + "'numpy.ndarray' object has no attribute 'get'"), + (lambda x: x.loc, + AttributeError, + "type object 'NDFrame' has no attribute '_AXIS_ALIASES'"), + (lambda x: x.iloc, + AttributeError, + "type object 'NDFrame' has no attribute '_AXIS_ALIASES'"), + pytest.param( + lambda x: x.ix, + ValueError, + "NDFrameIndexer does not support NDFrame objects with ndim > 2", + marks=ignore_ix) +]) +def test_ndframe_indexing_raises(idxr, error, error_message): + # GH 25567 + frame = NDFrame(np.random.randint(5, size=(2, 2, 2))) + with pytest.raises(error, match=error_message): + idxr(frame)[0] From 7c8041b9b6dd44a7388bc8518dc0cd2f7303c2d2 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 30 May 2019 19:16:57 +0000 Subject: [PATCH 09/51] PERF/CI: fix benchmark import error + run asv check on all builds (#26575) * PERF: fix asv import error * CI: run asv check on all builds * PERF: since TimeGrouper was removed, remove benchmarks concerning it * PERF: fix benchmark frame_methods.Iteration.mem_itertuples_to_list The runtime of the benchmark increased in asv 0.4 (which has upgraded asv.extern.asizeof), so bump the timeout upward. 
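
For illustration only (not part of the committed diff below), the
guarded-import pattern this patch applies in
asv_bench/benchmarks/io/parsers.py looks like the following sketch;
the helper names are the private functions touched by the patch:

    try:
        from pandas._libs.tslibs.parsing import (
            _concat_date_cols, _does_string_look_like_datetime)
    except ImportError:
        # asv imports every benchmark module up front, so swallowing
        # the ImportError here keeps a moved or removed private helper
        # from failing collection of the entire benchmark suite
        pass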
--- asv_bench/benchmarks/frame_methods.py | 2 ++ asv_bench/benchmarks/groupby.py | 7 +------ asv_bench/benchmarks/io/parsers.py | 8 ++++++-- azure-pipelines.yml | 3 ++- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 0c1d861ce0839..5b76eeba115a4 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -96,6 +96,8 @@ def time_dict_rename_both_axes(self): class Iteration: + # mem_itertuples_* benchmarks are slow + timeout = 120 def setup(self): N = 1000 diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 4dfce079dd09c..3097ada6d2022 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -1,12 +1,11 @@ from functools import partial from itertools import product from string import ascii_letters -import warnings import numpy as np from pandas import ( - Categorical, DataFrame, MultiIndex, Series, TimeGrouper, Timestamp, + Categorical, DataFrame, MultiIndex, Series, Timestamp, date_range, period_range) import pandas.util.testing as tm @@ -301,10 +300,6 @@ def setup(self): def time_multi_size(self): self.df.groupby(['key1', 'key2']).size() - def time_dt_timegrouper_size(self): - with warnings.catch_warnings(record=True): - self.df.groupby(TimeGrouper(key='dates', freq='M')).size() - def time_category_size(self): self.draws.groupby(self.cats).size() diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py index 493955d394443..edba0358c821a 100644 --- a/asv_bench/benchmarks/io/parsers.py +++ b/asv_bench/benchmarks/io/parsers.py @@ -1,7 +1,11 @@ import numpy as np -from pandas._libs.tslibs.parsing import ( - _concat_date_cols, _does_string_look_like_datetime) +try: + from pandas._libs.tslibs.parsing import ( + _concat_date_cols, _does_string_look_like_datetime) +except ImportError: + # Avoid whole benchmark suite import failure on asv (currently 0.4) + pass class DoesStringLookLikeDatetime(object): diff --git a/azure-pipelines.yml b/azure-pipelines.yml index eee38dadfab90..17eaee5458af8 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -97,10 +97,11 @@ jobs: - script: | export PATH=$HOME/miniconda3/bin:$PATH source activate pandas-dev + cd asv_bench + asv check -E existing git remote add upstream https://github.com/pandas-dev/pandas.git git fetch upstream if git diff upstream/master --name-only | grep -q "^asv_bench/"; then - cd asv_bench asv machine --yes ASV_OUTPUT="$(asv dev)" if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then From 4c54dd298692783f417cbaa57d5fc1c0dc1f7c72 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 30 May 2019 20:51:03 +0100 Subject: [PATCH 10/51] TST: update tests\plotting\test_frame.py for mpl 3.1.0 (#26577) --- pandas/plotting/_compat.py | 1 + pandas/tests/plotting/test_frame.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_compat.py b/pandas/plotting/_compat.py index 4077bef8f36f5..36bbe0f4ec174 100644 --- a/pandas/plotting/_compat.py +++ b/pandas/plotting/_compat.py @@ -17,3 +17,4 @@ def inner(): _mpl_ge_2_2_3 = _mpl_version('2.2.3', operator.ge) _mpl_ge_3_0_0 = _mpl_version('3.0.0', operator.ge) +_mpl_ge_3_1_0 = _mpl_version('3.1.0', operator.ge) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index aede84ac831a6..f42f86540e46b 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -23,6 +23,7 @@ from 
pandas.io.formats.printing import pprint_thing import pandas.plotting as plotting +from pandas.plotting._compat import _mpl_ge_3_1_0 @td.skip_if_no_mpl @@ -68,7 +69,11 @@ def test_plot(self): self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) df = DataFrame({'x': [1, 2], 'y': [3, 4]}) - with pytest.raises(AttributeError, match='Unknown property blarg'): + if _mpl_ge_3_1_0(): + msg = "'Line2D' object has no property 'blarg'" + else: + msg = "Unknown property blarg" + with pytest.raises(AttributeError, match=msg): df.plot.line(blarg=True) df = DataFrame(np.random.rand(10, 3), From 0041935572774c6599dd9b48e9acc7cceb559004 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 30 May 2019 22:40:36 -0500 Subject: [PATCH 11/51] Revert test_constructors xfail (#26586) Reverts https://github.com/pandas-dev/pandas/pull/26548 xref https://github.com/numpy/numpy/pull/13663 Closes https://github.com/pandas-dev/pandas/issues/26546 --- pandas/tests/frame/test_constructors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index f371f4e93a29e..68017786eb6a6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -15,7 +15,7 @@ import pandas as pd from pandas import ( Categorical, DataFrame, Index, MultiIndex, RangeIndex, Series, Timedelta, - Timestamp, compat, date_range, isna) + Timestamp, date_range, isna) from pandas.tests.frame.common import TestData import pandas.util.testing as tm @@ -113,7 +113,6 @@ def test_constructor_dtype_list_data(self): assert df.loc[1, 0] is None assert df.loc[0, 1] == '2' - @pytest.mark.xfail(compat.numpy._is_numpy_dev, reason="GH-26546") def test_constructor_list_frames(self): # see gh-3243 result = DataFrame([DataFrame()]) From 7f318658b92155678b31780722277d1f8c8df569 Mon Sep 17 00:00:00 2001 From: lrjball <50599110+lrjball@users.noreply.github.com> Date: Fri, 31 May 2019 13:41:10 +0100 Subject: [PATCH 12/51] DOC: Fixed redirects in various parts of the documentation (#26497) --- pandas/core/arrays/categorical.py | 3 ++- pandas/core/arrays/interval.py | 2 +- pandas/core/dtypes/concat.py | 2 +- pandas/core/generic.py | 10 +++++----- pandas/core/groupby/groupby.py | 2 +- pandas/core/groupby/grouper.py | 2 +- pandas/core/indexes/datetimes.py | 8 ++++---- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 3 ++- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 4 ++-- pandas/core/indexing.py | 4 ++-- pandas/core/reshape/concat.py | 2 +- pandas/core/tools/datetimes.py | 2 +- pandas/core/window.py | 6 +++--- pandas/io/json/json.py | 4 ++-- pandas/io/parsers.py | 4 ++-- pandas/io/pytables.py | 6 +++--- 18 files changed, 35 insertions(+), 33 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0fa705369908a..89b86c66d7b05 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -272,7 +272,8 @@ class Categorical(ExtensionArray, PandasObject): Notes ----- See the `user guide - `_ for more. + `_ + for more. Examples -------- diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 94b9dc8ebab55..4f628eff43167 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -95,7 +95,7 @@ Notes ----- See the `user guide -`_ +`_ for more. 
%(examples)s\ diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index f8488b7a153e3..b22ed45642cf6 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -244,7 +244,7 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): ----- To learn more about categories, see `link - `__ + `__ Examples -------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 87db069d94893..0596d0ab844ec 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3328,8 +3328,8 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): "A value is trying to be set on a copy of a slice from a " "DataFrame\n\n" "See the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/" - "indexing.html#indexing-view-versus-copy" + "http://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" ) else: @@ -3338,8 +3338,8 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): "DataFrame.\n" "Try using .loc[row_indexer,col_indexer] = value " "instead\n\nSee the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/" - "indexing.html#indexing-view-versus-copy" + "http://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" ) if value == 'raise': @@ -7762,7 +7762,7 @@ def asfreq(self, freq, method=None, how=None, normalize=False, Notes ----- To learn more about the frequency strings, please see `this link - `__. + `__. Examples -------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 91bb71a1a8af7..2b190c53da53d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -219,7 +219,7 @@ class providing the base-class of operations. Notes ----- See more `here -`_ +`_ Examples -------- diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 04d407ebc670d..febfdc7bdf908 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -49,7 +49,7 @@ class Grouper: This will groupby the specified frequency if the target selection (via key or level) is a datetime-like object. For full specification of available frequencies, please see `here - `_. + `_. axis : number/name of the axis, defaults to 0 sort : boolean, default to False whether to sort the resulting labels diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e68431b79dcd3..1bf3cb86811cb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -215,7 +215,7 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index, DatetimeDelegateMixin): Notes ----- To learn more about the frequency strings, please see `this link - `__. + `__. Creating a DatetimeIndex based on `start`, `periods`, and `end` has been deprecated in favor of :func:`date_range`. @@ -1377,7 +1377,7 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None, ``start`` and ``end`` (closed on both sides). To learn more about the frequency strings, please see `this link - `__. + `__. Examples -------- @@ -1533,7 +1533,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, desired. To learn more about the frequency strings, please see `this link - `__. + `__. Examples -------- @@ -1605,7 +1605,7 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, must be specified. To learn more about the frequency strings, please see `this link - `__. 
+ `__. Returns ------- diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 53e1a36c48994..41cf23c5542a9 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1215,7 +1215,7 @@ def interval_range(start=None, end=None, periods=None, freq=None, ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link - `__. + `__. Examples -------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f1553d9db835f..ec2cc70d1a352 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -182,7 +182,8 @@ class MultiIndex(Index): Notes ----- See the `user guide - `_ for more. + `_ + for more. Examples -------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 64272431cf703..b20b0c6f853d9 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -939,7 +939,7 @@ def period_range(start=None, end=None, periods=None, freq=None, name=None): must be specified. To learn more about the frequency strings, please see `this link - `__. + `__. Examples -------- diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 6ae17e62b49c6..0574a4b41c920 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -141,7 +141,7 @@ class TimedeltaIndex(DatetimeIndexOpsMixin, dtl.TimelikeOps, Int64Index, Notes ----- To learn more about the frequency strings, please see `this link - `__. + `__. Creating a TimedeltaIndex based on `start`, `periods`, and `end` has been deprecated in favor of :func:`timedelta_range`. @@ -730,7 +730,7 @@ def timedelta_range(start=None, end=None, periods=None, freq=None, ``start`` and ``end`` (closed on both sides). To learn more about the frequency strings, please see `this link - `__. + `__. Examples -------- diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 86158fa9ee529..7f4827be6dff7 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1190,7 +1190,7 @@ def _validate_read_indexer(self, key, indexer, axis, raise_missing=False): KeyError in the future, you can use .reindex() as an alternative. See the documentation here: - https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike""") # noqa + https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike""") # noqa if not (ax.is_categorical() or ax.is_interval()): warnings.warn(_missing_key_warning, @@ -1339,7 +1339,7 @@ class _IXIndexer(_NDFrameIndexer): .iloc for positional indexing See the documentation here: - http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""") # noqa + http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated""") # noqa def __init__(self, name, obj): warnings.warn(self._ix_deprecation_warning, diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index ee3ed3899a55f..4523a6ad48f19 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -100,7 +100,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, A walkthrough of how this method fits in with other tools for combining pandas objects can be found `here - `__. + `__. 
Examples -------- diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 817d539d4ad6f..0756bdb3777ec 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -533,7 +533,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, dtype: datetime64[ns] If a date does not meet the `timestamp limitations - `_, passing errors='ignore' will return the original input instead of raising any exception. diff --git a/pandas/core/window.py b/pandas/core/window.py index d51e12035c829..f332075380c79 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -462,7 +462,7 @@ class Window(_Window): See the notes below for further information. on : str, optional For a DataFrame, column on which to calculate - the rolling window, rather than the index + the rolling window, rather than the index. axis : int or str, default 0 closed : str, default None Make the interval closed on the 'right', 'left', 'both' or @@ -488,7 +488,7 @@ class Window(_Window): changed to the center of the window by setting ``center=True``. To learn more about the offsets & frequency strings, please see `this link - `__. + `__. The recognized win_types are: @@ -2188,7 +2188,7 @@ class EWM(_Rolling): (if adjust is True), and 1-alpha and alpha (if adjust is False). More details can be found at - http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows + http://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows Examples -------- diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index ee9d9e000d7e3..20bed9bff7383 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -330,8 +330,8 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, chunksize : integer, default None Return JsonReader object for iteration. - See the `line-delimted json docs - `_ + See the `line-delimited json docs + `_ for more information on ``chunksize``. This can only be passed if `lines=True`. If this is None, the file will be read into memory all at once. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index c65c11e840c27..bcbdd80865360 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -58,7 +58,7 @@ into chunks. Additional help can be found in the online docs for -`IO Tools `_. +`IO Tools `_. Parameters ---------- @@ -753,7 +753,7 @@ def read_fwf(filepath_or_buffer: FilePathOrBuffer, into chunks. Additional help can be found in the `online docs for IO Tools - `_. + `_. Parameters ---------- diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 11f705e88179d..53ef2395a302a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -867,8 +867,8 @@ def put(self, key, value, format=None, append=False, **kwargs): This will force Table format, append the input data to the existing. data_columns : list of columns to create as data columns, or True to - use all columns. See - `here `__ # noqa + use all columns. See `here + `__. encoding : default None, provide an encoding for strings dropna : boolean, default False, do not write an ALL nan row to the store settable by the option 'io.hdf.dropna_table' @@ -949,7 +949,7 @@ def append(self, key, value, format=None, append=True, columns=None, List of columns to create as indexed data columns for on-disk queries, or True to use all columns. By default only the axes of the object are indexed. See `here - `__. + `__. 
min_itemsize : dict of columns that specify minimum string sizes
         nan_rep : string to use as string nan representation
         chunksize : size to chunk the writing

From c6a7cc1e08f9203caf57599244cd1c51f6347875 Mon Sep 17 00:00:00 2001
From: h-vetinari <33685575+h-vetinari@users.noreply.github.com>
Date: Sat, 1 Jun 2019 02:17:53 +0200
Subject: [PATCH 13/51] TST: Datetime conftest.py improvements (#26596)

xref gh-23537
---
 pandas/conftest.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 3c411f8ba3e31..8f71028f51ab4 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -376,10 +376,16 @@ def unique_nulls_fixture(request):
     FixedOffset(0), FixedOffset(-300), timezone.utc,
     timezone(timedelta(hours=1)),
     timezone(timedelta(hours=-1), name='foo')]
+TIMEZONE_IDS = ['None', 'UTC', 'US/Eastern', 'Asia/Tokyo',
+                'dateutil/US/Pacific', 'dateutil/Asia/Singapore',
+                'dateutil.tz.tzutc()', 'dateutil.tz.tzlocal()',
+                'pytz.FixedOffset(300)', 'pytz.FixedOffset(0)',
+                'pytz.FixedOffset(-300)', 'datetime.timezone.utc',
+                'datetime.timezone.+1', 'datetime.timezone.-1.named']


-@td.parametrize_fixture_doc(str(TIMEZONES))
-@pytest.fixture(params=TIMEZONES)
+@td.parametrize_fixture_doc(str(TIMEZONE_IDS))
+@pytest.fixture(params=TIMEZONES, ids=TIMEZONE_IDS)
 def tz_naive_fixture(request):
     """
     Fixture for trying timezones including default (None): {0}
@@ -387,8 +393,8 @@ def tz_naive_fixture(request):
     return request.param


-@td.parametrize_fixture_doc(str(TIMEZONES[1:]))
-@pytest.fixture(params=TIMEZONES[1:])
+@td.parametrize_fixture_doc(str(TIMEZONE_IDS[1:]))
+@pytest.fixture(params=TIMEZONES[1:], ids=TIMEZONE_IDS[1:])
 def tz_aware_fixture(request):
     """
     Fixture for trying explicit timezones: {0}
@@ -398,6 +404,8 @@ def tz_aware_fixture(request):

 # ----------------------------------------------------------------
 # Dtypes
+# ----------------------------------------------------------------
+
 UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
 UNSIGNED_EA_INT_DTYPES = ["UInt8", "UInt16", "UInt32", "UInt64"]
 SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
@@ -409,8 +417,8 @@ def tz_aware_fixture(request):
 COMPLEX_DTYPES = [complex, "complex64", "complex128"]
 STRING_DTYPES = [str, 'str', 'U']

-DATETIME_DTYPES = ['datetime64[ns]', 'M8[ns]']
-TIMEDELTA_DTYPES = ['timedelta64[ns]', 'm8[ns]']
+DATETIME64_DTYPES = ['datetime64[ns]', 'M8[ns]']
+TIMEDELTA64_DTYPES = ['timedelta64[ns]', 'm8[ns]']
 BOOL_DTYPES = [bool, 'bool']
 BYTES_DTYPES = [bytes, 'bytes']

@@ -418,7 +426,7 @@ def tz_aware_fixture(request):
 ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES
 ALL_NUMPY_DTYPES = (ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES +
-                    DATETIME_DTYPES + TIMEDELTA_DTYPES + BOOL_DTYPES +
+                    DATETIME64_DTYPES + TIMEDELTA64_DTYPES + BOOL_DTYPES +
                     OBJECT_DTYPES + BYTES_DTYPES)


From 7f3423c5a22122a01e69a1fc5090bb86deb845fb Mon Sep 17 00:00:00 2001
From: Alexander Nordin
Date: Sat, 1 Jun 2019 10:04:14 -0400
Subject: [PATCH 14/51] ERR: better error message on too large excel sheet
 (#26080)

---
 doc/source/whatsnew/v0.25.0.rst |  1 +
 pandas/io/formats/excel.py      | 10 ++++++++++
 pandas/tests/io/test_excel.py   | 18 ++++++++++++++++++
 3 files changed, 29 insertions(+)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 89a9da4a73b35..ae5b6aafe4c7d 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -533,6 +533,7 @@ I/O
 - Fixed memory leak in :meth:`DataFrame.to_json` when dealing with
numeric data (:issue:`24889`) - Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`) - Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`) +- :meth:`DataFrame.to_excel` now raises a ``ValueError`` when the caller's dimensions exceed the limitations of Excel (:issue:`26051`) Plotting ^^^^^^^^ diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index fd6e3304ec4ef..4db00e34b39e2 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -341,6 +341,9 @@ class ExcelFormatter: This is only called for body cells. """ + max_rows = 2**20 + max_cols = 2**14 + def __init__(self, df, na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, merge_cells=False, inf_rep='inf', style_converter=None): @@ -648,6 +651,13 @@ def write(self, writer, sheet_name='Sheet1', startrow=0, from pandas.io.excel import ExcelWriter from pandas.io.common import _stringify_path + num_rows, num_cols = self.df.shape + if num_rows > self.max_rows or num_cols > self.max_cols: + raise ValueError("This sheet is too large! Your sheet size is: " + + "{}, {} ".format(num_rows, num_cols) + + "Max sheet size is: {}, {}". + format(self.max_rows, self.max_cols)) + if isinstance(writer, ExcelWriter): need_save = False else: diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 1421fc94b67f4..7693caf3b31d2 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1118,6 +1118,24 @@ class and any subclasses, on account of the `autouse=True` class TestExcelWriter(_WriterBase): # Base class for test cases to run with different Excel writers. + def test_excel_sheet_size(self): + + # GH 26080 + breaking_row_count = 2**20 + 1 + breaking_col_count = 2**14 + 1 + # purposely using two arrays to prevent memory issues while testing + row_arr = np.zeros(shape=(breaking_row_count, 1)) + col_arr = np.zeros(shape=(1, breaking_col_count)) + row_df = pd.DataFrame(row_arr) + col_df = pd.DataFrame(col_arr) + + msg = "sheet is too large" + with pytest.raises(ValueError, match=msg): + row_df.to_excel(self.path) + + with pytest.raises(ValueError, match=msg): + col_df.to_excel(self.path) + def test_excel_sheet_by_name_raise(self, *_): import xlrd From 3fbe6270980e1234d94b65103d0085f73c7c7f10 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 1 Jun 2019 14:08:20 +0000 Subject: [PATCH 15/51] CLN: remove sample_time attributes from benchmarks (#26598) --- asv_bench/benchmarks/index_object.py | 1 - asv_bench/benchmarks/rolling.py | 6 ------ 2 files changed, 7 deletions(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 0fdf46e7c64de..896a20bae2069 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -52,7 +52,6 @@ def time_is_dates_only(self): class Ops: - sample_time = 0.2 params = ['float', 'int'] param_names = ['dtype'] diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 2532d326dff4b..033b466c8b9be 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -4,7 +4,6 @@ class Methods: - sample_time = 0.2 params = (['DataFrame', 'Series'], [10, 1000], ['int', 'float'], @@ -23,7 +22,6 @@ def time_rolling(self, constructor, window, dtype, method): class ExpandingMethods: - sample_time = 0.2 params = (['DataFrame', 'Series'], ['int', 'float'], ['median', 'mean', 
'max', 'min', 'std', 'count', 'skew', 'kurt', @@ -41,7 +39,6 @@ def time_expanding(self, constructor, dtype, method): class EWMMethods: - sample_time = 0.2 params = (['DataFrame', 'Series'], [10, 1000], ['int', 'float'], @@ -58,7 +55,6 @@ def time_ewm(self, constructor, window, dtype, method): class VariableWindowMethods(Methods): - sample_time = 0.2 params = (['DataFrame', 'Series'], ['50s', '1h', '1d'], ['int', 'float'], @@ -75,7 +71,6 @@ def setup(self, constructor, window, dtype, method): class Pairwise: - sample_time = 0.2 params = ([10, 1000, None], ['corr', 'cov'], [True, False]) @@ -95,7 +90,6 @@ def time_pairwise(self, window, method, pairwise): class Quantile: - sample_time = 0.2 params = (['DataFrame', 'Series'], [10, 1000], ['int', 'float'], From a498a2e609291d4e3007f83d6e82a0b7283ecfb1 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 1 Jun 2019 15:09:27 +0100 Subject: [PATCH 16/51] TST: add concrete examples of dataframe fixtures to docstrings (#26593) --- pandas/tests/frame/conftest.py | 169 +++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 27c0e070c10c2..c451cd58f1497 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -11,6 +11,25 @@ def float_frame(): Fixture for DataFrame of floats with index of unique strings Columns are ['A', 'B', 'C', 'D']. + + A B C D + P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 + qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 + tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 + wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 + M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 + QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 + r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 + ... ... ... ... ... + IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 + lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 + qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 + yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 + 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 + eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 + xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 + + [30 rows x 4 columns] """ return DataFrame(tm.getSeriesData()) @@ -21,6 +40,25 @@ def float_frame_with_na(): Fixture for DataFrame of floats with index of unique strings Columns are ['A', 'B', 'C', 'D']; some entries are missing + + A B C D + ABwBzA0ljw -1.128865 -0.897161 0.046603 0.274997 + DJiRzmbyQF 0.728869 0.233502 0.722431 -0.890872 + neMgPD5UBF 0.486072 -1.027393 -0.031553 1.449522 + 0yWA4n8VeX -1.937191 -1.142531 0.805215 -0.462018 + 3slYUbbqU1 0.153260 1.164691 1.489795 -0.545826 + soujjZ0A08 NaN NaN NaN NaN + 7W6NLGsjB9 NaN NaN NaN NaN + ... ... ... ... ... 
+ uhfeaNkCR1 -0.231210 -0.340472 0.244717 -0.901590 + n6p7GYuBIV -0.419052 1.922721 -0.125361 -0.727717 + ZhzAeY6p1y 1.234374 -1.425359 -0.827038 -0.633189 + uWdPsORyUh 0.046738 -0.980445 -1.102965 0.605503 + 3DJA6aN590 -0.091018 -1.684734 -1.100900 0.215947 + 2GBPAzdbMk -2.883405 -1.021071 1.209877 1.633083 + sHadBoyVHw -2.223032 -0.326384 0.258931 0.245517 + + [30 rows x 4 columns] """ df = DataFrame(tm.getSeriesData()) # set some NAs @@ -35,6 +73,25 @@ def bool_frame_with_na(): Fixture for DataFrame of booleans with index of unique strings Columns are ['A', 'B', 'C', 'D']; some entries are missing + + A B C D + zBZxY2IDGd False False False False + IhBWBMWllt False True True True + ctjdvZSR6R True False True True + AVTujptmxb False True False True + G9lrImrSWq False False False True + sFFwdIUfz2 NaN NaN NaN NaN + s15ptEJnRb NaN NaN NaN NaN + ... ... ... ... ... + UW41KkDyZ4 True True False False + l9l6XkOdqV True False False False + X2MeZfzDYA False True False False + xWkIKU7vfX False True False True + QOhL6VmpGU False False False True + 22PwkRJdat False True False False + kfboQ3VeIK True False True False + + [30 rows x 4 columns] """ df = DataFrame(tm.getSeriesData()) > 0 df = df.astype(object) @@ -50,6 +107,25 @@ def int_frame(): Fixture for DataFrame of ints with index of unique strings Columns are ['A', 'B', 'C', 'D'] + + A B C D + vpBeWjM651 1 0 1 0 + 5JyxmrP1En -1 0 0 0 + qEDaoD49U2 -1 1 0 0 + m66TkTfsFe 0 0 0 0 + EHPaNzEUFm -1 0 -1 0 + fpRJCevQhi 2 0 0 0 + OlQvnmfi3Q 0 0 -2 0 + ... .. .. .. .. + uB1FPlz4uP 0 0 0 1 + EcSe6yNzCU 0 0 -1 0 + L50VudaiI8 -1 1 -2 0 + y3bpw4nwIp 0 -1 0 0 + H0RdLLwrCT 1 1 0 0 + rY82K0vMwm 0 0 0 0 + 1OPIUjnkjk 2 0 0 0 + + [30 rows x 4 columns] """ df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()}) # force these all to int64 to avoid platform testing issues @@ -62,6 +138,25 @@ def datetime_frame(): Fixture for DataFrame of floats with DatetimeIndex Columns are ['A', 'B', 'C', 'D'] + + A B C D + 2000-01-03 -1.122153 0.468535 0.122226 1.693711 + 2000-01-04 0.189378 0.486100 0.007864 -1.216052 + 2000-01-05 0.041401 -0.835752 -0.035279 -0.414357 + 2000-01-06 0.430050 0.894352 0.090719 0.036939 + 2000-01-07 -0.620982 -0.668211 -0.706153 1.466335 + 2000-01-10 -0.752633 0.328434 -0.815325 0.699674 + 2000-01-11 -2.236969 0.615737 -0.829076 -1.196106 + ... ... ... ... ... + 2000-02-03 1.642618 -0.579288 0.046005 1.385249 + 2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351 + 2000-02-07 -2.656149 -0.601387 1.410148 0.444150 + 2000-02-08 -1.201881 -1.289040 0.772992 -1.445300 + 2000-02-09 1.377373 0.398619 1.008453 -0.928207 + 2000-02-10 0.473194 -0.636677 0.984058 0.511519 + 2000-02-11 -0.965556 0.408313 -1.312844 -0.381948 + + [30 rows x 4 columns] """ return DataFrame(tm.getTimeSeriesData()) @@ -72,6 +167,25 @@ def float_string_frame(): Fixture for DataFrame of floats and strings with index of unique strings Columns are ['A', 'B', 'C', 'D', 'foo']. + + A B C D foo + w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar + PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar + ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar + 3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar + khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar + LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar + HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar + ... ... ... ... ... ... 
+ 9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar + h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar + mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar + oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar + 9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar + jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar + lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar + + [30 rows x 5 columns] """ df = DataFrame(tm.getSeriesData()) df['foo'] = 'bar' @@ -84,6 +198,25 @@ def mixed_float_frame(): Fixture for DataFrame of different float types with index of unique strings Columns are ['A', 'B', 'C', 'D']. + + A B C D + GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993 + KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588 + VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731 + kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607 + CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266 + 0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541 + tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710 + ... ... ... ... ... + 7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237 + 4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612 + B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653 + hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427 + 1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827 + 9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204 + xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502 + + [30 rows x 4 columns] """ df = DataFrame(tm.getSeriesData()) df.A = df.A.astype('float32') @@ -99,6 +232,25 @@ def mixed_int_frame(): Fixture for DataFrame of different int types with index of unique strings Columns are ['A', 'B', 'C', 'D']. + + A B C D + mUrCZ67juP 0 1 2 2 + rw99ACYaKS 0 1 0 0 + 7QsEcpaaVU 0 1 1 1 + xkrimI2pcE 0 1 0 0 + dz01SuzoS8 0 1 255 255 + ccQkqOHX75 -1 1 0 0 + DN0iXaoDLd 0 1 0 0 + ... .. .. ... ... + Dfb141wAaQ 1 1 254 254 + IPD8eQOVu5 0 1 0 0 + CcaKulsCmv 0 1 0 0 + rIBa8gu7E5 0 1 0 0 + RP6peZmh5o 0 1 1 1 + NMb9pipQWQ 0 1 0 0 + PqgbJEzjib 0 1 3 3 + + [30 rows x 4 columns] """ df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()}) df.A = df.A.astype('int32') @@ -114,6 +266,11 @@ def timezone_frame(): Fixture for DataFrame of date_range Series with different time zones Columns are ['A', 'B', 'C']; some entries are missing + + A B C + 0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00 + 1 2013-01-02 NaT NaT + 2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00 """ df = DataFrame({'A': date_range('20130101', periods=3), 'B': date_range('20130101', periods=3, @@ -131,6 +288,11 @@ def simple_frame(): Fixture for simple 3x3 DataFrame Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. + + one two three + a 1.0 2.0 3.0 + b 4.0 5.0 6.0 + c 7.0 8.0 9.0 """ arr = np.array([[1., 2., 3.], [4., 5., 6.], @@ -147,6 +309,13 @@ def frame_of_index_cols(): Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')]; 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique. 
+
+         A      B  C         D         E    (tuple, as, label)
+    0  foo    one  a  0.608477 -0.012500           -1.664297
+    1  foo    two  b -0.633460  0.249614           -0.364411
+    2  foo  three  c  0.615256  2.154968           -0.834666
+    3  bar    one  d  0.234246  1.085675            0.718445
+    4  bar    two  e  0.533841 -0.005702           -3.533912
     """
     df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
                     'B': ['one', 'two', 'three', 'one', 'two'],

From 3628e1897bfcc548e2fccb020cd436a037da4f31 Mon Sep 17 00:00:00 2001
From: Marc Garcia
Date: Sat, 1 Jun 2019 15:12:40 +0100
Subject: [PATCH 17/51] CI/DOC: Building documentation with azure (#26591)

---
 .travis.yml                |  4 +--
 azure-pipelines.yml        | 62 +++++++++++++++++++++++++++++++++++++-
 ci/deps/travis-36-doc.yaml | 46 ----------------------------
 3 files changed, 63 insertions(+), 49 deletions(-)
 delete mode 100644 ci/deps/travis-36-doc.yaml

diff --git a/.travis.yml b/.travis.yml
index ce8817133a477..90dd904e6cb1e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -51,14 +51,14 @@ matrix:
     # In allow_failures
     - dist: trusty
      env:
-        - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
+        - JOB="3.6, doc" ENV_FILE="environment.yml" DOC=true

   allow_failures:
     - dist: trusty
      env:
        - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
    - dist: trusty
      env:
-        - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
+        - JOB="3.6, doc" ENV_FILE="environment.yml" DOC=true

 before_install:
   - echo "before_install"
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index eee38dadfab90..9f83917024049 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -15,7 +15,7 @@ jobs:
       name: Windows
       vmImage: vs2017-win2016

-- job: 'Checks_and_doc'
+- job: 'Checks'
   pool:
     vmImage: ubuntu-16.04
   timeoutInMinutes: 90
@@ -116,3 +116,63 @@ jobs:
           fi
       displayName: 'Running benchmarks'
       condition: true
+
+- job: 'Docs'
+  pool:
+    vmImage: ubuntu-16.04
+  timeoutInMinutes: 90
+  steps:
+  - script: |
+      echo '##vso[task.setvariable variable=CONDA_ENV]pandas-dev'
+      echo '##vso[task.setvariable variable=ENV_FILE]environment.yml'
+    displayName: 'Setting environment variables'
+
+  - script: |
+      export PATH=$HOME/miniconda3/bin:$PATH
+      sudo apt-get install -y libc6-dev-i386
+      ci/setup_env.sh
+    displayName: 'Setup environment and build pandas'
+
+  - script: |
+      export PATH=$HOME/miniconda3/bin:$PATH
+      source activate pandas-dev
+      doc/make.py
+    displayName: 'Build documentation'
+
+  - script: |
+      cd doc/build/html
+      git init
+      touch .nojekyll
+      git add --all .
+      git config user.email "pandas-dev@python.org"
+      git config user.name "pandas-docs-bot"
+      git commit -m "pandas documentation in master"
+    displayName: 'Create git repo for docs build'
+    condition : |
+      and(not(eq(variables['Build.Reason'], 'PullRequest')),
+          eq(variables['Build.SourceBranch'], 'refs/heads/master'))
+
+  # For this task to work, the following steps are required:
+  # 1. Go to "Library > Secure files" in the azure-pipelines dashboard: https://dev.azure.com/pandas-dev/pandas/_library?itemType=SecureFiles
+  # 2. Click on "+ Secure file"
+  # 3. Upload the private key (the file name must match the one specified in the "sshKeySecureFile" input below, "pandas_docs_key")
+  # 4. Click on the file name after it is created, tick the box "Authorize for use in all pipelines" and save
+  # 5.
The public key specified in "sshPublicKey" is the pair of the uploaded private key, and needs to be specified as a deploy key of the repo where the docs will be pushed: https://github.com/pandas-dev/pandas-dev.github.io/settings/keys + - task: InstallSSHKey@0 + inputs: + hostName: 'github.com' + sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDHmz3l/EdqrgNxEUKkwDUuUcLv91unig03pYFGO/DMIgCmPdMG96zAgfnESd837Rm0wSSqylwSzkRJt5MV/TpFlcVifDLDQmUhqCeO8Z6dLl/oe35UKmyYICVwcvQTAaHNnYRpKC5IUlTh0JEtw9fGlnp1Ta7U1ENBLbKdpywczElhZu+hOQ892zqOj3CwA+U2329/d6cd7YnqIKoFN9DWT3kS5K6JE4IoBfQEVekIOs23bKjNLvPoOmi6CroAhu/K8j+NCWQjge5eJf2x/yTnIIP1PlEcXoHIr8io517posIx3TBup+CN8bNS1PpDW3jyD3ttl1uoBudjOQrobNnJeR6Rn67DRkG6IhSwr3BWj8alwUG5mTdZzwV5Pa9KZFdIiqX7NoDGg+itsR39QCn0thK8lGRNSR8KrWC1PSjecwelKBO7uQ7rnk/rkrZdBWR4oEA8YgNH8tirUw5WfOr5a0AIaJicKxGKNdMxZt+zmC+bS7F4YCOGIm9KHa43RrKhoGRhRf9fHHHKUPwFGqtWG4ykcUgoamDOURJyepesBAO3FiRE9rLU6ILbB3yEqqoekborHmAJD5vf7PWItW3Q/YQKuk3kkqRcKnexPyzyyq5lUgTi8CxxZdaASIOu294wjBhhdyHlXEkVTNJ9JKkj/obF+XiIIp0cBDsOXY9hDQ== pandas-dev@python.org' + sshKeySecureFile: 'pandas_docs_key' + displayName: 'Install GitHub ssh deployment key' + condition : | + and(not(eq(variables['Build.Reason'], 'PullRequest')), + eq(variables['Build.SourceBranch'], 'refs/heads/master')) + + - script: | + cd doc/build/html + git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git + git push origin master -f + displayName: 'Publish docs to GitHub pages' + condition : | + and(not(eq(variables['Build.Reason'], 'PullRequest')), + eq(variables['Build.SourceBranch'], 'refs/heads/master')) diff --git a/ci/deps/travis-36-doc.yaml b/ci/deps/travis-36-doc.yaml deleted file mode 100644 index 9d6cbd82fdc05..0000000000000 --- a/ci/deps/travis-36-doc.yaml +++ /dev/null @@ -1,46 +0,0 @@ -name: pandas-dev -channels: - - defaults - - conda-forge -dependencies: - - beautifulsoup4 - - bottleneck - - cython>=0.28.2 - - fastparquet>=0.2.1 - - gitpython - - html5lib - - hypothesis>=3.58.0 - - ipykernel - - ipython - - ipywidgets - - lxml - - matplotlib - - nbconvert>=5.4.1 - - nbformat - - nbsphinx - - notebook>=5.7.5 - - numexpr - - numpy - - numpydoc - - openpyxl - - pandoc - - pyarrow - - pyqt - - pytables - - python-dateutil - - python-snappy - - python=3.6.* - - pytz - - scipy - - seaborn - - sphinx - - sqlalchemy - - statsmodels - - xarray - - xlrd - - xlsxwriter - - xlwt - # universal - - pytest>=4.0.2 - - pytest-xdist - - isort From 6904c230e29a40a110182fd42db8aaee2701c83b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 1 Jun 2019 09:35:25 -0500 Subject: [PATCH 18/51] DOC: sparse doc fixups (#26571) --- doc/source/user_guide/sparse.rst | 2 +- doc/source/whatsnew/v0.16.0.rst | 2 ++ doc/source/whatsnew/v0.18.1.rst | 2 ++ doc/source/whatsnew/v0.19.0.rst | 2 ++ doc/source/whatsnew/v0.20.0.rst | 1 + pandas/core/sparse/frame.py | 2 +- pandas/core/sparse/series.py | 2 +- 7 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst index 8fed29d7a6316..09ed895a847ff 100644 --- a/doc/source/user_guide/sparse.rst +++ b/doc/source/user_guide/sparse.rst @@ -269,7 +269,7 @@ have no replacement. Interaction with scipy.sparse ----------------------------- -Use :meth:`DataFrame.sparse.from_coo` to create a ``DataFrame`` with sparse values from a sparse matrix. +Use :meth:`DataFrame.sparse.from_spmatrix` to create a ``DataFrame`` with sparse values from a sparse matrix. .. 
versionadded:: 0.25.0 diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst index 1e4ec682f0504..2cb09325c9466 100644 --- a/doc/source/whatsnew/v0.16.0.rst +++ b/doc/source/whatsnew/v0.16.0.rst @@ -92,6 +92,7 @@ Interaction with scipy.sparse Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a SparseSeries with MultiIndex we can convert to a `scipy.sparse.coo_matrix` by specifying the row and column labels as index levels: .. ipython:: python + :okwarning: s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), @@ -121,6 +122,7 @@ The from_coo method is a convenience method for creating a ``SparseSeries`` from a ``scipy.sparse.coo_matrix``: .. ipython:: python + :okwarning: from scipy import sparse A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), diff --git a/doc/source/whatsnew/v0.18.1.rst b/doc/source/whatsnew/v0.18.1.rst index f099ccf284bc2..069395c2e0f36 100644 --- a/doc/source/whatsnew/v0.18.1.rst +++ b/doc/source/whatsnew/v0.18.1.rst @@ -394,6 +394,7 @@ used in the ``pandas`` implementation (:issue:`12644`, :issue:`12638`, :issue:`1 An example of this signature augmentation is illustrated below: .. ipython:: python + :okwarning: sp = pd.SparseDataFrame([1, 2, 3]) sp @@ -409,6 +410,7 @@ Previous behaviour: New behaviour: .. ipython:: python + :okwarning: np.cumsum(sp, axis=0) diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 29eeb415e2f6d..de29a1eb93709 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -1236,6 +1236,7 @@ Operators now preserve dtypes - Sparse data structure now can preserve ``dtype`` after arithmetic ops (:issue:`13848`) .. ipython:: python + :okwarning: s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64) s.dtype @@ -1245,6 +1246,7 @@ Operators now preserve dtypes - Sparse data structure now support ``astype`` to convert internal ``dtype`` (:issue:`13900`) .. ipython:: python + :okwarning: s = pd.SparseSeries([1., 0., 2., 0.], fill_value=0) s diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index 741aa6ca143bb..6a88a5810eca4 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -339,6 +339,7 @@ See the :ref:`documentation ` for more information. (:issue: All sparse formats are supported, but matrices that are not in :mod:`COOrdinate ` format will be converted, copying data as needed. .. ipython:: python + :okwarning: from scipy.sparse import csr_matrix arr = np.random.random(size=(1000, 5)) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index fa3cd781eaf88..bf1cec7571f4d 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -42,7 +42,7 @@ class SparseDataFrame(DataFrame): DataFrame containing sparse floating point data in the form of SparseSeries objects - .. deprectaed:: 0.25.0 + .. deprecated:: 0.25.0 Use a DataFrame with sparse values instead. diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index e4f8579a398dd..3f95acdbfb42c 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -46,7 +46,7 @@ class SparseSeries(Series): """Data structure for labeled, sparse floating point data - .. deprectaed:: 0.25.0 + .. deprecated:: 0.25.0 Use a Series with sparse values instead. 
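
A minimal sketch of the replacement construction that the deprecation
notes above point users to (illustrative only; it assumes the
sparse-dtype Series/DataFrame API available since pandas 0.24):

    import pandas as pd

    # instead of the deprecated pd.SparseSeries([1.0, 0.0, 2.0])
    s = pd.Series([1.0, 0.0, 2.0], dtype="Sparse[float64]")

    # instead of the deprecated pd.SparseDataFrame({"A": [0, 1]})
    df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])})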
From 2630a0b83b61b783da336bcc3823e5d7bd302488 Mon Sep 17 00:00:00 2001
From: nathalier
Date: Sat, 1 Jun 2019 15:45:06 +0100
Subject: [PATCH 19/51] BUG: ignore errors for invalid dates in to_datetime()
 with errors=coerce (#25512) (#26561)

---
 doc/source/whatsnew/v0.25.0.rst              |  1 +
 pandas/core/tools/datetimes.py               |  6 +++---
 pandas/tests/indexes/datetimes/test_tools.py | 19 +++++++++++++++++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index ae5b6aafe4c7d..a62cac7a94bbd 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -427,6 +427,7 @@ Datetimelike
 - Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`)
 - Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when error is set to coerce (:issue:`26122`)
 - Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`)
+- Bug in :func:`to_datetime` which raises an unhandled ``OverflowError`` when called with a mix of invalid dates and ``NaN`` values with ``format='%Y%m%d'`` and ``errors='coerce'`` (:issue:`25512`)

 Timedelta
 ^^^^^^^^^
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 0756bdb3777ec..73119671550a5 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -775,21 +775,21 @@ def calc_with_mask(carg, mask):
     # try intlike / strings that are ints
     try:
         return calc(arg.astype(np.int64))
-    except ValueError:
+    except (ValueError, OverflowError):
         pass

     # a float with actual np.nan
     try:
         carg = arg.astype(np.float64)
         return calc_with_mask(carg, notna(carg))
-    except ValueError:
+    except (ValueError, OverflowError):
         pass

     # string with NaN-like
     try:
         mask = ~algorithms.isin(arg, list(tslib.nat_strings))
         return calc_with_mask(arg, mask)
-    except ValueError:
+    except (ValueError, OverflowError):
         pass

     return None
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index d62d8d1276fec..c507c31ee54dd 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -96,6 +96,25 @@ def test_to_datetime_format_YYYYMMDD(self, cache):
         result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
                                 cache=cache)
         expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]')
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("input_s, expected", [
+        # NaN before strings with invalid date values
+        [Series(['19801222', np.nan, '20010012', '10019999']),
+         Series([Timestamp('19801222'), np.nan, np.nan, np.nan])],
+        # NaN after strings with invalid date values
+        [Series(['19801222', '20010012', '10019999', np.nan]),
+         Series([Timestamp('19801222'), np.nan, np.nan, np.nan])],
+        # NaN before integers with invalid date values
+        [Series([20190813, np.nan, 20010012, 20019999]),
+         Series([Timestamp('20190813'), np.nan, np.nan, np.nan])],
+        # NaN after integers with invalid date values
+        [Series([20190813, 20010012, np.nan, 20019999]),
+         Series([Timestamp('20190813'), np.nan, np.nan, np.nan])]])
+    def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
+        # GH 25512
+        # format='%Y%m%d', errors='coerce'
+        result = pd.to_datetime(input_s, format='%Y%m%d', errors='coerce')
         assert_series_equal(result, expected)

     @pytest.mark.parametrize('cache', [True,
False]) From 4ec92eb45ac5ae2f7bc97e53a36294dab548e0c4 Mon Sep 17 00:00:00 2001 From: Mak Sze Chun Date: Sat, 1 Jun 2019 22:48:37 +0800 Subject: [PATCH 20/51] TST/CLN: Fixturize tests/frame/test_quantile.py (#26556) --- pandas/tests/frame/test_quantile.py | 56 +++++++++++++++++------------ 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index a5771839e0997..9ccbd290923ba 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -3,24 +3,24 @@ import pandas as pd from pandas import DataFrame, Series, Timestamp -from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestDataFrameQuantile(TestData): +class TestDataFrameQuantile: - def test_quantile(self): + def test_quantile(self, datetime_frame): from numpy import percentile - q = self.tsframe.quantile(0.1, axis=0) - assert q['A'] == percentile(self.tsframe['A'], 10) - tm.assert_index_equal(q.index, self.tsframe.columns) + df = datetime_frame + q = df.quantile(0.1, axis=0) + assert q['A'] == percentile(df['A'], 10) + tm.assert_index_equal(q.index, df.columns) - q = self.tsframe.quantile(0.9, axis=1) + q = df.quantile(0.9, axis=1) assert (q['2000-01-17'] == - percentile(self.tsframe.loc['2000-01-17'], 90)) - tm.assert_index_equal(q.index, self.tsframe.index) + percentile(df.loc['2000-01-17'], 90)) + tm.assert_index_equal(q.index, df.index) # test degenerate case q = DataFrame({'x': [], 'y': []}).quantile(0.1, axis=0) @@ -99,18 +99,6 @@ def test_quantile_axis_parameter(self): def test_quantile_interpolation(self): # see gh-10174 - from numpy import percentile - - # interpolation = linear (default case) - q = self.tsframe.quantile(0.1, axis=0, interpolation='linear') - assert q['A'] == percentile(self.tsframe['A'], 10) - q = self.intframe.quantile(0.1) - assert q['A'] == percentile(self.intframe['A'], 10) - - # test with and without interpolation keyword - q1 = self.intframe.quantile(0.1) - assert q1['A'] == np.percentile(self.intframe['A'], 10) - tm.assert_series_equal(q, q1) # interpolation method other than default linear df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) @@ -155,6 +143,28 @@ def test_quantile_interpolation(self): index=[.25, .5], columns=['a', 'b', 'c']) assert_frame_equal(result, expected) + def test_quantile_interpolation_datetime(self, datetime_frame): + # see gh-10174 + + # interpolation = linear (default case) + df = datetime_frame + q = df.quantile(0.1, axis=0, interpolation='linear') + assert q['A'] == np.percentile(df['A'], 10) + + def test_quantile_interpolation_int(self, int_frame): + # see gh-10174 + + df = int_frame + # interpolation = linear (default case) + q = df.quantile(0.1) + assert q['A'] == np.percentile(df['A'], 10) + + # test with and without interpolation keyword + # TODO: q1 is not different from q + q1 = df.quantile(0.1) + assert q1['A'] == np.percentile(df['A'], 10) + tm.assert_series_equal(q, q1) + def test_quantile_multi(self): df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=['a', 'b', 'c']) @@ -214,11 +224,11 @@ def test_quantile_datetime(self): # result = df[['a', 'c']].quantile(.5) # result = df[['a', 'c']].quantile([.5]) - def test_quantile_invalid(self): + def test_quantile_invalid(self, datetime_frame): msg = 'percentiles should all be in the interval \\[0, 1\\]' for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: with pytest.raises(ValueError, 
match=msg): - self.tsframe.quantile(invalid) + datetime_frame.quantile(invalid) def test_quantile_box(self): df = DataFrame({'A': [pd.Timestamp('2011-01-01'), From 9ebbe1bc8f054fcb128181b95a7d22d33da78f36 Mon Sep 17 00:00:00 2001 From: Big Head Date: Sat, 1 Jun 2019 10:51:27 -0400 Subject: [PATCH 21/51] BUG: fix categorical comparison with missing values (#26504 ) (#26514) --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/arrays/categorical.py | 13 +++++--- .../arrays/categorical/test_operators.py | 32 ++++++++++++++++++- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index a62cac7a94bbd..61182b9fa32f2 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -414,7 +414,7 @@ Categorical ^^^^^^^^^^^ - Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) -- +- Fixed bug in comparison of ordered :class:`Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in True (:issue:`26504`) - Datetimelike diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 89b86c66d7b05..44bb44457bc25 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -89,18 +89,23 @@ def f(self, other): else: other_codes = other._codes - na_mask = (self._codes == -1) | (other_codes == -1) + mask = (self._codes == -1) | (other_codes == -1) f = getattr(self._codes, op) ret = f(other_codes) - if na_mask.any(): + if mask.any(): # In other series, the leads to False, so do that here too - ret[na_mask] = False + ret[mask] = False return ret if is_scalar(other): if other in self.categories: i = self.categories.get_loc(other) - return getattr(self._codes, op)(i) + ret = getattr(self._codes, op)(i) + + # check for NaN in self + mask = (self._codes == -1) + ret[mask] = False + return ret else: if op == '__eq__': return np.repeat(False, len(self)) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index dc6e1a5bc36b3..a443408bf9479 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -1,4 +1,5 @@ import operator +import warnings import numpy as np import pytest @@ -17,7 +18,6 @@ def test_categories_none_comparisons(self): tm.assert_categorical_equal(factor, self.factor) def test_comparisons(self): - result = self.factor[self.factor == 'a'] expected = self.factor[np.asarray(self.factor) == 'a'] tm.assert_categorical_equal(result, expected) @@ -186,6 +186,36 @@ def test_comparison_with_unknown_scalars(self): tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) + def test_comparison_of_ordered_categorical_with_nan_to_scalar( + self, compare_operators_no_eq_ne): + # https://github.com/pandas-dev/pandas/issues/26504 + # BUG: fix ordered categorical comparison with missing values (#26504 ) + # and following comparisons with scalars in categories with missing + # values should be evaluated as False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + scalar = 2 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), + compare_operators_no_eq_ne)(scalar) + actual = getattr(cat, compare_operators_no_eq_ne)(scalar) + tm.assert_numpy_array_equal(actual, expected) + + def 
test_comparison_of_ordered_categorical_with_nan_to_listlike(
+            self, compare_operators_no_eq_ne):
+        # https://github.com/pandas-dev/pandas/issues/26504
+        # comparisons of missing values in an ordered Categorical with a
+        # list-like should evaluate to False
+
+        cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
+        other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", RuntimeWarning)
+            expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2)
+        actual = getattr(cat, compare_operators_no_eq_ne)(other)
+        tm.assert_numpy_array_equal(actual, expected)
+
 
     @pytest.mark.parametrize('data,reverse,base', [
         (list("abc"), list("cba"), list("bbb")),
         ([1, 2, 3], [3, 2, 1], [2, 2, 2])]

From 2c6d005d073ad48a4a2795f1965e660df3fbfa8c Mon Sep 17 00:00:00 2001
From: enisnazif 
Date: Sat, 1 Jun 2019 15:52:35 +0100
Subject: [PATCH 22/51] Fix the output of df.describe on an empty categorical
 / object column (#26474)

---
 doc/source/whatsnew/v0.25.0.rst      | 28 ++++++++++++++++++++++++++++
 pandas/core/arrays/categorical.py    |  2 +-
 pandas/core/generic.py               |  6 ++++++
 pandas/tests/frame/test_analytics.py | 11 +++++++++++
 4 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 61182b9fa32f2..ebca80025b9f7 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -253,6 +253,34 @@ are returned. (:issue:`21521`)
 
     df.groupby("a").ffill()
 
+``DataFrame`` describe on an empty categorical / object column will return top and freq
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When calling :meth:`DataFrame.describe` with an empty categorical / object
+column, the 'top' and 'freq' columns were previously omitted, which was inconsistent with
+the output for non-empty columns. Now the 'top' and 'freq' columns will always be included,
+with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397`)
+
+.. ipython:: python
+
+    df = pd.DataFrame({"empty_col": pd.Categorical([])})
+    df
+
+*Previous Behavior*:
+
+.. code-block:: python
+
+    In [3]: df.describe()
+    Out[3]:
+           empty_col
+    count          0
+    unique         0
+
+*New Behavior*:
+
+.. 
ipython:: python
+
+    df.describe()
 
 ``__str__`` methods now call ``__repr__`` rather than vice-versa
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 44bb44457bc25..49dd0041854bc 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1483,7 +1483,7 @@ def value_counts(self, dropna=True):
 
         if dropna or clean:
             obs = code if clean else code[mask]
-            count = bincount(obs, minlength=ncat or None)
+            count = bincount(obs, minlength=ncat or 0)
         else:
             count = bincount(np.where(mask, code, ncat))
             ix = np.append(ix, -1)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 0596d0ab844ec..7ca2c52e18c41 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -9920,6 +9920,12 @@ def describe_categorical_1d(data):
                 names += ['top', 'freq']
                 result += [top, freq]
 
+            # If the DataFrame is empty, set 'top' and 'freq' to None
+            # to maintain output shape consistency
+            else:
+                names += ['top', 'freq']
+                result += [None, None]
+
             return pd.Series(result, index=names, name=data.name)
 
         def describe_1d(data):

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index effe7eb47323d..487ff7932ec5f 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -588,6 +588,16 @@ def test_describe_categorical(self):
         result = df3.describe()
         tm.assert_numpy_array_equal(result["cat"].values, result["s"].values)
 
+    def test_describe_empty_categorical_column(self):
+        # GH 26397
+        # Ensure the index of an empty categorical DataFrame column
+        # also contains (count, unique, top, freq)
+        df = pd.DataFrame({"empty_col": Categorical([])})
+        result = df.describe()
+        expected = DataFrame({'empty_col': [0, 0, None, None]},
+                             index=['count', 'unique', 'top', 'freq'])
+        tm.assert_frame_equal(result, expected)
+
     def test_describe_categorical_columns(self):
         # GH 11558
         columns = pd.CategoricalIndex(['int1', 'int2', 'obj'],
@@ -608,6 +618,7 @@ def test_describe_categorical_columns(self):
                              index=['count', 'mean', 'std', 'min', '25%',
                                     '50%', '75%', 'max'],
                              columns=exp_columns)
+
     tm.assert_frame_equal(result, expected)
     tm.assert_categorical_equal(result.columns.values,
                                 expected.columns.values)

From 73d8f96bac5bb0bc58eb6f69d47ea4329b07c6ae Mon Sep 17 00:00:00 2001
From: Jiang Yue <35633013+jiangyue12392@users.noreply.github.com>
Date: Sat, 1 Jun 2019 22:56:34 +0800
Subject: [PATCH 23/51] BUG: MultiIndex not dropping nan level and invalid
 code value (#26408)

---
 doc/source/whatsnew/v0.25.0.rst               | 37 ++++++++++-
 pandas/core/indexes/multi.py                  | 62 ++++++++++++++---
 .../tests/indexes/multi/test_constructor.py   | 41 +++++++++++-
 pandas/tests/indexes/multi/test_missing.py    | 15 +++++
 4 files changed, 143 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index ebca80025b9f7..3275223b159f8 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -119,6 +119,42 @@ is respected in indexing. (:issue:`24076`, :issue:`16785`)
 
     df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']
 
+
+.. _whatsnew_0250.api_breaking.multi_indexing:
+
+
+MultiIndex constructed from levels and codes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previously, constructing a :class:`MultiIndex` with NaN levels or code values < -1 was allowed.
+Now, construction with code values < -1 raises a ``ValueError``, and codes corresponding
+to NaN levels are reassigned to -1. 
(:issue:`19387`)
+
+.. ipython:: python
+
+    mi1 = pd.MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]],
+                        codes=[[0, -1, 1, 2, 3, 4]])
+    mi2 = pd.MultiIndex(levels=[[1, 2]], codes=[[0, -2]])
+
+*Previous Behavior*:
+
+.. code-block:: ipython
+
+    In [1]: mi1
+    Out[1]: MultiIndex(levels=[[nan, None, NaT, 128, 2]],
+                       codes=[[0, -1, 1, 2, 3, 4]])
+    In [2]: mi2
+    Out[2]: MultiIndex(levels=[[1, 2]],
+                       codes=[[0, -2]])
+
+*New Behavior*:
+
+.. ipython:: python
+
+    mi1
+    mi2
+
+
 .. _whatsnew_0250.api_breaking.groupby_apply_first_group_once:
 
 GroupBy.apply on ``DataFrame`` evaluates first group only once
@@ -536,7 +572,6 @@ MultiIndex
 
 - Bug in which an incorrect exception was raised by :class:`Timedelta` when testing the membership of :class:`MultiIndex` (:issue:`24570`)
 -
--
 
 I/O
 ^^^

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index ec2cc70d1a352..9217b388ce86b 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -243,11 +243,35 @@ def __new__(cls, levels=None, codes=None, sortorder=None, names=None,
         result.sortorder = sortorder
 
         if verify_integrity:
-            result._verify_integrity()
+            new_codes = result._verify_integrity()
+            result._codes = new_codes
+
         if _set_identity:
             result._reset_identity()
+
         return result
 
+    def _validate_codes(self, level: list, code: list):
+        """
+        Reassign code values as -1 if their corresponding levels are NaN.
+
+        Parameters
+        ----------
+        level : list
+            Level to check for missing values (NaN, NaT, None).
+        code : list
+            Code to reassign.
+
+        Returns
+        -------
+        code : new code where the code value is -1 if it corresponds
+        to a level with missing values (NaN, NaT, None).
+        """
+        null_mask = isna(level)
+        if np.any(null_mask):
+            code = np.where(null_mask[code], -1, code)
+        return code
+
     def _verify_integrity(self, codes=None, levels=None):
         """
 
@@ -263,6 +287,11 @@ def _verify_integrity(self, codes=None, levels=None):
         ValueError
             If length of levels and codes don't match, if the codes for any
             level would exceed level bounds, or there are any duplicate levels.
+
+        Returns
+        -------
+        codes : new codes where the code value is -1 if it corresponds to a
+        NaN level.
         """
         # NOTE: Currently does not check, among other things, that cached
         # nlevels matches nor that sortorder matches actually sortorder.
@@ -272,22 +301,33 @@ def _verify_integrity(self, codes=None, levels=None):
         if len(levels) != len(codes):
             raise ValueError("Length of levels and codes must match. NOTE:"
                              " this index is in an inconsistent state.")
-        codes_length = len(self.codes[0])
+        codes_length = len(codes[0])
         for i, (level, level_codes) in enumerate(zip(levels, codes)):
             if len(level_codes) != codes_length:
                 raise ValueError("Unequal code lengths: %s" %
                                  ([len(code_) for code_ in codes]))
             if len(level_codes) and level_codes.max() >= len(level):
-                raise ValueError("On level %d, code max (%d) >= length of"
-                                 " level (%d). NOTE: this index is in an"
-                                 " inconsistent state" % (i, level_codes.max(),
-                                                          len(level)))
+                msg = ("On level {level}, code max ({max_code}) >= length of "
+                       "level ({level_len}). 
NOTE: this index is in an " + "inconsistent state".format( + level=i, max_code=level_codes.max(), + level_len=len(level))) + raise ValueError(msg) + if len(level_codes) and level_codes.min() < -1: + raise ValueError("On level {level}, code value ({code})" + " < -1".format( + level=i, code=level_codes.min())) if not level.is_unique: raise ValueError("Level values must be unique: {values} on " "level {level}".format( values=[value for value in level], level=i)) + codes = [self._validate_codes(level, code) + for level, code in zip(levels, codes)] + new_codes = FrozenList(codes) + return new_codes + @classmethod def from_arrays(cls, arrays, sortorder=None, names=None): """ @@ -586,7 +626,8 @@ def _set_levels(self, levels, level=None, copy=False, validate=True, new_levels = FrozenList(new_levels) if verify_integrity: - self._verify_integrity(levels=new_levels) + new_codes = self._verify_integrity(levels=new_levels) + self._codes = new_codes names = self.names self._levels = new_levels @@ -676,7 +717,6 @@ def labels(self): def _set_codes(self, codes, level=None, copy=False, validate=True, verify_integrity=False): - if validate and level is None and len(codes) != self.nlevels: raise ValueError("Length of codes must match number of levels") if validate and level is not None and len(codes) != len(level): @@ -696,9 +736,10 @@ def _set_codes(self, codes, level=None, copy=False, validate=True, new_codes = FrozenList(new_codes) if verify_integrity: - self._verify_integrity(codes=new_codes) + new_codes = self._verify_integrity(codes=new_codes) self._codes = new_codes + self._tuples = None self._reset_cache() @@ -1763,9 +1804,10 @@ def __setstate__(self, state): self._set_levels([Index(x) for x in levels], validate=False) self._set_codes(codes) + new_codes = self._verify_integrity() + self._set_codes(new_codes) self._set_names(names) self.sortorder = sortorder - self._verify_integrity() self._reset_identity() def __getitem__(self, key): diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 37290bc6eb1c0..7cab05660ac49 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -63,9 +63,10 @@ def test_constructor_mismatched_codes_levels(idx): with pytest.raises(ValueError, match=msg): MultiIndex(levels=levels, codes=codes) - length_error = (r"On level 0, code max \(3\) >= length of level \(1\)\." + length_error = (r"On level 0, code max \(3\) >= length of level \(1\)\." " NOTE: this index is in an inconsistent state") label_error = r"Unequal code lengths: \[4, 2\]" + code_value_error = r"On level 0, code value \(-2\) < -1" # important to check that it's looking at the right thing. 
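    # (illustration, mirroring the whatsnew entry above: under the new
    #  checks, pd.MultiIndex(levels=[[1, 2]], codes=[[0, -2]]) fails with
    #  the code_value_error message, while codes that point at NaN levels
    #  are remapped to -1 rather than raising)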
with pytest.raises(ValueError, match=length_error):
 
@@ -82,6 +83,44 @@ def test_constructor_mismatched_codes_levels(idx):
     with pytest.raises(ValueError, match=label_error):
         idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])
 
+    # test set_codes with verify_integrity=False
+    # the setting should not raise any value error
+    idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]],
+                         verify_integrity=False)
+
+    # code value smaller than -1
+    with pytest.raises(ValueError, match=code_value_error):
+        MultiIndex(levels=[['a'], ['b']], codes=[[0, -2], [0, 0]])
+
+
+def test_na_levels():
+    # GH26408
+    # test that codes are reassigned to -1 for levels
+    # with missing values (NaN, NaT, None)
+    result = MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]],
+                        codes=[[0, -1, 1, 2, 3, 4]])
+    expected = MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]],
+                          codes=[[-1, -1, -1, -1, 3, 4]])
+    tm.assert_index_equal(result, expected)
+
+    result = MultiIndex(levels=[[np.nan, 's', pd.NaT, 128, None]],
+                        codes=[[0, -1, 1, 2, 3, 4]])
+    expected = MultiIndex(levels=[[np.nan, 's', pd.NaT, 128, None]],
+                          codes=[[-1, -1, 1, -1, 3, -1]])
+    tm.assert_index_equal(result, expected)
+
+    # verify set_levels and set_codes
+    result = MultiIndex(
+        levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]).set_levels(
+        [[np.nan, 's', pd.NaT, 128, None]])
+    tm.assert_index_equal(result, expected)
+
+    result = MultiIndex(
+        levels=[[np.nan, 's', pd.NaT, 128, None]],
+        codes=[[1, 2, 2, 2, 2, 2]]).set_codes(
+        [[0, -1, 1, 2, 3, 4]])
+    tm.assert_index_equal(result, expected)
+
 
 def test_labels_deprecated(idx):
     # GH23752

diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py
index ed90f74d80989..518c12bb20e13 100644
--- a/pandas/tests/indexes/multi/test_missing.py
+++ b/pandas/tests/indexes/multi/test_missing.py
@@ -73,6 +73,21 @@ def test_dropna():
     with pytest.raises(ValueError, match=msg):
         idx.dropna(how='xxx')
 
+    # GH26408
+    # test that missing values are dropped for a MultiIndex constructed
+    # from codes and values
+    idx = MultiIndex(levels=[[np.nan, None, pd.NaT, "128", 2],
+                             [np.nan, None, pd.NaT, "128", 2]],
+                     codes=[[0, -1, 1, 2, 3, 4],
+                            [0, -1, 3, 3, 3, 4]])
+    expected = MultiIndex.from_arrays([["128", 2], ["128", 2]])
+    tm.assert_index_equal(idx.dropna(), expected)
+    tm.assert_index_equal(idx.dropna(how='any'), expected)
+
+    expected = MultiIndex.from_arrays([[np.nan, np.nan, "128", 2],
+                                       ["128", "128", "128", 2]])
+    tm.assert_index_equal(idx.dropna(how='all'), expected)
+
 
 def test_nulls(idx):
     # this is really a smoke test for the methods

From 9a42cbe85461c28417a5130bc80b035044c5575a Mon Sep 17 00:00:00 2001
From: h-vetinari <33685575+h-vetinari@users.noreply.github.com>
Date: Sat, 1 Jun 2019 17:03:06 +0200
Subject: [PATCH 24/51] API: Series.str-accessor infers dtype (and Index.str
 does not raise on all-NA) (#23167)

---
 doc/source/user_guide/text.rst  |  10 ++
 doc/source/whatsnew/v0.25.0.rst |  40 +++++-
 pandas/core/strings.py          | 214 +++++++++++++++++++++++++-------
 pandas/tests/test_strings.py    |  48 +++----
 4 files changed, 233 insertions(+), 79 deletions(-)

diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
index f7fdfcf8bf882..87c75e8bcd91f 100644
--- a/doc/source/user_guide/text.rst
+++ b/doc/source/user_guide/text.rst
@@ -70,6 +70,16 @@ and replacing any remaining whitespaces with underscores:
 
    ``.str`` methods which operate on elements of type ``list`` are not available on such a
    ``Series``.
 
+.. _text.warn_types:
+
+.. 
warning::
+
+    Before v0.25.0, the ``.str`` accessor performed only the most rudimentary type checks. Starting with
+    v0.25.0, the type of the Series is inferred and the allowed types (i.e. strings) are enforced more rigorously.
+
+    Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few
+    exceptions, other uses are not supported, and may be disabled at a later point.
+
 
 Splitting and Replacing Strings
 -------------------------------

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 3275223b159f8..87a8010998bd0 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -231,6 +231,43 @@ returned if all the columns were dummy encoded, and a :class:`DataFrame` otherwi
 Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will
 cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before.
 
+The ``.str``-accessor performs stricter type checks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Due to the lack of more fine-grained dtypes, :attr:`Series.str` previously only checked whether the data was
+of ``object`` dtype. :attr:`Series.str` will now infer the dtype of the data *within* the Series; in particular,
+``'bytes'``-only data will raise an exception (except for :meth:`Series.str.decode`, :meth:`Series.str.get`,
+:meth:`Series.str.len`, :meth:`Series.str.slice`), see :issue:`23163`, :issue:`23011`, :issue:`23551`.
+
+*Previous Behaviour*:
+
+.. code-block:: python
+
+    In [1]: s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object)
+
+    In [2]: s
+    Out[2]:
+    0      b'a'
+    1     b'ba'
+    2    b'cba'
+    dtype: object
+
+    In [3]: s.str.startswith(b'a')
+    Out[3]:
+    0     True
+    1    False
+    2    False
+    dtype: bool
+
+*New Behaviour*:
+
+.. ipython:: python
+    :okexcept:
+
+    s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object)
+    s
+    s.str.startswith(b'a')
+
 .. _whatsnew_0250.api_breaking.incompatible_index_unions:
 
 Incompatible Index Type Unions
@@ -331,7 +368,6 @@ This change is backward compatible for direct usage of Pandas, but if you subcla
 Pandas objects *and* give your subclasses specific ``__str__``/``__repr__``
 methods, you may have to adjust your ``__str__``/``__repr__`` methods (:issue:`26495`).
 
-
 .. _whatsnew_0250.api_breaking.deps:
 
 Increased minimum versions for dependencies
@@ -537,7 +573,7 @@ Conversion
 Strings
 ^^^^^^^
 
--
+- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`)
 -
 -
 

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index ee3796241690d..bd756491abd2f 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -1,4 +1,5 @@
 import codecs
+from functools import wraps
 import re
 import textwrap
 from typing import Dict
@@ -12,8 +13,8 @@
 from pandas.core.dtypes.common import (
     ensure_object, is_bool_dtype, is_categorical_dtype, is_integer,
-    is_list_like, is_object_dtype, is_re, is_scalar, is_string_like)
-from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
+    is_list_like, is_re, is_scalar, is_string_like)
+from pandas.core.dtypes.generic import ABCIndexClass, ABCMultiIndex, ABCSeries
 from pandas.core.dtypes.missing import isna
 
 from pandas.core.algorithms import take_1d
@@ -1720,12 +1721,78 @@ def str_encode(arr, encoding, errors="strict"):
     return _na_map(f, arr)
 
 
-def _noarg_wrapper(f, docstring=None, **kargs):
+def forbid_nonstring_types(forbidden, name=None):
+    """
+    Decorator to forbid specific types for a method of StringMethods. 
+
+    For calling `.str.{method}` on a Series or Index, it is necessary to first
+    initialize the :class:`StringMethods` object, and then call the method.
+    However, different methods allow different input types, and so this cannot
+    be checked during :meth:`StringMethods.__init__`, but must be done on a
+    per-method basis. This decorator exists to facilitate this process, and
+    make it explicit which (inferred) types are disallowed by the method.
+
+    :meth:`StringMethods.__init__` allows the *union* of types its different
+    methods allow (after skipping NaNs; see :meth:`StringMethods._validate`),
+    namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer'].
+
+    The default string types ['string', 'empty'] are allowed for all methods.
+    For the additional types ['bytes', 'mixed', 'mixed-integer'], each method
+    then needs to forbid the types it is not intended for.
+
+    Parameters
+    ----------
+    forbidden : list-of-str or None
+        List of forbidden non-string types, may be one or more of
+        `['bytes', 'mixed', 'mixed-integer']`.
+    name : str, default None
+        Name of the method to use in the error message. By default, this is
+        None, in which case the name from the method being wrapped will be
+        copied. However, for working with further wrappers (like _pat_wrapper
+        and _noarg_wrapper), it is necessary to specify the name.
+
+    Returns
+    -------
+    func : wrapper
+        The method to which the decorator is applied, with an added check that
+        enforces that the inferred type is not in the list of forbidden types.
+
+    Raises
+    ------
+    TypeError
+        If the inferred type of the underlying data is in `forbidden`.
+    """
+
+    # deal with None
+    forbidden = [] if forbidden is None else forbidden
+
+    allowed_types = {'string', 'empty', 'bytes',
+                     'mixed', 'mixed-integer'} - set(forbidden)
+
+    def _forbid_nonstring_types(func):
+        func_name = func.__name__ if name is None else name
+
+        @wraps(func)
+        def wrapper(self, *args, **kwargs):
+            if self._inferred_dtype not in allowed_types:
+                msg = ('Cannot use .str.{name} with values of inferred dtype '
+                       '{inf_type!r}.'.format(name=func_name,
+                                              inf_type=self._inferred_dtype))
+                raise TypeError(msg)
+            return func(self, *args, **kwargs)
+        wrapper.__name__ = func_name
+        return wrapper
+    return _forbid_nonstring_types
+
+
+def _noarg_wrapper(f, name=None, docstring=None, forbidden_types=['bytes'],
+                   **kargs):
+    @forbid_nonstring_types(forbidden_types, name=name)
     def wrapper(self):
         result = _na_map(f, self._parent, **kargs)
         return self._wrap_result(result)
 
-    wrapper.__name__ = f.__name__
+    wrapper.__name__ = f.__name__ if name is None else name
     if docstring is not None:
         wrapper.__doc__ = docstring
     else:
@@ -1734,22 +1801,26 @@ def wrapper(self):
     return wrapper
 
 
-def _pat_wrapper(f, flags=False, na=False, **kwargs):
+def _pat_wrapper(f, flags=False, na=False, name=None,
+                 forbidden_types=['bytes'], **kwargs):
+    @forbid_nonstring_types(forbidden_types, name=name)
     def wrapper1(self, pat):
         result = f(self._parent, pat)
         return self._wrap_result(result)
 
+    @forbid_nonstring_types(forbidden_types, name=name)
     def wrapper2(self, pat, flags=0, **kwargs):
         result = f(self._parent, pat, flags=flags, **kwargs)
         return self._wrap_result(result)
 
+    @forbid_nonstring_types(forbidden_types, name=name)
     def wrapper3(self, pat, na=np.nan):
         result = f(self._parent, pat, na=na)
         return self._wrap_result(result)
 
     wrapper = wrapper3 if na else wrapper2 if flags else wrapper1
 
-    wrapper.__name__ = f.__name__
+    wrapper.__name__ = f.__name__ if name is None else name
     if f.__doc__:
         wrapper.__doc__ = f.__doc__ 
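Taken together, the decorator and these two wrappers mean that a guarded
method fails fast with a uniform message, while a method wrapped with
``forbidden_types=None`` keeps accepting bytes. A minimal sketch of the
user-visible effect (an illustrative session; the Series is an ad-hoc
example, not taken from this diff):

.. code-block:: python

   s = pd.Series([b'a', b'ba', b'cba'])  # inferred dtype: 'bytes'

   s.str.len()    # still allowed: len() is wrapped with forbidden_types=None
   s.str.upper()  # TypeError: Cannot use .str.upper with values of
                  # inferred dtype 'bytes'.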
@@ -1780,7 +1851,7 @@ class StringMethods(NoNewAttributesMixin): """ def __init__(self, data): - self._validate(data) + self._inferred_dtype = self._validate(data) self._is_categorical = is_categorical_dtype(data) # .values.categories works for both Series/Index @@ -1791,38 +1862,44 @@ def __init__(self, data): @staticmethod def _validate(data): - from pandas.core.index import Index - - if (isinstance(data, ABCSeries) and - not ((is_categorical_dtype(data.dtype) and - is_object_dtype(data.values.categories)) or - (is_object_dtype(data.dtype)))): - # it's neither a string series not a categorical series with - # strings inside the categories. - # this really should exclude all series with any non-string values - # (instead of test for object dtype), but that isn't practical for - # performance reasons until we have a str dtype (GH 9343) + """ + Auxiliary function for StringMethods, infers and checks dtype of data. + + This is a "first line of defence" at the creation of the StringMethods- + object (see _make_accessor), and just checks that the dtype is in the + *union* of the allowed types over all string methods below; this + restriction is then refined on a per-method basis using the decorator + @forbid_nonstring_types (more info in the corresponding docstring). + + This really should exclude all series/index with any non-string values, + but that isn't practical for performance reasons until we have a str + dtype (GH 9343 / 13877) + + Parameters + ---------- + data : The content of the Series + + Returns + ------- + dtype : inferred dtype of data + """ + if isinstance(data, ABCMultiIndex): + raise AttributeError('Can only use .str accessor with Index, ' + 'not MultiIndex') + + # see _libs/lib.pyx for list of inferred types + allowed_types = ['string', 'empty', 'bytes', 'mixed', 'mixed-integer'] + + values = getattr(data, 'values', data) # Series / Index + values = getattr(values, 'categories', values) # categorical / normal + + # missing values obfuscate type inference -> skip + inferred_dtype = lib.infer_dtype(values, skipna=True) + + if inferred_dtype not in allowed_types: raise AttributeError("Can only use .str accessor with string " - "values, which use np.object_ dtype in " - "pandas") - elif isinstance(data, Index): - # can't use ABCIndex to exclude non-str - - # see src/inference.pyx which can contain string values - allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') - if is_categorical_dtype(data.dtype): - inf_type = data.categories.inferred_type - else: - inf_type = data.inferred_type - if inf_type not in allowed_types: - message = ("Can only use .str accessor with string values " - "(i.e. 
inferred_type is 'string', 'unicode' or " - "'mixed')") - raise AttributeError(message) - if data.nlevels > 1: - message = ("Can only use .str accessor with Index, not " - "MultiIndex") - raise AttributeError(message) + "values!") + return inferred_dtype def __getitem__(self, key): if isinstance(key, slice): @@ -2025,12 +2102,13 @@ def _get_series_list(self, others, ignore_index=False): warnings.warn('list-likes other than Series, Index, or ' 'np.ndarray WITHIN another list-like are ' 'deprecated and will be removed in a future ' - 'version.', FutureWarning, stacklevel=3) + 'version.', FutureWarning, stacklevel=4) return (los, join_warn) elif all(not is_list_like(x) for x in others): return ([Series(others, index=idx)], False) raise TypeError(err_msg) + @forbid_nonstring_types(['bytes', 'mixed', 'mixed-integer']) def cat(self, others=None, sep=None, na_rep=None, join=None): """ Concatenate strings in the Series/Index with given separator. @@ -2211,7 +2289,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): "Index/DataFrame in `others`. To enable alignment " "and silence this warning, pass `join='left'|" "'outer'|'inner'|'right'`. The future default will " - "be `join='left'`.", FutureWarning, stacklevel=2) + "be `join='left'`.", FutureWarning, stacklevel=3) # if join is None, _get_series_list already force-aligned indexes join = 'left' if join is None else join @@ -2384,6 +2462,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): @Appender(_shared_docs['str_split'] % { 'side': 'beginning', 'method': 'split'}) + @forbid_nonstring_types(['bytes']) def split(self, pat=None, n=-1, expand=False): result = str_split(self._parent, pat, n=n) return self._wrap_result(result, expand=expand) @@ -2391,6 +2470,7 @@ def split(self, pat=None, n=-1, expand=False): @Appender(_shared_docs['str_split'] % { 'side': 'end', 'method': 'rsplit'}) + @forbid_nonstring_types(['bytes']) def rsplit(self, pat=None, n=-1, expand=False): result = str_rsplit(self._parent, pat, n=n) return self._wrap_result(result, expand=expand) @@ -2485,6 +2565,7 @@ def rsplit(self, pat=None, n=-1, expand=False): '`sep`.' }) @deprecate_kwarg(old_arg_name='pat', new_arg_name='sep') + @forbid_nonstring_types(['bytes']) def partition(self, sep=' ', expand=True): f = lambda x: x.partition(sep) result = _na_map(f, self._parent) @@ -2498,6 +2579,7 @@ def partition(self, sep=' ', expand=True): '`sep`.' 
}) @deprecate_kwarg(old_arg_name='pat', new_arg_name='sep') + @forbid_nonstring_types(['bytes']) def rpartition(self, sep=' ', expand=True): f = lambda x: x.rpartition(sep) result = _na_map(f, self._parent) @@ -2509,33 +2591,39 @@ def get(self, i): return self._wrap_result(result) @copy(str_join) + @forbid_nonstring_types(['bytes']) def join(self, sep): result = str_join(self._parent, sep) return self._wrap_result(result) @copy(str_contains) + @forbid_nonstring_types(['bytes']) def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): result = str_contains(self._parent, pat, case=case, flags=flags, na=na, regex=regex) return self._wrap_result(result, fill_value=na) @copy(str_match) + @forbid_nonstring_types(['bytes']) def match(self, pat, case=True, flags=0, na=np.nan): result = str_match(self._parent, pat, case=case, flags=flags, na=na) return self._wrap_result(result, fill_value=na) @copy(str_replace) + @forbid_nonstring_types(['bytes']) def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): result = str_replace(self._parent, pat, repl, n=n, case=case, flags=flags, regex=regex) return self._wrap_result(result) @copy(str_repeat) + @forbid_nonstring_types(['bytes']) def repeat(self, repeats): result = str_repeat(self._parent, repeats) return self._wrap_result(result) @copy(str_pad) + @forbid_nonstring_types(['bytes']) def pad(self, width, side='left', fillchar=' '): result = str_pad(self._parent, width, side=side, fillchar=fillchar) return self._wrap_result(result) @@ -2559,17 +2647,21 @@ def pad(self, width, side='left', fillchar=' '): @Appender(_shared_docs['str_pad'] % dict(side='left and right', method='center')) + @forbid_nonstring_types(['bytes']) def center(self, width, fillchar=' '): return self.pad(width, side='both', fillchar=fillchar) @Appender(_shared_docs['str_pad'] % dict(side='right', method='ljust')) + @forbid_nonstring_types(['bytes']) def ljust(self, width, fillchar=' '): return self.pad(width, side='right', fillchar=fillchar) @Appender(_shared_docs['str_pad'] % dict(side='left', method='rjust')) + @forbid_nonstring_types(['bytes']) def rjust(self, width, fillchar=' '): return self.pad(width, side='left', fillchar=fillchar) + @forbid_nonstring_types(['bytes']) def zfill(self, width): """ Pad strings in the Series/Index by prepending '0' characters. 
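Note that ``center``, ``ljust`` and ``rjust`` above simply delegate to
``pad`` with a fixed ``side``, yet each carries its own
``@forbid_nonstring_types`` decorator: the check runs before the delegation,
so the error names the method the user actually called. For example
(hypothetical session):

.. code-block:: python

   pd.Series([b'a']).str.center(3)
   # TypeError: Cannot use .str.center with values of inferred dtype 'bytes'.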
@@ -2639,16 +2731,19 @@ def slice(self, start=None, stop=None, step=None): return self._wrap_result(result) @copy(str_slice_replace) + @forbid_nonstring_types(['bytes']) def slice_replace(self, start=None, stop=None, repl=None): result = str_slice_replace(self._parent, start, stop, repl) return self._wrap_result(result) @copy(str_decode) def decode(self, encoding, errors="strict"): + # need to allow bytes here result = str_decode(self._parent, encoding, errors) return self._wrap_result(result) @copy(str_encode) + @forbid_nonstring_types(['bytes']) def encode(self, encoding, errors="strict"): result = str_encode(self._parent, encoding, errors) return self._wrap_result(result) @@ -2718,28 +2813,33 @@ def encode(self, encoding, errors="strict"): @Appender(_shared_docs['str_strip'] % dict(side='left and right sides', method='strip')) + @forbid_nonstring_types(['bytes']) def strip(self, to_strip=None): result = str_strip(self._parent, to_strip, side='both') return self._wrap_result(result) @Appender(_shared_docs['str_strip'] % dict(side='left side', method='lstrip')) + @forbid_nonstring_types(['bytes']) def lstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side='left') return self._wrap_result(result) @Appender(_shared_docs['str_strip'] % dict(side='right side', method='rstrip')) + @forbid_nonstring_types(['bytes']) def rstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side='right') return self._wrap_result(result) @copy(str_wrap) + @forbid_nonstring_types(['bytes']) def wrap(self, width, **kwargs): result = str_wrap(self._parent, width, **kwargs) return self._wrap_result(result) @copy(str_get_dummies) + @forbid_nonstring_types(['bytes']) def get_dummies(self, sep='|'): # we need to cast to Series of strings as only that has all # methods available for making the dummies... 
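The asymmetry between ``decode`` and ``encode`` above is deliberate: decoding
is precisely the operation that turns bytes into strings, so ``decode`` is
left undecorated while ``encode`` forbids bytes input. A short illustrative
round trip (example values, not from the diff):

.. code-block:: python

   s = pd.Series([b'a', b'b'])
   s.str.decode('ascii')   # ok: bytes are exactly what decode expects
   s.str.encode('ascii')   # TypeError: Cannot use .str.encode with values
                           # of inferred dtype 'bytes'.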
@@ -2749,20 +2849,23 @@ def get_dummies(self, sep='|'): name=name, expand=True) @copy(str_translate) + @forbid_nonstring_types(['bytes']) def translate(self, table): result = str_translate(self._parent, table) return self._wrap_result(result) - count = _pat_wrapper(str_count, flags=True) - startswith = _pat_wrapper(str_startswith, na=True) - endswith = _pat_wrapper(str_endswith, na=True) - findall = _pat_wrapper(str_findall, flags=True) + count = _pat_wrapper(str_count, flags=True, name='count') + startswith = _pat_wrapper(str_startswith, na=True, name='startswith') + endswith = _pat_wrapper(str_endswith, na=True, name='endswith') + findall = _pat_wrapper(str_findall, flags=True, name='findall') @copy(str_extract) + @forbid_nonstring_types(['bytes']) def extract(self, pat, flags=0, expand=True): return str_extract(self, pat, flags=flags, expand=expand) @copy(str_extractall) + @forbid_nonstring_types(['bytes']) def extractall(self, pat, flags=0): return str_extractall(self._orig, pat, flags=flags) @@ -2792,6 +2895,7 @@ def extractall(self, pat, flags=0): @Appender(_shared_docs['find'] % dict(side='lowest', method='find', also='rfind : Return highest indexes in each strings.')) + @forbid_nonstring_types(['bytes']) def find(self, sub, start=0, end=None): result = str_find(self._parent, sub, start=start, end=end, side='left') return self._wrap_result(result) @@ -2799,11 +2903,13 @@ def find(self, sub, start=0, end=None): @Appender(_shared_docs['find'] % dict(side='highest', method='rfind', also='find : Return lowest indexes in each strings.')) + @forbid_nonstring_types(['bytes']) def rfind(self, sub, start=0, end=None): result = str_find(self._parent, sub, start=start, end=end, side='right') return self._wrap_result(result) + @forbid_nonstring_types(['bytes']) def normalize(self, form): """ Return the Unicode normal form for the strings in the Series/Index. @@ -2851,6 +2957,7 @@ def normalize(self, form): @Appender(_shared_docs['index'] % dict(side='lowest', similar='find', method='index', also='rindex : Return highest indexes in each strings.')) + @forbid_nonstring_types(['bytes']) def index(self, sub, start=0, end=None): result = str_index(self._parent, sub, start=start, end=end, side='left') @@ -2859,6 +2966,7 @@ def index(self, sub, start=0, end=None): @Appender(_shared_docs['index'] % dict(side='highest', similar='rfind', method='rindex', also='index : Return lowest indexes in each strings.')) + @forbid_nonstring_types(['bytes']) def rindex(self, sub, start=0, end=None): result = str_index(self._parent, sub, start=start, end=end, side='right') @@ -2908,7 +3016,8 @@ def rindex(self, sub, start=0, end=None): 5 3.0 dtype: float64 """) - len = _noarg_wrapper(len, docstring=_shared_docs['len'], dtype=int) + len = _noarg_wrapper(len, docstring=_shared_docs['len'], + forbidden_types=None, dtype=int) _shared_docs['casemethods'] = (""" Convert strings in the Series/Index to %(type)s. @@ -2989,21 +3098,27 @@ def rindex(self, sub, start=0, end=None): _doc_args['casefold'] = dict(type='be casefolded', method='casefold', version='\n .. 
versionadded:: 0.25.0\n') lower = _noarg_wrapper(lambda x: x.lower(), + name='lower', docstring=_shared_docs['casemethods'] % _doc_args['lower']) upper = _noarg_wrapper(lambda x: x.upper(), + name='upper', docstring=_shared_docs['casemethods'] % _doc_args['upper']) title = _noarg_wrapper(lambda x: x.title(), + name='title', docstring=_shared_docs['casemethods'] % _doc_args['title']) capitalize = _noarg_wrapper(lambda x: x.capitalize(), + name='capitalize', docstring=_shared_docs['casemethods'] % _doc_args['capitalize']) swapcase = _noarg_wrapper(lambda x: x.swapcase(), + name='swapcase', docstring=_shared_docs['casemethods'] % _doc_args['swapcase']) casefold = _noarg_wrapper(lambda x: x.casefold(), + name='casefold', docstring=_shared_docs['casemethods'] % _doc_args['casefold']) @@ -3157,30 +3272,39 @@ def rindex(self, sub, start=0, end=None): _doc_args['isnumeric'] = dict(type='numeric', method='isnumeric') _doc_args['isdecimal'] = dict(type='decimal', method='isdecimal') isalnum = _noarg_wrapper(lambda x: x.isalnum(), + name='isalnum', docstring=_shared_docs['ismethods'] % _doc_args['isalnum']) isalpha = _noarg_wrapper(lambda x: x.isalpha(), + name='isalpha', docstring=_shared_docs['ismethods'] % _doc_args['isalpha']) isdigit = _noarg_wrapper(lambda x: x.isdigit(), + name='isdigit', docstring=_shared_docs['ismethods'] % _doc_args['isdigit']) isspace = _noarg_wrapper(lambda x: x.isspace(), + name='isspace', docstring=_shared_docs['ismethods'] % _doc_args['isspace']) islower = _noarg_wrapper(lambda x: x.islower(), + name='islower', docstring=_shared_docs['ismethods'] % _doc_args['islower']) isupper = _noarg_wrapper(lambda x: x.isupper(), + name='isupper', docstring=_shared_docs['ismethods'] % _doc_args['isupper']) istitle = _noarg_wrapper(lambda x: x.istitle(), + name='istitle', docstring=_shared_docs['ismethods'] % _doc_args['istitle']) isnumeric = _noarg_wrapper(lambda x: x.isnumeric(), + name='isnumeric', docstring=_shared_docs['ismethods'] % _doc_args['isnumeric']) isdecimal = _noarg_wrapper(lambda x: x.isdecimal(), + name='isdecimal', docstring=_shared_docs['ismethods'] % _doc_args['isdecimal']) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 2951ca24fa7ff..1ba0ef3918fb7 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -150,6 +150,9 @@ def any_allowed_skipna_inferred_dtype(request): ... inferred_dtype, values = any_allowed_skipna_inferred_dtype ... # will pass ... assert lib.infer_dtype(values, skipna=True) == inferred_dtype + ... + ... # constructor for .str-accessor will also pass + ... 
pd.Series(values).str """ inferred_dtype, values = request.param values = np.array(values, dtype=object) # object dtype to avoid casting @@ -179,20 +182,6 @@ def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype): pytest.xfail(reason='Conversion to numpy array fails because ' 'the ._values-attribute is not a numpy array for ' 'PeriodArray/IntervalArray; see GH 23553') - if box == Index and inferred_dtype in ['empty', 'bytes']: - pytest.xfail(reason='Raising too restrictively; ' - 'solved by GH 23167') - if (box == Index and dtype == object - and inferred_dtype in ['boolean', 'date', 'time']): - pytest.xfail(reason='Inferring incorrectly because of NaNs; ' - 'solved by GH 23167') - if (box == Series - and (dtype == object and inferred_dtype not in [ - 'string', 'unicode', 'empty', - 'bytes', 'mixed', 'mixed-integer']) - or (dtype == 'category' - and inferred_dtype in ['decimal', 'boolean', 'time'])): - pytest.xfail(reason='Not raising correctly; solved by GH 23167') types_passing_constructor = ['string', 'unicode', 'empty', 'bytes', 'mixed', 'mixed-integer'] @@ -220,27 +209,21 @@ def test_api_per_method(self, box, dtype, method_name, args, kwargs = any_string_method # TODO: get rid of these xfails - if (method_name not in ['encode', 'decode', 'len'] - and inferred_dtype == 'bytes'): - pytest.xfail(reason='Not raising for "bytes", see GH 23011;' - 'Also: malformed method names, see GH 23551; ' - 'solved by GH 23167') - if (method_name == 'cat' - and inferred_dtype in ['mixed', 'mixed-integer']): - pytest.xfail(reason='Bad error message; should raise better; ' - 'solved by GH 23167') - if box == Index and inferred_dtype in ['empty', 'bytes']: - pytest.xfail(reason='Raising too restrictively; ' - 'solved by GH 23167') - if (box == Index and dtype == object - and inferred_dtype in ['boolean', 'date', 'time']): - pytest.xfail(reason='Inferring incorrectly because of NaNs; ' - 'solved by GH 23167') + if (method_name in ['partition', 'rpartition'] and box == Index + and inferred_dtype == 'empty'): + pytest.xfail(reason='Method cannot deal with empty Index') + if (method_name == 'split' and box == Index and values.size == 0 + and kwargs.get('expand', None) is not None): + pytest.xfail(reason='Split fails on empty Series when expand=True') + if (method_name == 'get_dummies' and box == Index + and inferred_dtype == 'empty' and (dtype == object + or values.size == 0)): + pytest.xfail(reason='Need to fortify get_dummies corner cases') t = box(values, dtype=dtype) # explicit dtype to avoid casting method = getattr(t.str, method_name) - bytes_allowed = method_name in ['encode', 'decode', 'len'] + bytes_allowed = method_name in ['decode', 'get', 'len', 'slice'] # as of v0.23.4, all methods except 'cat' are very lenient with the # allowed data types, just returning NaN for entries that error. 
# This could be changed with an 'errors'-kwarg to the `str`-accessor, @@ -3167,7 +3150,8 @@ def test_str_accessor_no_new_attributes(self): def test_method_on_bytes(self): lhs = Series(np.array(list('abc'), 'S1').astype(object)) rhs = Series(np.array(list('def'), 'S1').astype(object)) - with pytest.raises(TypeError, match="can't concat str to bytes"): + with pytest.raises(TypeError, + match="Cannot use .str.cat with values of.*"): lhs.str.cat(rhs) def test_casefold(self): From 0dbb99efc259c5182ac88f116ebb76ae6e2db6ee Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 1 Jun 2019 16:34:57 +0100 Subject: [PATCH 25/51] Changing dev docs ssh key (#26604) --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9f83917024049..0064d0a932960 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -161,7 +161,7 @@ jobs: - task: InstallSSHKey@0 inputs: hostName: 'github.com' - sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDHmz3l/EdqrgNxEUKkwDUuUcLv91unig03pYFGO/DMIgCmPdMG96zAgfnESd837Rm0wSSqylwSzkRJt5MV/TpFlcVifDLDQmUhqCeO8Z6dLl/oe35UKmyYICVwcvQTAaHNnYRpKC5IUlTh0JEtw9fGlnp1Ta7U1ENBLbKdpywczElhZu+hOQ892zqOj3CwA+U2329/d6cd7YnqIKoFN9DWT3kS5K6JE4IoBfQEVekIOs23bKjNLvPoOmi6CroAhu/K8j+NCWQjge5eJf2x/yTnIIP1PlEcXoHIr8io517posIx3TBup+CN8bNS1PpDW3jyD3ttl1uoBudjOQrobNnJeR6Rn67DRkG6IhSwr3BWj8alwUG5mTdZzwV5Pa9KZFdIiqX7NoDGg+itsR39QCn0thK8lGRNSR8KrWC1PSjecwelKBO7uQ7rnk/rkrZdBWR4oEA8YgNH8tirUw5WfOr5a0AIaJicKxGKNdMxZt+zmC+bS7F4YCOGIm9KHa43RrKhoGRhRf9fHHHKUPwFGqtWG4ykcUgoamDOURJyepesBAO3FiRE9rLU6ILbB3yEqqoekborHmAJD5vf7PWItW3Q/YQKuk3kkqRcKnexPyzyyq5lUgTi8CxxZdaASIOu294wjBhhdyHlXEkVTNJ9JKkj/obF+XiIIp0cBDsOXY9hDQ== pandas-dev@python.org' + sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDfF0BSddjvZx/z4/2TXsy+RxjwBpgdHkmjtL9WfRHxEw1TchBuEj5vWWcxBNTK+9oVzD/Lca89HAXXrklsfkdAK3LvLfGCxTGpP8t/3CxxFdnSg3EN+4cDGKuDlbeTyzdASdPBOq0GTZjUFekl9ZfFrFJ9SoPpqZ4mmPRPapPrkwTs4xIrBly0eWcISFYgZcG58m65+XQpyyBMbpsO5ZHBBxE8kkWN0yY+gKt5PeeIO82xE+7F+3Qhlc67fKfB4FEitQ5SKrbKyGNNdFtEGcC6CEtD0B0vJxssltQEl5dDWPJP6tH4cIm/J6m28mpSYc5fEBhr75jE4Ybw6NtGgBZEdtFRFlnb91mSiVSjM/HEkV7/xYai+H1Gk+I/8tcl8cf3JCiJSP2glz8bp52+i5it29FUL8ITxdJSo0duUkVm3nZ8cDI6zag+nSSmzdZ1I9Fw7M7RRPHM2zd5+6RskeqamR5lY3Iv+t8Yo8cRX10IiHNF89b+3vI5ZkIKqytrPfrY45jGVMXA6x/whMh94Ac94qm+Do7P3eT/66a1lX0r+UfV6UnfwHE6cZ1ZFX2AzlmSiYMKmTD3hn1GNyHHuvk3Mneanbk4+x+8SjAXIK354zJ8c1Qgk1iEicDvna2IBd94R4tBWjYZ8xH7avmPlhs0HwbjiNOFDc45UXvwIl+D7w== pandas-dev@python.org' sshKeySecureFile: 'pandas_docs_key' displayName: 'Install GitHub ssh deployment key' condition : | From 3db9dc308bad04f180950630f5966cbee27916a7 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 1 Jun 2019 17:46:56 +0100 Subject: [PATCH 26/51] CI: Removing doc build in azure (#26609) --- azure-pipelines.yml | 60 --------------------------------------------- 1 file changed, 60 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0064d0a932960..85325c52e7e6d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -116,63 +116,3 @@ jobs: fi displayName: 'Running benchmarks' condition: true - -- job: 'Docs' - pool: - vmImage: ubuntu-16.04 - timeoutInMinutes: 90 - steps: - - script: | - echo '##vso[task.setvariable variable=CONDA_ENV]pandas-dev' - echo '##vso[task.setvariable variable=ENV_FILE]environment.yml' - displayName: 'Setting environment variables' - - - script: | - export PATH=$HOME/miniconda3/bin:$PATH - sudo apt-get install -y libc6-dev-i386 - ci/setup_env.sh - displayName: 'Setup environment and build 
pandas' - - - script: | - export PATH=$HOME/miniconda3/bin:$PATH - source activate pandas-dev - doc/make.py - displayName: 'Build documentation' - - - script: | - cd doc/build/html - git init - touch .nojekyll - git add --all . - git config user.email "pandas-dev@python.org" - git config user.name "pandas-docs-bot" - git commit -m "pandas documentation in master" - displayName: 'Create git repo for docs build' - condition : | - and(not(eq(variables['Build.Reason'], 'PullRequest')), - eq(variables['Build.SourceBranch'], 'refs/heads/master')) - - # This task to work requires next steps: - # 1. Got to "Library > Secure files" in the azure-pipelines dashboard: https://dev.azure.com/pandas-dev/pandas/_library?itemType=SecureFiles - # 2. Click on "+ Secure file" - # 3. Upload the private key (the name of the file must match with the specified in "sshKeySecureFile" input below, "pandas_docs_key") - # 4. Click on file name after it is created, tick the box "Authorize for use in all pipelines" and save - # 5. The public key specified in "sshPublicKey" is the pair of the uploaded private key, and needs to be specified as a deploy key of the repo where the docs will be pushed: https://github.com/pandas-dev/pandas-dev.github.io/settings/keys - - task: InstallSSHKey@0 - inputs: - hostName: 'github.com' - sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDfF0BSddjvZx/z4/2TXsy+RxjwBpgdHkmjtL9WfRHxEw1TchBuEj5vWWcxBNTK+9oVzD/Lca89HAXXrklsfkdAK3LvLfGCxTGpP8t/3CxxFdnSg3EN+4cDGKuDlbeTyzdASdPBOq0GTZjUFekl9ZfFrFJ9SoPpqZ4mmPRPapPrkwTs4xIrBly0eWcISFYgZcG58m65+XQpyyBMbpsO5ZHBBxE8kkWN0yY+gKt5PeeIO82xE+7F+3Qhlc67fKfB4FEitQ5SKrbKyGNNdFtEGcC6CEtD0B0vJxssltQEl5dDWPJP6tH4cIm/J6m28mpSYc5fEBhr75jE4Ybw6NtGgBZEdtFRFlnb91mSiVSjM/HEkV7/xYai+H1Gk+I/8tcl8cf3JCiJSP2glz8bp52+i5it29FUL8ITxdJSo0duUkVm3nZ8cDI6zag+nSSmzdZ1I9Fw7M7RRPHM2zd5+6RskeqamR5lY3Iv+t8Yo8cRX10IiHNF89b+3vI5ZkIKqytrPfrY45jGVMXA6x/whMh94Ac94qm+Do7P3eT/66a1lX0r+UfV6UnfwHE6cZ1ZFX2AzlmSiYMKmTD3hn1GNyHHuvk3Mneanbk4+x+8SjAXIK354zJ8c1Qgk1iEicDvna2IBd94R4tBWjYZ8xH7avmPlhs0HwbjiNOFDc45UXvwIl+D7w== pandas-dev@python.org' - sshKeySecureFile: 'pandas_docs_key' - displayName: 'Install GitHub ssh deployment key' - condition : | - and(not(eq(variables['Build.Reason'], 'PullRequest')), - eq(variables['Build.SourceBranch'], 'refs/heads/master')) - - - script: | - cd doc/build/html - git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git - git push origin master -f - displayName: 'Publish docs to GitHub pages' - condition : | - and(not(eq(variables['Build.Reason'], 'PullRequest')), - eq(variables['Build.SourceBranch'], 'refs/heads/master')) From 437efa6e974e506c7cc5f142d5186bf6a7f5ce13 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Sat, 1 Jun 2019 17:03:58 +0000 Subject: [PATCH 27/51] PERF: don't call RangeIndex._data unnecessarily (#26565) --- asv_bench/benchmarks/index_object.py | 6 +++++ doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/indexes/range.py | 32 +++++++++++++++++++++++-- pandas/tests/indexes/test_range.py | 36 ++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 896a20bae2069..78fe2ae966896 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -94,6 +94,12 @@ def time_min(self): def time_min_trivial(self): self.idx_inc.min() + def time_get_loc_inc(self): + self.idx_inc.get_loc(900000) + + def time_get_loc_dec(self): + self.idx_dec.get_loc(100000) + class IndexAppend: diff --git 
a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 87a8010998bd0..1619ba1a45739 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -493,6 +493,7 @@ Performance Improvements
 - Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is
   int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`)
 - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
+- Improved performance when slicing :class:`RangeIndex` (:issue:`26565`)
 - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
 - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
 - Improved performance of :meth:`IntervalIndex.is_monotonic`, :meth:`IntervalIndex.is_monotonic_increasing` and :meth:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`)

diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index ea14a4c789cd3..9401de3346ccd 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -22,6 +22,8 @@
 from pandas.core.indexes.base import Index, _index_shared_docs
 from pandas.core.indexes.numeric import Int64Index
 
+from pandas.io.formats.printing import pprint_thing
+
 
 class RangeIndex(Int64Index):
     """
@@ -64,6 +66,8 @@ class RangeIndex(Int64Index):
 
     _typ = 'rangeindex'
     _engine_type = libindex.Int64Engine
+    # check whether self._data has been called
+    _cached_data = None  # type: np.ndarray
 
     # --------------------------------------------------------------------
     # Constructors
@@ -164,6 +168,8 @@ def _simple_new(cls, start, stop=None, step=None, name=None,
         for k, v in kwargs.items():
             setattr(result, k, v)
 
+        result._range = range(result._start, result._stop, result._step)
+
         result._reset_identity()
         return result
 
@@ -180,9 +186,19 @@ def _constructor(self):
         """ return the class to use for construction """
         return Int64Index
 
-    @cache_readonly
+    @property
     def _data(self):
-        return np.arange(self._start, self._stop, self._step, dtype=np.int64)
+        """
+        An int array that for performance reasons is created only when needed.
+
+        The constructed array is saved in ``_cached_data``. This allows us to
+        check if the array has been created without accessing ``_data`` and
+        triggering the construction. 
+ """ + if self._cached_data is None: + self._cached_data = np.arange(self._start, self._stop, self._step, + dtype=np.int64) + return self._cached_data @cache_readonly def _int64index(self): @@ -215,6 +231,9 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None + def _format_with_header(self, header, na_rep='NaN', **kwargs): + return header + list(map(pprint_thing, self._range)) + # -------------------------------------------------------------------- @property def start(self): @@ -296,6 +315,15 @@ def is_monotonic_decreasing(self): def has_duplicates(self): return False + @Appender(_index_shared_docs['get_loc']) + def get_loc(self, key, method=None, tolerance=None): + if is_integer(key) and method is None and tolerance is None: + try: + return self._range.index(key) + except ValueError: + raise KeyError(key) + return super().get_loc(key, method=method, tolerance=tolerance) + def tolist(self): return list(range(self._start, self._stop, self._step)) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index b2c330015081c..477a4e527f278 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -241,6 +241,42 @@ def test_view(self): def test_dtype(self): assert self.index.dtype == np.int64 + def test_cached_data(self): + # GH 26565 + # Calling RangeIndex._data caches an int64 array of the same length at + # self._cached_data. This tests whether _cached_data has been set. + idx = RangeIndex(0, 100, 10) + + assert idx._cached_data is None + + repr(idx) + assert idx._cached_data is None + + str(idx) + assert idx._cached_data is None + + idx.get_loc(20) + assert idx._cached_data is None + + df = pd.DataFrame({'a': range(10)}, index=idx) + + df.loc[50] + assert idx._cached_data is None + + with pytest.raises(KeyError): + df.loc[51] + assert idx._cached_data is None + + df.loc[10:50] + assert idx._cached_data is None + + df.iloc[5:10] + assert idx._cached_data is None + + # actually calling data._data + assert isinstance(idx._data, np.ndarray) + assert isinstance(idx._cached_data, np.ndarray) + def test_is_monotonic(self): assert self.index.is_monotonic is True assert self.index.is_monotonic_increasing is True From addc5fcd95064b765d4ee4260304d44822fdee3b Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 2 Jun 2019 12:47:34 +0100 Subject: [PATCH 28/51] CI: pin pytest version on Python 3.5 (#26619) --- ci/deps/azure-35-compat.yaml | 2 +- ci/deps/azure-macos-35.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/deps/azure-35-compat.yaml b/ci/deps/azure-35-compat.yaml index d0a48bd3f8b27..e55a4fbdf3fa9 100644 --- a/ci/deps/azure-35-compat.yaml +++ b/ci/deps/azure-35-compat.yaml @@ -26,5 +26,5 @@ dependencies: - pip - pip: # for python 3.5, pytest>=4.0.2 is not available in conda - - pytest>=4.0.2 + - pytest==4.5.0 - html5lib==1.0b2 diff --git a/ci/deps/azure-macos-35.yaml b/ci/deps/azure-macos-35.yaml index 591266348a5f1..00c2051f29760 100644 --- a/ci/deps/azure-macos-35.yaml +++ b/ci/deps/azure-macos-35.yaml @@ -25,7 +25,7 @@ dependencies: - pip: - python-dateutil==2.5.3 # universal - - pytest>=4.0.2 + - pytest==4.5.0 - pytest-xdist - pytest-mock - hypothesis>=3.58.0 From 33552913376cf23a890d482d8661b3140062496c Mon Sep 17 00:00:00 2001 From: Chuanzhu Xu Date: Sun, 2 Jun 2019 17:09:44 -0400 Subject: [PATCH 29/51] remove outdated gtk package from code (#26590) --- doc/source/install.rst | 1 - doc/source/user_guide/io.rst | 2 +- doc/source/whatsnew/v0.25.0.rst | 1 + 
pandas/core/generic.py            |  2 +-
 pandas/io/clipboard/__init__.py   | 21 +++++----------------
 pandas/io/clipboard/clipboards.py | 16 ----------------
 pandas/io/clipboards.py           |  2 +-
 7 files changed, 9 insertions(+), 36 deletions(-)

diff --git a/doc/source/install.rst b/doc/source/install.rst
index b3b5945cc515e..98443ede2e965 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -281,7 +281,6 @@ Optional Dependencies
   `qtpy `__ (requires PyQt or PySide),
   `PyQt5 `__,
   `PyQt4 `__,
-  `pygtk `__,
   `xsel `__, or
   `xclip `__: necessary to use
   :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation.

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 88d8ccbbe036e..4aacb6fa1e278 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -3272,7 +3272,7 @@ We can see that we got the same content back, which we had earlier written to th
 
 .. note::
 
-   You may need to install xclip or xsel (with gtk, PyQt5, PyQt4 or qtpy) on Linux to use these methods.
+   You may need to install xclip or xsel (with PyQt5, PyQt4 or qtpy) on Linux to use these methods.
 
 .. _io.pickle:

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 1619ba1a45739..f122c73325b7d 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -434,6 +434,7 @@ Other API Changes
 - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`)
 - The ``arg`` argument in :meth:`pandas.core.window._Window.aggregate` has been renamed to ``func`` (:issue:`26372`)
 - Most Pandas classes had a ``__bytes__`` method, which was used for getting a python2-style bytestring representation of the object. This method has been removed as a part of dropping Python2 (:issue:`26447`)
+- Removed support for the gtk package for clipboards (:issue:`26563`)
 
 .. _whatsnew_0250.deprecations:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 7ca2c52e18c41..33b0035e74913 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2679,7 +2679,7 @@ def to_clipboard(self, excel=True, sep=None, **kwargs):
         -----
         Requirements for your platform.
 
-          - Linux : `xclip`, or `xsel` (with `gtk` or `PyQt4` modules)
+          - Linux : `xclip`, or `xsel` (with `PyQt4` modules)
           - Windows : none
           - OS X : none

diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py
index b76a843e3e7f2..2063978c76c5a 100644
--- a/pandas/io/clipboard/__init__.py
+++ b/pandas/io/clipboard/__init__.py
@@ -18,21 +18,19 @@
 On Linux, install xclip or xsel via package manager. For example, in Debian:
 sudo apt-get install xclip
 
-Otherwise on Linux, you will need the gtk, qtpy or PyQt modules installed.
+Otherwise on Linux, you will need the qtpy or PyQt modules installed.
 qtpy also requires a python-qt-bindings module: PyQt4, PyQt5, PySide, PySide2
 
-gtk and PyQt4 modules are not available for Python 3,
-and this module does not work with PyGObject yet.
+This module does not work with PyGObject yet. 
""" __version__ = '1.5.27' import platform import os import subprocess -from .clipboards import (init_osx_clipboard, - init_gtk_clipboard, init_qt_clipboard, - init_xclip_clipboard, init_xsel_clipboard, - init_klipper_clipboard, init_no_clipboard) +from .clipboards import ( + init_osx_clipboard, init_qt_clipboard, init_xclip_clipboard, + init_xsel_clipboard, init_klipper_clipboard, init_no_clipboard) from .windows import init_windows_clipboard # `import qtpy` sys.exit()s if DISPLAY is not in the environment. @@ -60,14 +58,6 @@ def determine_clipboard(): return init_osx_clipboard() if HAS_DISPLAY: # Determine which command/module is installed, if any. - try: - # Check if gtk is installed - import gtk # noqa - except ImportError: - pass - else: - return init_gtk_clipboard() - try: # qtpy is a small abstraction layer that lets you write # applications using a single api call to either PyQt or PySide @@ -104,7 +94,6 @@ def set_clipboard(clipboard): global copy, paste clipboard_types = {'osx': init_osx_clipboard, - 'gtk': init_gtk_clipboard, 'qt': init_qt_clipboard, 'xclip': init_xclip_clipboard, 'xsel': init_xsel_clipboard, diff --git a/pandas/io/clipboard/clipboards.py b/pandas/io/clipboard/clipboards.py index 66e2e35bf0c59..52abdeafb5ecc 100644 --- a/pandas/io/clipboard/clipboards.py +++ b/pandas/io/clipboard/clipboards.py @@ -22,22 +22,6 @@ def paste_osx(): return copy_osx, paste_osx -def init_gtk_clipboard(): - import gtk - - def copy_gtk(text): - global cb - cb = gtk.Clipboard() - cb.set_text(text) - cb.store() - - def paste_gtk(): - clipboardContents = gtk.Clipboard().wait_for_text() - return clipboardContents - - return copy_gtk, paste_gtk - - def init_qt_clipboard(): # $DISPLAY should exist diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index be1256edf7afe..dc30285895dd5 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -91,7 +91,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover Notes ----- Requirements for your platform - - Linux: xclip, or xsel (with gtk or PyQt4 modules) + - Linux: xclip, or xsel (with PyQt4 modules) - Windows: - OS X: """ From efc2adaa3553f647737307aec85399b627002c03 Mon Sep 17 00:00:00 2001 From: iamshwin <23633545+iamshwin@users.noreply.github.com> Date: Mon, 3 Jun 2019 00:11:48 +0100 Subject: [PATCH 30/51] Tidy documentation about plotting Series histograms (#26624) --- pandas/plotting/_core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index fed4b0d90983c..3f6a30c4639bc 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2477,8 +2477,6 @@ def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, bin edges are calculated and returned. If bins is a sequence, gives bin edges, including left edge of first bin and right edge of last bin. In this case, bins is returned unmodified. 
- bins : integer, default 10 - Number of histogram bins to be used `**kwds` : keywords To be passed to the actual plotting function From 5c6dd43e3e85235f32444df73abb66528336b319 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 3 Jun 2019 00:13:08 +0100 Subject: [PATCH 31/51] TST/CLN: deduplicate fixture from test_to_latex.py (#26603) --- pandas/conftest.py | 31 ++++++++++++++++++++++++ pandas/tests/frame/conftest.py | 29 ---------------------- pandas/tests/io/formats/test_to_latex.py | 27 +++++++++------------ 3 files changed, 42 insertions(+), 45 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 8f71028f51ab4..09fe8e0829fa1 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -12,6 +12,8 @@ import pandas.util._test_decorators as td import pandas as pd +from pandas import DataFrame +import pandas.util.testing as tm hypothesis.settings.register_profile( "ci", @@ -690,3 +692,32 @@ def tick_classes(request): normalize=st.booleans(), startingMonth=st.integers(min_value=1, max_value=12) )) + + +@pytest.fixture +def float_frame(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + + A B C D + P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 + qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 + tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 + wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 + M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 + QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 + r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 + ... ... ... ... ... + IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 + lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 + qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 + yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 + 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 + eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 + xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getSeriesData()) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index c451cd58f1497..d8a590bc492a4 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -5,35 +5,6 @@ import pandas.util.testing as tm -@pytest.fixture -def float_frame(): - """ - Fixture for DataFrame of floats with index of unique strings - - Columns are ['A', 'B', 'C', 'D']. - - A B C D - P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 - qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 - tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 - wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 - M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 - QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 - r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 - ... ... ... ... ... 
- IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 - lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 - qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 - yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 - 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 - eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 - xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 - - [30 rows x 4 columns] - """ - return DataFrame(tm.getSeriesData()) - - @pytest.fixture def float_frame_with_na(): """ diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 5a6511fbd20ee..b9f28ec36d021 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -8,19 +8,14 @@ from pandas.util import testing as tm -@pytest.fixture -def frame(): - return DataFrame(tm.getSeriesData()) - - class TestToLatex: - def test_to_latex_filename(self, frame): + def test_to_latex_filename(self, float_frame): with tm.ensure_clean('test.tex') as path: - frame.to_latex(path) + float_frame.to_latex(path) with open(path, 'r') as f: - assert frame.to_latex() == f.read() + assert float_frame.to_latex() == f.read() # test with utf-8 and encoding option (GH 7061) df = DataFrame([['au\xdfgangen']]) @@ -35,9 +30,9 @@ def test_to_latex_filename(self, frame): with codecs.open(path, 'r', encoding='utf-8') as f: assert df.to_latex() == f.read() - def test_to_latex(self, frame): + def test_to_latex(self, float_frame): # it works! - frame.to_latex() + float_frame.to_latex() df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) withindex_result = df.to_latex() @@ -66,9 +61,9 @@ def test_to_latex(self, frame): assert withoutindex_result == withoutindex_expected - def test_to_latex_format(self, frame): + def test_to_latex_format(self, float_frame): # GH Bug #9402 - frame.to_latex(column_format='ccc') + float_frame.to_latex(column_format='ccc') df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) withindex_result = df.to_latex(column_format='ccc') @@ -389,8 +384,8 @@ def test_to_latex_special_escape(self): """ assert escaped_result == escaped_expected - def test_to_latex_longtable(self, frame): - frame.to_latex(longtable=True) + def test_to_latex_longtable(self, float_frame): + float_frame.to_latex(longtable=True) df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) withindex_result = df.to_latex(longtable=True) @@ -535,9 +530,9 @@ def test_to_latex_specified_header(self): with pytest.raises(ValueError): df.to_latex(header=['A']) - def test_to_latex_decimal(self, frame): + def test_to_latex_decimal(self, float_frame): # GH 12031 - frame.to_latex() + float_frame.to_latex() df = DataFrame({'a': [1.0, 2.1], 'b': ['b1', 'b2']}) withindex_result = df.to_latex(decimal=',') From 4f332f6f4b27111c9ab7ba686b3bc51db2e6f7bc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 2 Jun 2019 16:20:15 -0700 Subject: [PATCH 32/51] CLN: Remove convert_objects (#26612) --- doc/source/reference/frame.rst | 1 - doc/source/reference/series.rst | 1 - doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/generic.py | 48 +--------- pandas/tests/series/test_internals.py | 125 -------------------------- 5 files changed, 2 insertions(+), 174 deletions(-) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index dfa475684c834..b4fb85c028b3e 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -48,7 +48,6 @@ Conversion :toctree: api/ DataFrame.astype - DataFrame.convert_objects DataFrame.infer_objects DataFrame.copy DataFrame.isna diff --git 
a/doc/source/reference/series.rst b/doc/source/reference/series.rst index b406893e3414a..8fccdea979602 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -56,7 +56,6 @@ Conversion Series.astype Series.infer_objects - Series.convert_objects Series.copy Series.bool Series.to_numpy diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index f122c73325b7d..1cbec223008c4 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -483,6 +483,7 @@ Removal of prior version deprecations/changes - Removed the previously deprecated ``TimeGrouper`` (:issue:`16942`) - Removed the previously deprecated ``parse_cols`` keyword in :func:`read_excel` (:issue:`16488`) - Removed the previously deprecated ``pd.options.html.border`` (:issue:`16970`) +- Removed the previously deprecated ``convert_objects`` (:issue:`11221`) .. _whatsnew_0250.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 33b0035e74913..2428bbad7003b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -113,7 +113,7 @@ class NDFrame(PandasObject, SelectionMixin): _internal_names_set = set(_internal_names) # type: Set[str] _accessors = set() # type: Set[str] _deprecations = frozenset([ - 'as_blocks', 'blocks', 'convert_objects', 'is_copy' + 'as_blocks', 'blocks', 'is_copy' ]) # type: FrozenSet[str] _metadata = [] # type: List[str] _is_copy = None @@ -5913,52 +5913,6 @@ def _convert(self, datetime=False, numeric=False, timedelta=False, timedelta=timedelta, coerce=coerce, copy=copy)).__finalize__(self) - def convert_objects(self, convert_dates=True, convert_numeric=False, - convert_timedeltas=True, copy=True): - """ - Attempt to infer better dtype for object columns. - - .. deprecated:: 0.21.0 - - Parameters - ---------- - convert_dates : boolean, default True - If True, convert to date where possible. If 'coerce', force - conversion, with unconvertible values becoming NaT. - convert_numeric : boolean, default False - If True, attempt to coerce to numbers (including strings), with - unconvertible values becoming NaN. - convert_timedeltas : boolean, default True - If True, convert to timedelta where possible. If 'coerce', force - conversion, with unconvertible values becoming NaT. - copy : boolean, default True - If True, return a copy even if no copy is necessary (e.g. no - conversion was done). Note: This is meant for internal use, and - should not be confused with inplace. - - Returns - ------- - converted : same as input object - - See Also - -------- - to_datetime : Convert argument to datetime. - to_timedelta : Convert argument to timedelta. - to_numeric : Convert argument to numeric type. - """ - msg = ("convert_objects is deprecated. To re-infer data dtypes for " - "object columns, use {klass}.infer_objects()\nFor all " - "other conversions use the data-type specific converters " - "pd.to_datetime, pd.to_timedelta and pd.to_numeric." - ).format(klass=self.__class__.__name__) - warnings.warn(msg, FutureWarning, stacklevel=2) - - return self._constructor( - self._data.convert(convert_dates=convert_dates, - convert_numeric=convert_numeric, - convert_timedeltas=convert_timedeltas, - copy=copy)).__finalize__(self) - def infer_objects(self): """ Attempt to infer better dtypes for object columns. 
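For code that still calls the removed method, here is a minimal migration sketch following the replacement path named in the old deprecation message (``infer_objects`` plus the data-type specific ``pd.to_*`` converters); the frame and column names below are hypothetical:

    import pandas as pd

    # Object-dtype columns of the kind convert_objects() used to coerce.
    df = pd.DataFrame({'nums': ['1', '2', 'garbled'],
                       'dates': ['20010101', '20010102', 'foo']}, dtype=object)

    # Soft conversion: re-infer better dtypes for object columns.
    df = df.infer_objects()

    # Explicit, type-specific conversion; errors='coerce' turns
    # unconvertible values into NaN / NaT instead of raising.
    df['nums'] = pd.to_numeric(df['nums'], errors='coerce')
    df['dates'] = pd.to_datetime(df['dates'], format='%Y%m%d', errors='coerce')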
diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index f6f4a2db359f7..29846f10dae33 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -12,131 +12,6 @@ class TestSeriesInternals: - def test_convert_objects(self): - - s = Series([1., 2, 3], index=['a', 'b', 'c']) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates=False, - convert_numeric=True) - assert_series_equal(result, s) - - # force numeric conversion - r = s.copy().astype('O') - r['a'] = '1' - with tm.assert_produces_warning(FutureWarning): - result = r.convert_objects(convert_dates=False, - convert_numeric=True) - assert_series_equal(result, s) - - r = s.copy().astype('O') - r['a'] = '1.' - with tm.assert_produces_warning(FutureWarning): - result = r.convert_objects(convert_dates=False, - convert_numeric=True) - assert_series_equal(result, s) - - r = s.copy().astype('O') - r['a'] = 'garbled' - expected = s.copy() - expected['a'] = np.nan - with tm.assert_produces_warning(FutureWarning): - result = r.convert_objects(convert_dates=False, - convert_numeric=True) - assert_series_equal(result, expected) - - # GH 4119, not converting a mixed type (e.g.floats and object) - s = Series([1, 'na', 3, 4]) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_numeric=True) - expected = Series([1, np.nan, 3, 4]) - assert_series_equal(result, expected) - - s = Series([1, '', 3, 4]) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_numeric=True) - expected = Series([1, np.nan, 3, 4]) - assert_series_equal(result, expected) - - # dates - s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), - datetime(2001, 1, 3, 0, 0)]) - s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), - datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1, - Timestamp('20010104'), '20010105'], - dtype='O') - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates=True, - convert_numeric=False) - expected = Series([Timestamp('20010101'), Timestamp('20010102'), - Timestamp('20010103')], dtype='M8[ns]') - assert_series_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce', - convert_numeric=False) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce', - convert_numeric=True) - assert_series_equal(result, expected) - - expected = Series([Timestamp('20010101'), Timestamp('20010102'), - Timestamp('20010103'), - NaT, NaT, NaT, Timestamp('20010104'), - Timestamp('20010105')], dtype='M8[ns]') - with tm.assert_produces_warning(FutureWarning): - result = s2.convert_objects(convert_dates='coerce', - convert_numeric=False) - assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - result = s2.convert_objects(convert_dates='coerce', - convert_numeric=True) - assert_series_equal(result, expected) - - # preserver all-nans (if convert_dates='coerce') - s = Series(['foo', 'bar', 1, 1.0], dtype='O') - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce', - convert_numeric=False) - expected = Series([NaT] * 2 + [Timestamp(1)] * 2) - assert_series_equal(result, expected) - - # preserver if non-object - s = Series([1], dtype='float32') - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce', - 
convert_numeric=False) - assert_series_equal(result, s) - - # r = s.copy() - # r[0] = np.nan - # result = r.convert_objects(convert_dates=True,convert_numeric=False) - # assert result.dtype == 'M8[ns]' - - # dateutil parses some single letters into today's value as a date - for x in 'abcdefghijklmnopqrstuvwxyz': - s = Series([x]) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce') - assert_series_equal(result, s) - s = Series([x.upper()]) - with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates='coerce') - assert_series_equal(result, s) - - def test_convert_objects_preserve_bool(self): - s = Series([1, True, 3, 5], dtype=object) - with tm.assert_produces_warning(FutureWarning): - r = s.convert_objects(convert_numeric=True) - e = Series([1, 1, 3, 5], dtype='i8') - tm.assert_series_equal(r, e) - - def test_convert_objects_preserve_all_bool(self): - s = Series([False, True, False, False], dtype=object) - with tm.assert_produces_warning(FutureWarning): - r = s.convert_objects(convert_numeric=True) - e = Series([False, True, False, False], dtype=bool) - tm.assert_series_equal(r, e) - # GH 10265 def test_convert(self): # Tests: All to nans, coerce, true From 0e3bf7f3478ffb85d64e795d72888bdb9bd9cb4b Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Mon, 3 Jun 2019 01:34:27 +0200 Subject: [PATCH 33/51] Clean up ufuncs post numpy bump (#26606) --- pandas/core/arrays/sparse.py | 9 --------- pandas/core/sparse/frame.py | 6 ------ pandas/core/sparse/series.py | 20 -------------------- 3 files changed, 35 deletions(-) diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index ecc06db2bd07b..926ed6a829a6d 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -573,7 +573,6 @@ class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): Whether to explicitly copy the incoming `data` array. 
""" - __array_priority__ = 15 _pandas_ftype = 'sparse' _subtyp = 'sparse_array' # register ABCSparseArray @@ -1639,14 +1638,6 @@ def T(self): # Ufuncs # ------------------------------------------------------------------------ - def __array_wrap__(self, array, context=None): - from pandas.core.dtypes.generic import ABCSparseSeries - - ufunc, inputs, _ = context - inputs = tuple(x.to_dense() if isinstance(x, ABCSparseSeries) else x - for x in inputs) - return self.__array_ufunc__(ufunc, '__call__', *inputs) - _HANDLED_TYPES = (np.ndarray, numbers.Number) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index bf1cec7571f4d..0320da6d9a48d 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -242,12 +242,6 @@ def _init_spmatrix(self, data, index, columns, dtype=None, def to_coo(self): return SparseFrameAccessor(self).to_coo() - def __array_wrap__(self, result): - return self._constructor( - result, index=self.index, columns=self.columns, - default_kind=self._default_kind, - default_fill_value=self._default_fill_value).__finalize__(self) - def __getstate__(self): # pickling return dict(_typ=self._typ, _subtyp=self._subtyp, _data=self._data, diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 3f95acdbfb42c..3814d8bb66635 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -124,26 +124,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): fill_value=result.fill_value, copy=False).__finalize__(self) - def __array_wrap__(self, result, context=None): - """ - Gets called prior to a ufunc (and after) - - See SparseArray.__array_wrap__ for detail. - """ - result = self.values.__array_wrap__(result, context=context) - return self._constructor(result, index=self.index, - sparse_index=self.sp_index, - fill_value=result.fill_value, - copy=False).__finalize__(self) - - def __array_finalize__(self, obj): - """ - Gets called after any ufunc or other array operations, necessary - to pass on the index. 
-        """
-        self.name = getattr(obj, 'name', None)
-        self.fill_value = getattr(obj, 'fill_value', None)
-
     # unary ops
     # TODO: See if this can be shared
     def __pos__(self):

From 635458029e11ff6d94e8132577075269fb79832c Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sun, 2 Jun 2019 18:42:54 -0500
Subject: [PATCH 34/51] Add more specific error message when user passes incorrect matrix format to from_coo (#26584)

---
 doc/source/whatsnew/v0.25.0.rst             |  2 +-
 pandas/core/sparse/scipy_sparse.py          | 11 ++++++++++-
 pandas/tests/arrays/sparse/test_accessor.py | 10 ++++++++++
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 1cbec223008c4..461c883f542ab 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -694,7 +694,7 @@ Sparse
 - Significant speedup in :class:`SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`)
 - Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`)
 - Bug in :class:`SparseDataFrame` when adding a column in which the length of values does not match length of index, ``AssertionError`` is raised instead of raising ``ValueError`` (:issue:`25484`)
-
+- Introduced a better error message in :meth:`Series.sparse.from_coo` so it raises a ``TypeError`` for inputs that are not coo matrices (:issue:`26554`)

 Other
 ^^^^^
diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py
index 7630983421ff9..0dd8958e93c13 100644
--- a/pandas/core/sparse/scipy_sparse.py
+++ b/pandas/core/sparse/scipy_sparse.py
@@ -130,10 +130,19 @@ def _coo_to_sparse_series(A, dense_index: bool = False,
     Returns
     -------
     Series or SparseSeries
+
+    Raises
+    ------
+    TypeError if A is not a coo_matrix
+
     """
     from pandas import SparseDtype

-    s = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
+    try:
+        s = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
+    except AttributeError:
+        raise TypeError('Expected coo_matrix. Got {} instead.'
+                        .format(type(A).__name__))
     s = s.sort_index()
     if sparse_series:
         # TODO(SparseSeries): remove this and the sparse_series keyword.
diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
index 370d222c1ab4e..d0a188a8aff3c 100644
--- a/pandas/tests/arrays/sparse/test_accessor.py
+++ b/pandas/tests/arrays/sparse/test_accessor.py
@@ -119,3 +119,13 @@ def test_series_from_coo(self, dtype, dense_index):
         )

         tm.assert_series_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    def test_series_from_coo_incorrect_format_raises(self):
+        # gh-26554
+        import scipy.sparse
+        m = scipy.sparse.csr_matrix(np.array([[0, 1], [0, 0]]))
+        with pytest.raises(TypeError,
+                           match='Expected coo_matrix. Got csr_matrix instead.'
+ ): + pd.Series.sparse.from_coo(m) From 23b0788118bd95bdf1adb8f86d667fa54a033423 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 3 Jun 2019 07:35:25 +0200 Subject: [PATCH 35/51] DOC/CI: restore travis CI doc build environment (#26621) --- .travis.yml | 4 ++-- ci/deps/travis-36-doc.yaml | 46 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 ci/deps/travis-36-doc.yaml diff --git a/.travis.yml b/.travis.yml index 90dd904e6cb1e..ce8817133a477 100644 --- a/.travis.yml +++ b/.travis.yml @@ -51,14 +51,14 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="3.6, doc" ENV_FILE="environment.yml" DOC=true + - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true allow_failures: - dist: trusty env: - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" - dist: trusty env: - - JOB="3.6, doc" ENV_FILE="environment.yml" DOC=true + - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true before_install: - echo "before_install" diff --git a/ci/deps/travis-36-doc.yaml b/ci/deps/travis-36-doc.yaml new file mode 100644 index 0000000000000..9d6cbd82fdc05 --- /dev/null +++ b/ci/deps/travis-36-doc.yaml @@ -0,0 +1,46 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - beautifulsoup4 + - bottleneck + - cython>=0.28.2 + - fastparquet>=0.2.1 + - gitpython + - html5lib + - hypothesis>=3.58.0 + - ipykernel + - ipython + - ipywidgets + - lxml + - matplotlib + - nbconvert>=5.4.1 + - nbformat + - nbsphinx + - notebook>=5.7.5 + - numexpr + - numpy + - numpydoc + - openpyxl + - pandoc + - pyarrow + - pyqt + - pytables + - python-dateutil + - python-snappy + - python=3.6.* + - pytz + - scipy + - seaborn + - sphinx + - sqlalchemy + - statsmodels + - xarray + - xlrd + - xlsxwriter + - xlwt + # universal + - pytest>=4.0.2 + - pytest-xdist + - isort From 8d124ea4c5200f218db7cea8e3ff504b0045a4e6 Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Mon, 3 Jun 2019 13:56:29 +0200 Subject: [PATCH 36/51] TST/API: Forbid str-accessor for 1-level MultiIndex (#26608) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/tests/test_strings.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 461c883f542ab..0e8cd95084a8d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -434,6 +434,7 @@ Other API Changes - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`) - The ``arg`` argument in :meth:`pandas.core.window._Window.aggregate` has been renamed to ``func`` (:issue:`26372`) - Most Pandas classes had a ``__bytes__`` method, which was used for getting a python2-style bytestring representation of the object. This method has been removed as a part of dropping Python2 (:issue:`26447`) +- The `.str`-accessor has been disabled for 1-level :class:`MultiIndex`, use :meth:`MultiIndex.to_flat_index` if necessary (:issue:`23679`) - Removed support of gtk package for clipboards (:issue:`26563`) .. 
_whatsnew_0250.deprecations: diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 1ba0ef3918fb7..a1d522930e9aa 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -169,6 +169,14 @@ def test_api(self): assert Series.str is strings.StringMethods assert isinstance(Series(['']).str, strings.StringMethods) + def test_api_mi_raises(self): + # GH 23679 + mi = MultiIndex.from_arrays([['a', 'b', 'c']]) + with pytest.raises(AttributeError, match='Can only use .str accessor ' + 'with Index, not MultiIndex'): + mi.str + assert not hasattr(mi, 'str') + @pytest.mark.parametrize('dtype', [object, 'category']) @pytest.mark.parametrize('box', [Series, Index]) def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype): From 101370645d13e1d0f256f367f4ef56a8329b56b6 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Mon, 3 Jun 2019 22:17:40 +0000 Subject: [PATCH 37/51] Minor doc cleanup because of Panel removal (#26638) --- doc/source/getting_started/basics.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index 80e334054a986..5ec0094de0a91 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -1455,9 +1455,8 @@ Iteration The behavior of basic iteration over pandas objects depends on the type. When iterating over a Series, it is regarded as array-like, and basic iteration -produces the values. Other data structures, like DataFrame, -follow the dict-like convention of iterating over the "keys" of the -objects. +produces the values. DataFrames follow the dict-like convention of iterating +over the "keys" of the objects. In short, basic iteration (``for i in object``) produces: @@ -1537,9 +1536,9 @@ For example: .. ipython:: python - for item, frame in df.iteritems(): - print(item) - print(frame) + for label, ser in df.iteritems(): + print(label) + print(ser) .. _basics.iterrows: From 454b8c5cdcea0cbba981d607293b990cc704f3a1 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Mon, 3 Jun 2019 19:23:49 -0600 Subject: [PATCH 38/51] DOC: Small whatsnew cleanups (#26643) --- doc/source/whatsnew/v0.25.0.rst | 65 +++++++++++++++++---------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0e8cd95084a8d..267e34efc946f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -72,7 +72,7 @@ Other Enhancements - :meth:`DataFrame.pivot_table` now accepts an ``observed`` parameter which is passed to underlying calls to :meth:`DataFrame.groupby` to speed up grouping categorical data. (:issue:`24923`) - ``Series.str`` has gained :meth:`Series.str.casefold` method to removes all case distinctions present in a string (:issue:`25405`) - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) -- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) +- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behavior of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) - :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. 
``sort=None`` is the default and returns a mononotically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`) - :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword (:issue:`24471`) - :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`) @@ -123,11 +123,11 @@ is respected in indexing. (:issue:`24076`, :issue:`16785`) .. _whatsnew_0250.api_breaking.multi_indexing: -MultiIndex constructed from levels and codes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +``MultiIndex`` constructed from levels and codes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Constructing a :class:`MultiIndex` with NaN levels or codes value < -1 was allowed previously. -Now, construction with codes value < -1 is not allowed and NaN levels' corresponding codes +Constructing a :class:`MultiIndex` with ``NaN`` levels or codes value < -1 was allowed previously. +Now, construction with codes value < -1 is not allowed and ``NaN`` levels' corresponding codes would be reassigned as -1. (:issue:`19387`) .. ipython:: python @@ -157,8 +157,8 @@ would be reassigned as -1. (:issue:`19387`) .. _whatsnew_0250.api_breaking.groupby_apply_first_group_once: -GroupBy.apply on ``DataFrame`` evaluates first group only once -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +``GroupBy.apply`` on ``DataFrame`` evaluates first group only once +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The implementation of :meth:`DataFrameGroupBy.apply() ` previously evaluated the supplied function consistently twice on the first group @@ -176,7 +176,7 @@ Now every group is evaluated only a single time. print(group.name) return group -*Previous Behaviour*: +*Previous Behavior*: .. code-block:: python @@ -189,7 +189,7 @@ Now every group is evaluated only a single time. 0 x 1 1 y 2 -*New Behaviour*: +*New Behavior*: .. ipython:: python @@ -239,7 +239,7 @@ of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* t ``'bytes'``-only data will raise an exception (except for :meth:`Series.str.decode`, :meth:`Series.str.get`, :meth:`Series.str.len`, :meth:`Series.str.slice`), see :issue:`23163`, :issue:`23011`, :issue:`23551`. -*Previous Behaviour*: +*Previous Behavior*: .. code-block:: python @@ -259,7 +259,7 @@ of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* t 2 False dtype: bool -*New Behaviour*: +*New Behavior*: .. ipython:: python :okexcept: @@ -282,6 +282,8 @@ considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`). *Previous Behavior*: +.. code-block:: python + In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) ... ValueError: can only call with other PeriodIndex-ed objects @@ -310,7 +312,7 @@ are returned. (:issue:`21521`) df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]}) df -*Previous Behaviour*: +*Previous Behavior*: .. code-block:: python @@ -320,7 +322,7 @@ are returned. (:issue:`21521`) 0 x 1 1 y 2 -*New Behaviour*: +*New Behavior*: .. 
ipython:: python @@ -355,7 +357,7 @@ with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397 df.describe() -``__str__`` methods now call ``__repr__`` rather than vica-versa +``__str__`` methods now call ``__repr__`` rather than vice versa ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Pandas has until now mostly defined string representations in a Pandas objects's @@ -434,7 +436,7 @@ Other API Changes - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`) - The ``arg`` argument in :meth:`pandas.core.window._Window.aggregate` has been renamed to ``func`` (:issue:`26372`) - Most Pandas classes had a ``__bytes__`` method, which was used for getting a python2-style bytestring representation of the object. This method has been removed as a part of dropping Python2 (:issue:`26447`) -- The `.str`-accessor has been disabled for 1-level :class:`MultiIndex`, use :meth:`MultiIndex.to_flat_index` if necessary (:issue:`23679`) +- The ``.str``-accessor has been disabled for 1-level :class:`MultiIndex`, use :meth:`MultiIndex.to_flat_index` if necessary (:issue:`23679`) - Removed support of gtk package for clipboards (:issue:`26563`) .. _whatsnew_0250.deprecations: @@ -468,7 +470,7 @@ The memory usage of the two approaches is identical. See :ref:`sparse.migration` Other Deprecations ^^^^^^^^^^^^^^^^^^ -- The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`). +- The deprecated ``.ix[]`` indexer now raises a more visible ``FutureWarning`` instead of ``DeprecationWarning`` (:issue:`26438`). - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`) - The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). @@ -499,14 +501,13 @@ Performance Improvements - Improved performance when slicing :class:`RangeIndex` (:issue:`26565`) - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) -- Improved performance of :meth:`IntervalIndex.is_monotonic`, :meth:`IntervalIndex.is_monotonic_increasing` and :meth:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) +- Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) - Improved performance of :meth:`DataFrame.to_csv` when writing datetime dtypes (:issue:`25708`) - Improved performance of :meth:`read_csv` by much faster parsing of ``MM/YYYY`` and ``DD/MM/YYYY`` datetime formats (:issue:`25922`) - Improved performance of nanops for dtypes that cannot store NaNs. 
Speedup is particularly prominent for :meth:`Series.all` and :meth:`Series.any` (:issue:`25070`) - Improved performance of :meth:`Series.map` for dictionary mappers on categorical series by mapping the categories instead of mapping all values (:issue:`23785`) -- Improved performance of :meth:`read_csv` by faster concatenating date columns without extra conversion to string for integer/float zero - and float NaN; by faster checking the string for the possibility of being a date (:issue:`25754`) -- Improved performance of :meth:`IntervalIndex.is_unique` by removing conversion to `MultiIndex` (:issue:`24813`) +- Improved performance of :meth:`read_csv` by faster concatenating date columns without extra conversion to string for integer/float zero and float ``NaN``; by faster checking the string for the possibility of being a date (:issue:`25754`) +- Improved performance of :attr:`IntervalIndex.is_unique` by removing conversion to ``MultiIndex`` (:issue:`24813`) .. _whatsnew_0250.bug_fixes: @@ -518,7 +519,7 @@ Categorical ^^^^^^^^^^^ - Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) -- Fixed bug in comparison of ordered :class:`Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in True (:issue:`26504`) +- Fixed bug in comparison of ordered :class:`Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in ``True`` (:issue:`26504`) - Datetimelike @@ -570,7 +571,7 @@ Numeric Conversion ^^^^^^^^^^ -- Bug in :func:`DataFrame.astype()` when passing a dict of columns and types the `errors` parameter was ignored. (:issue:`25905`) +- Bug in :func:`DataFrame.astype()` when passing a dict of columns and types the ``errors`` parameter was ignored. (:issue:`25905`) - - @@ -597,7 +598,7 @@ Indexing - Bug in which :meth:`DataFrame.append` produced an erroneous warning indicating that a ``KeyError`` will be thrown in the future when the data to be appended contains new columns (:issue:`22252`). - Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`). 
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`) -- Allow keyword arguments for callable local reference used in the :method:`DataFrame.query` string (:issue:`26426`) +- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`) Missing @@ -620,8 +621,8 @@ I/O - Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`) - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`) -- Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) -- Bug in :func:`json_normalize` for ``errors='ignore'`` where missing values in the input data, were filled in resulting ``DataFrame`` with the string "nan" instead of ``numpy.nan`` (:issue:`25468`) +- Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to :class:`Timestamp`, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) +- Bug in :func:`json_normalize` for ``errors='ignore'`` where missing values in the input data, were filled in resulting ``DataFrame`` with the string ``"nan"`` instead of ``numpy.nan`` (:issue:`25468`) - :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AsseertionError`` (:issue:`25608`) - Bug in :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` that would lead to incorrect output when the ``header`` keyword is used (:issue:`16718`) - Bug in :func:`read_csv` not properly interpreting the UTF8 encoded filenames on Windows on Python 3.6+ (:issue:`15086`) @@ -644,7 +645,7 @@ Plotting - Fixed bug where :class:`api.extensions.ExtensionArray` could not be used in matplotlib plotting (:issue:`25587`) - Bug in an error message in :meth:`DataFrame.plot`. 
Improved the error message if non-numerics are passed to :meth:`DataFrame.plot` (:issue:`25481`) -- Bug in incorrect ticklabel positions when plotting an index that are non-numeric / non-datetime (:issue:`7612` :issue:`15912` :issue:`22334`) +- Bug in incorrect ticklabel positions when plotting an index that are non-numeric / non-datetime (:issue:`7612`, :issue:`15912`, :issue:`22334`) - Fixed bug causing plots of :class:`PeriodIndex` timeseries to fail if the frequency is a multiple of the frequency rule code (:issue:`14763`) - - @@ -655,7 +656,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.resample.Resampler.agg` with a timezone aware index where ``OverflowError`` would raise when passing a list of functions (:issue:`22660`) - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) -- Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) +- Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying an aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) - Bug in :func:`pandas.core.groupby.GroupBy.size` when grouping only NA values (:issue:`23050`) - Bug in :func:`Series.groupby` where ``observed`` kwarg was previously ignored (:issue:`24880`) @@ -663,11 +664,11 @@ Groupby/Resample/Rolling - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`) - Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`) - Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`) -- Bug in :meth:`pandas.core.window.Rolling.count` and `pandas.core.window.Expanding.count` was previously ignoring the axis keyword (:issue:`13503`) +- Bug in :meth:`pandas.core.window.Rolling.count` and ``pandas.core.window.Expanding.count`` was previously ignoring the ``axis`` keyword (:issue:`13503`) - Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) - Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`) - Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`) -- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise error (:issue:`26208`) +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise a ``ValueError`` (:issue:`26208`) - Bug in :meth:`pandas.core.frame.DataFrame.groupby` where passing a :class:`pandas.core.groupby.grouper.Grouper` would return incorrect groups when using the ``.groups`` accessor (:issue:`26326`) - Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. 
(:issue:`26310`) @@ -682,11 +683,11 @@ Reshaping - Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`). - Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`) - bug in :class:`DataFrame` instantiating with a dict of iterators or generators (e.g. ``pd.DataFrame({'A': reversed(range(3))})``) raised an error (:issue:`26349`). -- bug in :class:`DataFrame` instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`). +- Bug in :class:`DataFrame` instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`). - Bug in :class:`DataFrame` constructor when passing non-empty tuples would cause a segmentation fault (:issue:`25691`) - Bug in :func:`Series.apply` failed when the series is a timezone aware :class:`DatetimeIndex` (:issue:`25959`) - Bug in :func:`pandas.cut` where large bins could incorrectly raise an error due to an integer overflow (:issue:`26045`) -- Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed DataFrame is sorted on all levels with the initial level sorted last (:issue:`26053`) +- Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed ``DataFrame`` is sorted on all levels with the initial level sorted last (:issue:`26053`) - Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) Sparse @@ -702,7 +703,7 @@ Other - Removed unused C functions from vendored UltraJSON implementation (:issue:`26198`) - Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). -- Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions. +- Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions (:issue:`26125`) .. _whatsnew_0.250.contributors: From c07d71d13b21e0b6e22146f0f546f1f8e24a64b3 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 4 Jun 2019 12:23:42 +0100 Subject: [PATCH 39/51] DOC/CI: Removing Panel specific code from validate_docstrings.py (#26627) --- scripts/validate_docstrings.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 63db50db45a7c..64eaf45376b2f 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -539,14 +539,9 @@ def first_line_ends_in_dot(self): if self.doc: return self.doc.split('\n')[0][-1] == '.' - @property - def deprecated_with_directive(self): - return '.. deprecated:: ' in (self.summary + self.extended_summary) - @property def deprecated(self): - return (self.name.startswith('pandas.Panel') - or self.deprecated_with_directive) + return '.. deprecated:: ' in (self.summary + self.extended_summary) @property def mentioned_private_classes(self): @@ -674,7 +669,7 @@ def get_validation_data(doc): errs.append(error('GL07', correct_sections=', '.join(correct_order))) - if (doc.deprecated_with_directive + if (doc.deprecated and not doc.extended_summary.startswith('.. 
deprecated:: ')): errs.append(error('GL09')) @@ -859,9 +854,9 @@ def validate_all(prefix, ignore_deprecated=False): seen[shared_code_key] = func_name - # functions from introspecting Series, DataFrame and Panel + # functions from introspecting Series and DataFrame api_item_names = set(list(zip(*api_items))[0]) - for class_ in (pandas.Series, pandas.DataFrame, pandas.Panel): + for class_ in (pandas.Series, pandas.DataFrame): for member in inspect.getmembers(class_): func_name = 'pandas.{}.{}'.format(class_.__name__, member[0]) if (not member[0].startswith('_') From e25fd0d8ab10d6cc4dfe0f5808976f7921512c9f Mon Sep 17 00:00:00 2001 From: topper-123 Date: Tue, 4 Jun 2019 23:59:01 +0000 Subject: [PATCH 40/51] Remove NDFrame.select (#26641) --- doc/source/reference/frame.rst | 1 - doc/source/reference/series.rst | 1 - doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/generic.py | 34 ------------------ .../tests/frame/test_axis_select_reindex.py | 35 ------------------- pandas/tests/series/indexing/test_indexing.py | 14 -------- 6 files changed, 1 insertion(+), 85 deletions(-) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index b4fb85c028b3e..7d5cd5d245631 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -204,7 +204,6 @@ Reindexing / Selection / Label manipulation DataFrame.rename_axis DataFrame.reset_index DataFrame.sample - DataFrame.select DataFrame.set_axis DataFrame.set_index DataFrame.tail diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 8fccdea979602..79beeb0022307 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -211,7 +211,6 @@ Reindexing / Selection / Label manipulation Series.rename_axis Series.reset_index Series.sample - Series.select Series.set_axis Series.take Series.tail diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 267e34efc946f..4e8af90b85f83 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -487,6 +487,7 @@ Removal of prior version deprecations/changes - Removed the previously deprecated ``parse_cols`` keyword in :func:`read_excel` (:issue:`16488`) - Removed the previously deprecated ``pd.options.html.border`` (:issue:`16970`) - Removed the previously deprecated ``convert_objects`` (:issue:`11221`) +- Removed the previously deprecated ``select`` method of ``DataFrame`` and ``Series`` (:issue:`17633`) .. _whatsnew_0250.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2428bbad7003b..19d093dd29457 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3682,40 +3682,6 @@ class animal locomotion _xs = xs # type: Callable - def select(self, crit, axis=0): - """ - Return data corresponding to axis labels matching criteria. - - .. deprecated:: 0.21.0 - Use df.loc[df.index.map(crit)] to select via labels - - Parameters - ---------- - crit : function - To be called on each index (label). Should return True or False - axis : int - - Returns - ------- - selection : same type as caller - """ - warnings.warn("'select' is deprecated and will be removed in a " - "future release. 
You can use " - ".loc[labels.map(crit)] as a replacement", - FutureWarning, stacklevel=2) - - axis = self._get_axis_number(axis) - axis_name = self._get_axis_name(axis) - axis_values = self._get_axis(axis) - - if len(axis_values) > 0: - new_axis = axis_values[ - np.asarray([bool(crit(label)) for label in axis_values])] - else: - new_axis = axis_values - - return self.reindex(**{axis_name: new_axis}) - def reindex_like(self, other, method=None, copy=True, limit=None, tolerance=None): """ diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index ad6c66c911615..42f98d5c96aa5 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -895,41 +895,6 @@ def test_filter_corner(self): result = empty.filter(like='foo') assert_frame_equal(result, empty) - def test_select(self): - - # deprecated: gh-12410 - f = lambda x: x.weekday() == 2 - index = self.tsframe.index[[f(x) for x in self.tsframe.index]] - expected_weekdays = self.tsframe.reindex(index=index) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = self.tsframe.select(f, axis=0) - assert_frame_equal(result, expected_weekdays) - - result = self.frame.select(lambda x: x in ('B', 'D'), axis=1) - expected = self.frame.reindex(columns=['B', 'D']) - assert_frame_equal(result, expected, check_names=False) - - # replacement - f = lambda x: x.weekday == 2 - result = self.tsframe.loc(axis=0)[f(self.tsframe.index)] - assert_frame_equal(result, expected_weekdays) - - crit = lambda x: x in ['B', 'D'] - result = self.frame.loc(axis=1)[(self.frame.columns.map(crit))] - expected = self.frame.reindex(columns=['B', 'D']) - assert_frame_equal(result, expected, check_names=False) - - # doc example - df = DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz']) - - crit = lambda x: x in ['bar', 'baz'] - with tm.assert_produces_warning(FutureWarning): - expected = df.select(crit) - result = df.loc[df.index.map(crit)] - assert_frame_equal(result, expected, check_names=False) - def test_take(self): # homogeneous order = [3, 1, 2, 0] diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 6641311faace2..702e22b6741e4 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -772,20 +772,6 @@ def test_setitem_slice_into_readonly_backing_data(): """ -def test_select(test_data): - # deprecated: gh-12410 - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - n = len(test_data.ts) - result = test_data.ts.select(lambda x: x >= test_data.ts.index[n // 2]) - expected = test_data.ts.reindex(test_data.ts.index[n // 2:]) - assert_series_equal(result, expected) - - result = test_data.ts.select(lambda x: x.weekday() == 2) - expected = test_data.ts[test_data.ts.index.weekday == 2] - assert_series_equal(result, expected) - - def test_pop(): # GH 6600 df = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, }) From 01d97d48b08c546a46b91c27a5886f52b46f22c2 Mon Sep 17 00:00:00 2001 From: Mak Sze Chun Date: Wed, 5 Jun 2019 15:22:08 +0800 Subject: [PATCH 41/51] [TST] Fix test_quantile_interpolation_int (#26633) --- pandas/tests/frame/test_quantile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 9ccbd290923ba..097477c42d249 100644 --- a/pandas/tests/frame/test_quantile.py +++ 
b/pandas/tests/frame/test_quantile.py @@ -160,8 +160,7 @@ def test_quantile_interpolation_int(self, int_frame): assert q['A'] == np.percentile(df['A'], 10) # test with and without interpolation keyword - # TODO: q1 is not different from q - q1 = df.quantile(0.1) + q1 = df.quantile(0.1, axis=0, interpolation='linear') assert q1['A'] == np.percentile(df['A'], 10) tm.assert_series_equal(q, q1) From c57f206360108c327d8256e716080fb1a2523fd8 Mon Sep 17 00:00:00 2001 From: shawnbrown Date: Wed, 5 Jun 2019 07:53:40 -0400 Subject: [PATCH 42/51] Update Accessors URL for PdVega package. (#26653) See altair-viz/pdvega@7476a8a26b for details. --- doc/source/ecosystem.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index e232bd2157611..b1a5430752558 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -363,4 +363,5 @@ Library Accessor Classes ============== ========== ========================= .. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest -.. _pdvega: https://jakevdp.github.io/pdvega/ +.. _pdvega: https://altair-viz.github.io/pdvega/ + From 758e35d7c8aa46279cbb9d6191ddb9842f1ce31b Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 5 Jun 2019 13:46:37 +0100 Subject: [PATCH 43/51] DEPS: Adding missing doc dependencies to environment.yml (#26657) --- environment.yml | 7 +++++++ requirements-dev.txt | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/environment.yml b/environment.yml index cf17dc1281ec9..91ea26eef4b61 100644 --- a/environment.yml +++ b/environment.yml @@ -17,10 +17,17 @@ dependencies: - flake8-rst>=0.6.0,<=0.7.0 - gitpython - hypothesis>=3.82 + - ipywidgets - isort - moto - mypy + - nbconvert>=5.4.1 + - nbformat + - notebook>=5.7.5 + - pandoc - pycodestyle + - pyqt + - python-snappy - pytest>=4.0.2 - pytest-mock - sphinx diff --git a/requirements-dev.txt b/requirements-dev.txt index 115a93495c95b..e6085920a9999 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,10 +8,17 @@ flake8-comprehensions flake8-rst>=0.6.0,<=0.7.0 gitpython hypothesis>=3.82 +ipywidgets isort moto mypy +nbconvert>=5.4.1 +nbformat +notebook>=5.7.5 +pandoc pycodestyle +pyqt +python-snappy pytest>=4.0.2 pytest-mock sphinx From 6ce7fc70a0103aaf8d6d6ff908a61b561447c218 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 5 Jun 2019 12:50:33 +0000 Subject: [PATCH 44/51] use range in RangeIndex instead of _start etc. (#26581) --- doc/source/whatsnew/v0.25.0.rst | 3 + pandas/core/dtypes/common.py | 29 +++ pandas/core/dtypes/concat.py | 21 +- pandas/core/frame.py | 10 +- pandas/core/indexes/range.py | 304 +++++++++++++---------------- pandas/core/series.py | 6 +- pandas/io/packers.py | 7 +- pandas/tests/indexes/test_range.py | 22 ++- 8 files changed, 202 insertions(+), 200 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 4e8af90b85f83..4018418294963 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -476,6 +476,9 @@ Other Deprecations the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`) - The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`). 
+- The internal ``_start``, ``_stop`` and ``_step`` attributes of :class:`RangeIndex` have been deprecated.
+  Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`).
+

 .. _whatsnew_0250.prior_deprecations:

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index b5cd73a81962b..4029e6f4bfdb5 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -1,4 +1,5 @@
 """ common type operations """
+from typing import Union
 import warnings

 import numpy as np
@@ -125,6 +126,34 @@ def ensure_int_or_float(arr: ArrayLike, copy=False) -> np.array:
     return arr.astype('float64', copy=copy)


+def ensure_python_int(value: Union[int, np.integer]) -> int:
+    """
+    Ensure that a value is a python int.
+
+    Parameters
+    ----------
+    value: int or numpy.integer
+
+    Returns
+    -------
+    int
+
+    Raises
+    ------
+    TypeError: if the value isn't an int or can't be converted to one.
+    """
+    if not is_scalar(value):
+        raise TypeError("Value needs to be a scalar value, was type {}"
+                        .format(type(value)))
+    msg = "Wrong type {} for value {}"
+    try:
+        new_value = int(value)
+        assert (new_value == value)
+    except (TypeError, ValueError, AssertionError):
+        raise TypeError(msg.format(type(value), value))
+    return new_value
+
+
 def classes(*klasses):
     """ evaluate if the tipo is a subclass of the klasses """
     return lambda tipo: issubclass(tipo, klasses)
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index b22ed45642cf6..e2c6fba322be0 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -541,36 +541,37 @@ def _concat_rangeindex_same_dtype(indexes):
     """
     from pandas import Int64Index, RangeIndex

-    start = step = next = None
+    start = step = next_ = None

     # Filter the empty indexes
     non_empty_indexes = [obj for obj in indexes if len(obj)]

     for obj in non_empty_indexes:
+        rng = obj._range  # type: range

         if start is None:
             # This is set by the first non-empty index
-            start = obj._start
-            if step is None and len(obj) > 1:
-                step = obj._step
+            start = rng.start
+            if step is None and len(rng) > 1:
+                step = rng.step

         elif step is None:
             # First non-empty index had only one element
-            if obj._start == start:
+            if rng.start == start:
                 return _concat_index_same_dtype(indexes, klass=Int64Index)
-            step = obj._start - start
+            step = rng.start - start

-        non_consecutive = ((step != obj._step and len(obj) > 1) or
-                           (next is not None and obj._start != next))
+        non_consecutive = ((step != rng.step and len(rng) > 1) or
+                           (next_ is not None and rng.start != next_))
         if non_consecutive:
             return _concat_index_same_dtype(indexes, klass=Int64Index)

         if step is not None:
-            next = obj[-1] + step
+            next_ = rng[-1] + step

     if non_empty_indexes:
         # Get the stop value from "next" or alternatively
         # from the last non-empty index
-        stop = non_empty_indexes[-1]._stop if next is None else next
+        stop = non_empty_indexes[-1].stop if next_ is None else next_
         return RangeIndex(start, stop, step)

     # Here all "indexes" had 0 length, i.e. were empty.
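A minimal sketch of the behavior the concat helper above is meant to preserve, assuming 0.25-era semantics: appending consecutive ranges keeps the ``RangeIndex`` type, while any gap or step mismatch falls back to a materialized ``Int64Index``:

    import pandas as pd

    left = pd.RangeIndex(0, 4)    # 0, 1, 2, 3
    right = pd.RangeIndex(4, 8)   # 4, 5, 6, 7

    # start/stop/step line up, so the result is RangeIndex(start=0, stop=8, step=1)
    print(left.append(right))

    # a gap breaks the pattern; the result is Int64Index([0, 1, 2, 3, 10, 11])
    print(left.append(pd.RangeIndex(10, 12)))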
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5957b23535350..48dfa57c47bf6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2282,7 +2282,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, text_col 5 non-null object float_col 5 non-null float64 dtypes: float64(1), int64(1), object(1) - memory usage: 200.0+ bytes + memory usage: 248.0+ bytes Prints a summary of columns count and its dtypes but not per column information: @@ -2292,7 +2292,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, RangeIndex: 5 entries, 0 to 4 Columns: 3 entries, int_col to float_col dtypes: float64(1), int64(1), object(1) - memory usage: 200.0+ bytes + memory usage: 248.0+ bytes Pipe output of DataFrame.info to buffer instead of sys.stdout, get buffer content and writes to a text file: @@ -2494,7 +2494,7 @@ def memory_usage(self, index=True, deep=False): 4 1 1.0 1.0+0.0j 1 True >>> df.memory_usage() - Index 80 + Index 128 int64 40000 float64 40000 complex128 80000 @@ -2513,7 +2513,7 @@ def memory_usage(self, index=True, deep=False): The memory footprint of `object` dtype columns is ignored by default: >>> df.memory_usage(deep=True) - Index 80 + Index 128 int64 40000 float64 40000 complex128 80000 @@ -2525,7 +2525,7 @@ def memory_usage(self, index=True, deep=False): many repeated values. >>> df['object'].astype('category').memory_usage(deep=True) - 5168 + 5216 """ result = Series([c.memory_usage(index=False, deep=deep) for col, c in self.iteritems()], index=self.columns) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9401de3346ccd..82fd7342c027c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -12,7 +12,8 @@ from pandas.core.dtypes import concat as _concat from pandas.core.dtypes.common import ( - is_int64_dtype, is_integer, is_scalar, is_timedelta64_dtype) + ensure_python_int, is_int64_dtype, is_integer, is_scalar, + is_timedelta64_dtype) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, ABCTimedeltaIndex) @@ -65,6 +66,7 @@ class RangeIndex(Int64Index): _typ = 'rangeindex' _engine_type = libindex.Int64Engine + _range = None # type: range # check whether self._data has benn called _cached_data = None # type: np.ndarray @@ -91,39 +93,19 @@ def __new__(cls, start=None, stop=None, step=None, **dict(start._get_data_as_items())) # validate the arguments - def ensure_int(value, field): - msg = ("RangeIndex(...) must be called with integers," - " {value} was passed for {field}") - if not is_scalar(value): - raise TypeError(msg.format(value=type(value).__name__, - field=field)) - try: - new_value = int(value) - assert(new_value == value) - except (TypeError, ValueError, AssertionError): - raise TypeError(msg.format(value=type(value).__name__, - field=field)) + if com._all_none(start, stop, step): + raise TypeError("RangeIndex(...) must be called with integers") - return new_value + start = ensure_python_int(start) if start is not None else 0 - if com._all_none(start, stop, step): - msg = "RangeIndex(...) 
must be called with integers" - raise TypeError(msg) - elif start is None: - start = 0 - else: - start = ensure_int(start, 'start') if stop is None: - stop = start - start = 0 + start, stop = 0, start else: - stop = ensure_int(stop, 'stop') - if step is None: - step = 1 - elif step == 0: + stop = ensure_python_int(stop) + + step = ensure_python_int(step) if step is not None else 1 + if step == 0: raise ValueError("Step must not be zero") - else: - step = ensure_int(step, 'step') return cls._simple_new(start, stop, step, name) @@ -142,7 +124,7 @@ def from_range(cls, data, name=None, dtype=None, **kwargs): 'range, {1} was passed'.format(cls.__name__, repr(data))) start, stop, step = data.start, data.stop, data.step - return RangeIndex(start, stop, step, dtype=dtype, name=name, **kwargs) + return cls(start, stop, step, dtype=dtype, name=name, **kwargs) @classmethod def _simple_new(cls, start, stop=None, step=None, name=None, @@ -156,20 +138,16 @@ def _simple_new(cls, start, stop=None, step=None, name=None, if start is None or not is_integer(start): try: - - return RangeIndex(start, stop, step, name=name, **kwargs) + return cls(start, stop, step, name=name, **kwargs) except TypeError: return Index(start, stop, step, name=name, **kwargs) - result._start = start - result._stop = stop or 0 - result._step = step or 1 + result._range = range(start, stop or 0, step or 1) + result.name = name for k, v in kwargs.items(): setattr(result, k, v) - result._range = range(result._start, result._stop, result._step) - result._reset_identity() return result @@ -196,7 +174,7 @@ def _data(self): triggering the construction. """ if self._cached_data is None: - self._cached_data = np.arange(self._start, self._stop, self._step, + self._cached_data = np.arange(self.start, self.stop, self.step, dtype=np.int64) return self._cached_data @@ -206,9 +184,10 @@ def _int64index(self): def _get_data_as_items(self): """ return a list of tuples of start, stop, step """ - return [('start', self._start), - ('stop', self._stop), - ('step', self._step)] + rng = self._range + return [('start', rng.start), + ('stop', rng.stop), + ('step', rng.step)] def __reduce__(self): d = self._get_attributes_dict() @@ -235,39 +214,79 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): return header + list(map(pprint_thing, self._range)) # -------------------------------------------------------------------- - @property + _deprecation_message = ("RangeIndex.{} is deprecated and will be " + "removed in a future version. Use RangeIndex.{} " + "instead") + + @cache_readonly def start(self): """ - The value of the `start` parameter (or ``0`` if this was not supplied) + The value of the `start` parameter (``0`` if this was not supplied) """ # GH 25710 - return self._start + return self._range.start @property + def _start(self): + """ + The value of the `start` parameter (``0`` if this was not supplied) + + .. deprecated:: 0.25.0 + Use ``start`` instead. + """ + warnings.warn(self._deprecation_message.format("_start", "start"), + DeprecationWarning, stacklevel=2) + return self.start + + @cache_readonly def stop(self): """ The value of the `stop` parameter """ - # GH 25710 - return self._stop + return self._range.stop @property + def _stop(self): + """ + The value of the `stop` parameter + + .. deprecated:: 0.25.0 + Use ``stop`` instead. 
+ """ + # GH 25710 + warnings.warn(self._deprecation_message.format("_stop", "stop"), + DeprecationWarning, stacklevel=2) + return self.stop + + @cache_readonly def step(self): """ - The value of the `step` parameter (or ``1`` if this was not supplied) + The value of the `step` parameter (``1`` if this was not supplied) """ # GH 25710 - return self._step + return self._range.step + + @property + def _step(self): + """ + The value of the `step` parameter (``1`` if this was not supplied) + + .. deprecated:: 0.25.0 + Use ``step`` instead. + """ + # GH 25710 + warnings.warn(self._deprecation_message.format("_step", "step"), + DeprecationWarning, stacklevel=2) + return self.step @cache_readonly def nbytes(self): """ - Return the number of bytes in the underlying data - On implementations where this is undetermined (PyPy) - assume 24 bytes for each value + Return the number of bytes in the underlying data. """ - return sum(getsizeof(getattr(self, v), 24) for v in - ['_start', '_stop', '_step']) + rng = self._range + return getsizeof(rng) + sum(getsizeof(getattr(rng, attr_name)) + for attr_name in ['start', 'stop', 'step']) def memory_usage(self, deep=False): """ @@ -305,11 +324,11 @@ def is_unique(self): @cache_readonly def is_monotonic_increasing(self): - return self._step > 0 or len(self) <= 1 + return self._range.step > 0 or len(self) <= 1 @cache_readonly def is_monotonic_decreasing(self): - return self._step < 0 or len(self) <= 1 + return self._range.step < 0 or len(self) <= 1 @property def has_duplicates(self): @@ -325,13 +344,13 @@ def get_loc(self, key, method=None, tolerance=None): return super().get_loc(key, method=method, tolerance=tolerance) def tolist(self): - return list(range(self._start, self._stop, self._step)) + return list(self._range) @Appender(_index_shared_docs['_shallow_copy']) def _shallow_copy(self, values=None, **kwargs): if values is None: name = kwargs.get("name", self.name) - return RangeIndex._simple_new( + return self._simple_new( name=name, **dict(self._get_data_as_items())) else: kwargs.setdefault('name', self.name) @@ -342,18 +361,17 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs): self._validate_dtype(dtype) if name is None: name = self.name - return RangeIndex._simple_new( - name=name, **dict(self._get_data_as_items())) + return self.from_range(self._range, name=name) def _minmax(self, meth): no_steps = len(self) - 1 if no_steps == -1: return np.nan - elif ((meth == 'min' and self._step > 0) or - (meth == 'max' and self._step < 0)): - return self._start + elif ((meth == 'min' and self.step > 0) or + (meth == 'max' and self.step < 0)): + return self.start - return self._start + self._step * no_steps + return self.start + self.step * no_steps def min(self, axis=None, skipna=True, *args, **kwargs): """The minimum value of the RangeIndex""" @@ -382,7 +400,7 @@ def argsort(self, *args, **kwargs): """ nv.validate_argsort(args, kwargs) - if self._step > 0: + if self._range.step > 0: return np.arange(len(self)) else: return np.arange(len(self) - 1, -1, -1) @@ -392,15 +410,7 @@ def equals(self, other): Determines if two Index objects contain the same elements. 
""" if isinstance(other, RangeIndex): - ls = len(self) - lo = len(other) - return (ls == lo == 0 or - ls == lo == 1 and - self._start == other._start or - ls == lo and - self._start == other._start and - self._step == other._step) - + return self._range == other._range return super().equals(other) def intersection(self, other, sort=False): @@ -433,39 +443,40 @@ def intersection(self, other, sort=False): return super().intersection(other, sort=sort) if not len(self) or not len(other): - return RangeIndex._simple_new(None) + return self._simple_new(None) - first = self[::-1] if self._step < 0 else self - second = other[::-1] if other._step < 0 else other + first = self._range[::-1] if self.step < 0 else self._range + second = other._range[::-1] if other.step < 0 else other._range # check whether intervals intersect # deals with in- and decreasing ranges - int_low = max(first._start, second._start) - int_high = min(first._stop, second._stop) + int_low = max(first.start, second.start) + int_high = min(first.stop, second.stop) if int_high <= int_low: - return RangeIndex._simple_new(None) + return self._simple_new(None) # Method hint: linear Diophantine equation # solve intersection problem # performance hint: for identical step sizes, could use # cheaper alternative - gcd, s, t = first._extended_gcd(first._step, second._step) + gcd, s, t = self._extended_gcd(first.step, second.step) # check whether element sets intersect - if (first._start - second._start) % gcd: - return RangeIndex._simple_new(None) + if (first.start - second.start) % gcd: + return self._simple_new(None) # calculate parameters for the RangeIndex describing the # intersection disregarding the lower bounds - tmp_start = first._start + (second._start - first._start) * \ - first._step // gcd * s - new_step = first._step * second._step // gcd - new_index = RangeIndex._simple_new(tmp_start, int_high, new_step) + tmp_start = first.start + (second.start - first.start) * \ + first.step // gcd * s + new_step = first.step * second.step // gcd + new_index = self._simple_new(tmp_start, int_high, new_step) # adjust index to limiting interval - new_index._start = new_index._min_fitting_element(int_low) + new_start = new_index._min_fitting_element(int_low) + new_index = self._simple_new(new_start, new_index.stop, new_index.step) - if (self._step < 0 and other._step < 0) is not (new_index._step < 0): + if (self.step < 0 and other.step < 0) is not (new_index.step < 0): new_index = new_index[::-1] if sort is None: new_index = new_index.sort_values() @@ -473,13 +484,13 @@ def intersection(self, other, sort=False): def _min_fitting_element(self, lower_limit): """Returns the smallest element greater than or equal to the limit""" - no_steps = -(-(lower_limit - self._start) // abs(self._step)) - return self._start + abs(self._step) * no_steps + no_steps = -(-(lower_limit - self.start) // abs(self.step)) + return self.start + abs(self.step) * no_steps def _max_fitting_element(self, upper_limit): """Returns the largest element smaller than or equal to the limit""" - no_steps = (upper_limit - self._start) // abs(self._step) - return self._start + abs(self._step) * no_steps + no_steps = (upper_limit - self.start) // abs(self.step) + return self.start + abs(self.step) * no_steps def _extended_gcd(self, a, b): """ @@ -522,16 +533,16 @@ def _union(self, other, sort): return super()._union(other, sort=sort) if isinstance(other, RangeIndex) and sort is None: - start_s, step_s = self._start, self._step - end_s = self._start + self._step * (len(self) - 1) - 
start_o, step_o = other._start, other._step - end_o = other._start + other._step * (len(other) - 1) - if self._step < 0: + start_s, step_s = self.start, self.step + end_s = self.start + self.step * (len(self) - 1) + start_o, step_o = other.start, other.step + end_o = other.start + other.step * (len(other) - 1) + if self.step < 0: start_s, step_s, end_s = end_s, -step_s, start_s - if other._step < 0: + if other.step < 0: start_o, step_o, end_o = end_o, -step_o, start_o if len(self) == 1 and len(other) == 1: - step_s = step_o = abs(self._start - other._start) + step_s = step_o = abs(self.start - other.start) elif len(self) == 1: step_s = step_o elif len(other) == 1: @@ -542,21 +553,23 @@ def _union(self, other, sort): if ((start_s - start_o) % step_s == 0 and (start_s - end_o) <= step_s and (start_o - end_s) <= step_s): - return RangeIndex(start_r, end_r + step_s, step_s) + return self.__class__(start_r, end_r + step_s, step_s) if ((step_s % 2 == 0) and (abs(start_s - start_o) <= step_s / 2) and (abs(end_s - end_o) <= step_s / 2)): - return RangeIndex(start_r, end_r + step_s / 2, step_s / 2) + return self.__class__(start_r, + end_r + step_s / 2, + step_s / 2) elif step_o % step_s == 0: if ((start_o - start_s) % step_s == 0 and (start_o + step_s >= start_s) and (end_o - step_s <= end_s)): - return RangeIndex(start_r, end_r + step_s, step_s) + return self.__class__(start_r, end_r + step_s, step_s) elif step_s % step_o == 0: if ((start_s - start_o) % step_o == 0 and (start_s + step_o >= start_o) and (end_s - step_o <= end_o)): - return RangeIndex(start_r, end_r + step_o, step_o) + return self.__class__(start_r, end_r + step_o, step_o) return self._int64index._union(other, sort=sort) @Appender(_index_shared_docs['join']) @@ -576,7 +589,7 @@ def __len__(self): """ return the length of the RangeIndex """ - return max(0, -(-(self._stop - self._start) // self._step)) + return len(self._range) @property def size(self): @@ -597,59 +610,15 @@ def __getitem__(self, key): n = com.cast_scalar_indexer(key) if n != key: return super_getitem(key) - if n < 0: - n = len(self) + key - if n < 0 or n > len(self) - 1: + try: + return self._range[key] + except IndexError: raise IndexError("index {key} is out of bounds for axis 0 " "with size {size}".format(key=key, size=len(self))) - return self._start + n * self._step - if isinstance(key, slice): - - # This is basically PySlice_GetIndicesEx, but delegation to our - # super routines if we don't have integers - - length = len(self) - - # complete missing slice information - step = 1 if key.step is None else key.step - if key.start is None: - start = length - 1 if step < 0 else 0 - else: - start = key.start - - if start < 0: - start += length - if start < 0: - start = -1 if step < 0 else 0 - if start >= length: - start = length - 1 if step < 0 else length - - if key.stop is None: - stop = -1 if step < 0 else length - else: - stop = key.stop - - if stop < 0: - stop += length - if stop < 0: - stop = -1 - if stop > length: - stop = length - - # delegate non-integer slices - if (start != int(start) or - stop != int(stop) or - step != int(step)): - return super_getitem(key) - - # convert indexes to values - start = self._start + self._step * start - stop = self._start + self._step * stop - step = self._step * step - - return RangeIndex._simple_new(start, stop, step, name=self.name) + new_range = self._range[key] + return self.from_range(new_range, name=self.name) # fall back to Int64Index return super_getitem(key) @@ -660,17 +629,15 @@ def __floordiv__(self, other): if 
is_integer(other) and other != 0: if (len(self) == 0 or - self._start % other == 0 and - self._step % other == 0): - start = self._start // other - step = self._step // other + self.start % other == 0 and + self.step % other == 0): + start = self.start // other + step = self.step // other stop = start + len(self) * step - return RangeIndex._simple_new( - start, stop, step, name=self.name) + return self._simple_new(start, stop, step, name=self.name) if len(self) == 1: - start = self._start // other - return RangeIndex._simple_new( - start, start + 1, 1, name=self.name) + start = self.start // other + return self._simple_new(start, start + 1, 1, name=self.name) return self._int64index // other @classmethod @@ -712,7 +679,7 @@ def _evaluate_numeric_binop(self, other): # apply if we have an override if step: with np.errstate(all='ignore'): - rstep = step(left._step, right) + rstep = step(left.step, right) # we don't have a representable op # so return a base index @@ -720,16 +687,13 @@ def _evaluate_numeric_binop(self, other): raise ValueError else: - rstep = left._step + rstep = left.step with np.errstate(all='ignore'): - rstart = op(left._start, right) - rstop = op(left._stop, right) + rstart = op(left.start, right) + rstop = op(left.stop, right) - result = RangeIndex(rstart, - rstop, - rstep, - **attrs) + result = self.__class__(rstart, rstop, rstep, **attrs) # for compat with numpy / Int64Index # even if we can represent as a RangeIndex, return diff --git a/pandas/core/series.py b/pandas/core/series.py index 8fb6ad3e3ccc5..472d984234275 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4010,7 +4010,7 @@ def memory_usage(self, index=True, deep=False): -------- >>> s = pd.Series(range(3)) >>> s.memory_usage() - 104 + 152 Not including the index gives the size of the rest of the data, which is necessarily smaller: @@ -4024,9 +4024,9 @@ def memory_usage(self, index=True, deep=False): >>> s.values array(['a', 'b'], dtype=object) >>> s.memory_usage() - 96 + 144 >>> s.memory_usage(deep=True) - 212 + 260 """ v = super().memory_usage(deep=deep) if index: diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 1309bd1fef421..ead0fbd263ebf 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -367,9 +367,10 @@ def encode(obj): return {'typ': 'range_index', 'klass': obj.__class__.__name__, 'name': getattr(obj, 'name', None), - 'start': getattr(obj, '_start', None), - 'stop': getattr(obj, '_stop', None), - 'step': getattr(obj, '_step', None)} + 'start': obj._range.start, + 'stop': obj._range.stop, + 'step': obj._range.step, + } elif isinstance(obj, PeriodIndex): return {'typ': 'period_index', 'klass': obj.__class__.__name__, diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 477a4e527f278..bca50186827de 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -51,10 +51,8 @@ def test_constructor(self, args, kwargs, start, stop, step, name): expected = Index(np.arange(start, stop, step, dtype=np.int64), name=name) assert isinstance(result, RangeIndex) - assert result._start == start - assert result._stop == stop - assert result._step == step assert result.name is name + assert result._range == range(start, stop, step) tm.assert_index_equal(result, expected) def test_constructor_invalid_args(self): @@ -169,14 +167,19 @@ def test_start_stop_step_attrs(self, index, start, stop, step): assert index.stop == stop assert index.step == step + def test_deprecated_start_stop_step_attrs(self): + # GH 26581 
+ idx = self.create_index() + for attr_name in ['_start', '_stop', '_step']: + with tm.assert_produces_warning(DeprecationWarning): + getattr(idx, attr_name) + def test_copy(self): i = RangeIndex(5, name='Foo') i_copy = i.copy() assert i_copy is not i assert i_copy.identical(i) - assert i_copy._start == 0 - assert i_copy._stop == 5 - assert i_copy._step == 1 + assert i_copy._range == range(0, 5, 1) assert i_copy.name == 'Foo' def test_repr(self): @@ -243,8 +246,9 @@ def test_dtype(self): def test_cached_data(self): # GH 26565 - # Calling RangeIndex._data caches an int64 array of the same length at - # self._cached_data. This tests whether _cached_data has been set. + # Calling RangeIndex._data caches an int64 array of the same length as + # self at self._cached_data. + # This tests whether _cached_data is being set by various operations. idx = RangeIndex(0, 100, 10) assert idx._cached_data is None @@ -273,7 +277,7 @@ def test_cached_data(self): df.iloc[5:10] assert idx._cached_data is None - # actually calling data._data + # actually calling idx._data assert isinstance(idx._data, np.ndarray) assert isinstance(idx._cached_data, np.ndarray) From 8ef9a6356f9f00e22908dd04aa47b2a5d6c38725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Heikkil=C3=A4?= <42970828+mahepe@users.noreply.github.com> Date: Wed, 5 Jun 2019 15:54:34 +0300 Subject: [PATCH 45/51] TST: Test sorting levels not aligned with index (#25775) (#26492) --- pandas/tests/frame/test_sorting.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 246ba943a4509..96aeb608ba3b8 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -227,6 +227,18 @@ def test_stable_descending_multicolumn_sort(self): kind='mergesort') assert_frame_equal(sorted_df, expected) + def test_sort_multi_index(self): + # GH 25775, testing that sorting by index works with a multi-index. 
+ df = DataFrame({'a': [3, 1, 2], 'b': [0, 0, 0], + 'c': [0, 1, 2], 'd': list('abc')}) + result = df.set_index(list('abc')).sort_index(level=list('ba')) + + expected = DataFrame({'a': [1, 2, 3], 'b': [0, 0, 0], + 'c': [1, 2, 0], 'd': list('bca')}) + expected = expected.set_index(list('abc')) + + tm.assert_frame_equal(result, expected) + def test_stable_categorial(self): # GH 16793 df = DataFrame({ From e0c41f79104c5bc61952c9a14f1883cd5bda53f7 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 5 Jun 2019 08:59:12 -0400 Subject: [PATCH 46/51] Remove SharedItems from test_excel (#26579) --- pandas/tests/io/test_excel.py | 332 +++++++++++++++++----------------- 1 file changed, 169 insertions(+), 163 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 7693caf3b31d2..b99f0336fa4c5 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -26,13 +26,22 @@ from pandas.io.formats.excel import ExcelFormatter from pandas.io.parsers import read_csv -_seriesd = tm.getSeriesData() -_tsd = tm.getTimeSeriesData() -_frame = DataFrame(_seriesd)[:10] -_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])[:10] -_tsframe = tm.makeTimeDataFrame()[:5] -_mixed_frame = _frame.copy() -_mixed_frame['foo'] = 'bar' + +@pytest.fixture +def frame(float_frame): + return float_frame[:10] + + +@pytest.fixture +def frame2(float_frame): + float_frame = float_frame.copy() + float_frame.columns = ['D', 'C', 'B', 'A'] + return float_frame[:10] + + +@pytest.fixture +def tsframe(): + return tm.makeTimeDataFrame()[:5] @contextlib.contextmanager @@ -49,18 +58,8 @@ def ignore_xlrd_time_clock_warning(): yield -class SharedItems: - - @pytest.fixture(autouse=True) - def setup_method(self, datapath): - self.frame = _frame.copy() - self.frame2 = _frame2.copy() - self.tsframe = _tsframe.copy() - self.mixed_frame = _mixed_frame.copy() - - @td.skip_if_no('xlrd', '1.0.0') -class ReadingTestsBase(SharedItems): +class ReadingTestsBase: # This is based on ExcelWriterBase @pytest.fixture(autouse=True, params=['xlrd', None]) @@ -1055,9 +1054,9 @@ class TestXlrdReader(ReadingTestsBase): """ @td.skip_if_no("xlwt") - def test_read_xlrd_book(self, ext): + def test_read_xlrd_book(self, ext, frame): import xlrd - df = self.frame + df = frame engine = "xlrd" sheet_name = "SheetA" @@ -1075,7 +1074,7 @@ def test_read_xlrd_book(self, ext): tm.assert_frame_equal(df, result) -class _WriterBase(SharedItems): +class _WriterBase: @pytest.fixture(autouse=True) def set_engine_and_path(self, request, merge_cells, engine, ext): @@ -1150,75 +1149,79 @@ def test_excel_sheet_by_name_raise(self, *_): with pytest.raises(xlrd.XLRDError): pd.read_excel(xl, "0") - def test_excel_writer_context_manager(self, *_): + def test_excel_writer_context_manager(self, frame, frame2, *_): with ExcelWriter(self.path) as writer: - self.frame.to_excel(writer, "Data1") - self.frame2.to_excel(writer, "Data2") + frame.to_excel(writer, "Data1") + frame2.to_excel(writer, "Data2") with ExcelFile(self.path) as reader: found_df = pd.read_excel(reader, "Data1", index_col=0) found_df2 = pd.read_excel(reader, "Data2", index_col=0) - tm.assert_frame_equal(found_df, self.frame) - tm.assert_frame_equal(found_df2, self.frame2) + tm.assert_frame_equal(found_df, frame) + tm.assert_frame_equal(found_df2, frame2) - def test_roundtrip(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_roundtrip(self, merge_cells, engine, ext, frame): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 
'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) # test roundtrip - self.frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1') recons = pd.read_excel(self.path, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1', index=False) recons = pd.read_excel(self.path, 'test1', index_col=None) - recons.index = self.frame.index - tm.assert_frame_equal(self.frame, recons) + recons.index = frame.index + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, 'test1', na_rep='NA') + frame.to_excel(self.path, 'test1', na_rep='NA') recons = pd.read_excel( self.path, 'test1', index_col=0, na_values=['NA']) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) # GH 3611 - self.frame.to_excel(self.path, 'test1', na_rep='88') + frame.to_excel(self.path, 'test1', na_rep='88') recons = pd.read_excel( self.path, 'test1', index_col=0, na_values=['88']) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, 'test1', na_rep='88') + frame.to_excel(self.path, 'test1', na_rep='88') recons = pd.read_excel( self.path, 'test1', index_col=0, na_values=[88, 88.0]) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) # GH 6573 - self.frame.to_excel(self.path, 'Sheet1') + frame.to_excel(self.path, 'Sheet1') recons = pd.read_excel(self.path, index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, '0') + frame.to_excel(self.path, '0') recons = pd.read_excel(self.path, index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) # GH 8825 Pandas Series should provide to_excel method - s = self.frame["A"] + s = frame["A"] s.to_excel(self.path) recons = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(s.to_frame(), recons) - def test_mixed(self, merge_cells, engine, ext): - self.mixed_frame.to_excel(self.path, 'test1') + def test_mixed(self, merge_cells, engine, ext, frame): + mixed_frame = frame.copy() + mixed_frame['foo'] = 'bar' + + mixed_frame.to_excel(self.path, 'test1') reader = ExcelFile(self.path) recons = pd.read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.mixed_frame, recons) + tm.assert_frame_equal(mixed_frame, recons) - def test_ts_frame(self, *_): - df = tm.makeTimeDataFrame()[:5] + def test_ts_frame(self, tsframe, *_): + df = tsframe df.to_excel(self.path, "test1") reader = ExcelFile(self.path) @@ -1226,33 +1229,34 @@ def test_ts_frame(self, *_): recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(df, recons) - def test_basics_with_nan(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + def test_basics_with_nan(self, merge_cells, engine, ext, frame): + frame = frame.copy() + frame['A'][:5] = nan + 
frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) @pytest.mark.parametrize("np_type", [ np.int8, np.int16, np.int32, np.int64]) def test_int_types(self, merge_cells, engine, ext, np_type): # Test np.int values read come back as int # (rather than float which is Excel's format). - frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), - dtype=np_type) - frame.to_excel(self.path, "test1") + df = DataFrame(np.random.randint(-10, 10, size=(10, 2)), + dtype=np_type) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) - int_frame = frame.astype(np.int64) + int_frame = df.astype(np.int64) tm.assert_frame_equal(int_frame, recons) recons2 = pd.read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(int_frame, recons2) # Test with convert_float=False comes back as float. - float_frame = frame.astype(float) + float_frame = df.astype(float) recons = pd.read_excel(self.path, "test1", convert_float=False, index_col=0) tm.assert_frame_equal(recons, float_frame, @@ -1263,120 +1267,123 @@ def test_int_types(self, merge_cells, engine, ext, np_type): np.float16, np.float32, np.float64]) def test_float_types(self, merge_cells, engine, ext, np_type): # Test np.float values read come back as float. - frame = DataFrame(np.random.random_sample(10), dtype=np_type) - frame.to_excel(self.path, "test1") + df = DataFrame(np.random.random_sample(10), dtype=np_type) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) - tm.assert_frame_equal(frame, recons, check_dtype=False) + tm.assert_frame_equal(df, recons, check_dtype=False) @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) def test_bool_types(self, merge_cells, engine, ext, np_type): # Test np.bool values read come back as float. 
- frame = (DataFrame([1, 0, True, False], dtype=np_type)) - frame.to_excel(self.path, "test1") + df = (DataFrame([1, 0, True, False], dtype=np_type)) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) - tm.assert_frame_equal(frame, recons) + tm.assert_frame_equal(df, recons) def test_inf_roundtrip(self, *_): - frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) - frame.to_excel(self.path, "test1") + df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) - tm.assert_frame_equal(frame, recons) + tm.assert_frame_equal(df, recons) - def test_sheets(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_sheets(self, merge_cells, engine, ext, frame, tsframe): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) # Test writing to separate sheets writer = ExcelWriter(self.path) - self.frame.to_excel(writer, 'test1') - self.tsframe.to_excel(writer, 'test2') + frame.to_excel(writer, 'test1') + tsframe.to_excel(writer, 'test2') writer.save() reader = ExcelFile(self.path) recons = pd.read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) recons = pd.read_excel(reader, 'test2', index_col=0) - tm.assert_frame_equal(self.tsframe, recons) + tm.assert_frame_equal(tsframe, recons) assert 2 == len(reader.sheet_names) assert 'test1' == reader.sheet_names[0] assert 'test2' == reader.sheet_names[1] - def test_colaliases(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_colaliases(self, merge_cells, engine, ext, frame, frame2): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) # column aliases col_aliases = Index(['AA', 'X', 'Y', 'Z']) - self.frame2.to_excel(self.path, 'test1', header=col_aliases) + frame2.to_excel(self.path, 'test1', header=col_aliases) reader = ExcelFile(self.path) rs = pd.read_excel(reader, 'test1', index_col=0) - xp = self.frame2.copy() + xp = frame2.copy() xp.columns = col_aliases tm.assert_frame_equal(xp, rs) - def test_roundtrip_indexlabels(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_roundtrip_indexlabels(self, merge_cells, engine, ext, frame): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + 
frame.to_excel(self.path, 'test1', index=False) # test index_label - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(self.path, 'test1', - index_label=['test'], - merge_cells=merge_cells) + df = (DataFrame(np.random.randn(10, 2)) >= 0) + df.to_excel(self.path, 'test1', + index_label=['test'], + merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel( reader, 'test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - assert frame.index.names == recons.index.names - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(self.path, - 'test1', - index_label=['test', 'dummy', 'dummy2'], - merge_cells=merge_cells) + df.index.names = ['test'] + assert df.index.names == recons.index.names + + df = (DataFrame(np.random.randn(10, 2)) >= 0) + df.to_excel(self.path, + 'test1', + index_label=['test', 'dummy', 'dummy2'], + merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel( reader, 'test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - assert frame.index.names == recons.index.names - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(self.path, - 'test1', - index_label='test', - merge_cells=merge_cells) + df.index.names = ['test'] + assert df.index.names == recons.index.names + + df = (DataFrame(np.random.randn(10, 2)) >= 0) + df.to_excel(self.path, + 'test1', + index_label='test', + merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel( reader, 'test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - tm.assert_frame_equal(frame, recons.astype(bool)) + df.index.names = ['test'] + tm.assert_frame_equal(df, recons.astype(bool)) - self.frame.to_excel(self.path, - 'test1', - columns=['A', 'B', 'C', 'D'], - index=False, merge_cells=merge_cells) + frame.to_excel(self.path, + 'test1', + columns=['A', 'B', 'C', 'D'], + index=False, merge_cells=merge_cells) # take 'A' and 'B' as indexes (same row as cols 'C', 'D') - df = self.frame.copy() + df = frame.copy() df = df.set_index(['A', 'B']) reader = ExcelFile(self.path) @@ -1395,17 +1402,17 @@ def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): tm.assert_frame_equal(result, df) assert result.index.name == 'foo' - def test_excel_roundtrip_datetime(self, merge_cells, *_): + def test_excel_roundtrip_datetime(self, merge_cells, tsframe, *_): # datetime.date, not sure what to test here exactly - tsf = self.tsframe.copy() + tsf = tsframe.copy() - tsf.index = [x.date() for x in self.tsframe.index] + tsf.index = [x.date() for x in tsframe.index] tsf.to_excel(self.path, "test1", merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) - tm.assert_frame_equal(self.tsframe, recons) + tm.assert_frame_equal(tsframe, recons) def test_excel_date_datetime_format(self, merge_cells, engine, ext): # see gh-4133 @@ -1450,14 +1457,14 @@ def test_to_excel_interval_no_labels(self, *_): # see gh-19242 # # Test writing Interval without labels. 
- frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - dtype=np.int64) - expected = frame.copy() + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = df.copy() - frame["new"] = pd.cut(frame[0], 10) + df["new"] = pd.cut(df[0], 10) expected["new"] = pd.cut(expected[0], 10).astype(str) - frame.to_excel(self.path, "test1") + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) @@ -1467,15 +1474,15 @@ def test_to_excel_interval_labels(self, *_): # see gh-19242 # # Test writing Interval with labels. - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - dtype=np.int64) - expected = frame.copy() - intervals = pd.cut(frame[0], 10, labels=["A", "B", "C", "D", "E", - "F", "G", "H", "I", "J"]) - frame["new"] = intervals + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = df.copy() + intervals = pd.cut(df[0], 10, labels=["A", "B", "C", "D", "E", + "F", "G", "H", "I", "J"]) + df["new"] = intervals expected["new"] = pd.Series(list(intervals)) - frame.to_excel(self.path, "test1") + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) @@ -1485,23 +1492,23 @@ def test_to_excel_timedelta(self, *_): # see gh-19242, gh-9155 # # Test writing timedelta to xls. - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - columns=["A"], dtype=np.int64) - expected = frame.copy() + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + columns=["A"], dtype=np.int64) + expected = df.copy() - frame["new"] = frame["A"].apply(lambda x: timedelta(seconds=x)) + df["new"] = df["A"].apply(lambda x: timedelta(seconds=x)) expected["new"] = expected["A"].apply( lambda x: timedelta(seconds=x).total_seconds() / float(86400)) - frame.to_excel(self.path, "test1") + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) - def test_to_excel_periodindex(self, merge_cells, engine, ext): - frame = self.tsframe - xp = frame.resample('M', kind='period').mean() + def test_to_excel_periodindex( + self, merge_cells, engine, ext, tsframe): + xp = tsframe.resample('M', kind='period').mean() xp.to_excel(self.path, 'sht1') @@ -1509,8 +1516,7 @@ def test_to_excel_periodindex(self, merge_cells, engine, ext): rs = pd.read_excel(reader, 'sht1', index_col=0) tm.assert_frame_equal(xp, rs.to_period('M')) - def test_to_excel_multiindex(self, merge_cells, engine, ext): - frame = self.frame + def test_to_excel_multiindex(self, merge_cells, engine, ext, frame): arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) @@ -1526,21 +1532,21 @@ def test_to_excel_multiindex(self, merge_cells, engine, ext): tm.assert_frame_equal(frame, df) # GH13511 - def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): - frame = pd.DataFrame({'A': [None, 2, 3], - 'B': [10, 20, 30], - 'C': np.random.sample(3)}) - frame = frame.set_index(['A', 'B']) - - frame.to_excel(self.path, merge_cells=merge_cells) - df = pd.read_excel(self.path, index_col=[0, 1]) - tm.assert_frame_equal(frame, df) + def test_to_excel_multiindex_nan_label( + self, merge_cells, engine, ext): + df = pd.DataFrame({'A': [None, 2, 3], + 'B': [10, 20, 30], + 'C': np.random.sample(3)}) + df = df.set_index(['A', 'B']) + + df.to_excel(self.path, merge_cells=merge_cells) + df1 = pd.read_excel(self.path, 
index_col=[0, 1]) + tm.assert_frame_equal(df, df1) # Test for Issue 11328. If column indices are integers, make # sure they are handled correctly for either setting of # merge_cells - def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): - frame = self.frame + def test_to_excel_multiindex_cols(self, merge_cells, engine, ext, frame): arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) @@ -1563,9 +1569,9 @@ def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): frame.columns = [".".join(map(str, q)) for q in zip(*fm)] tm.assert_frame_equal(frame, df) - def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): + def test_to_excel_multiindex_dates( + self, merge_cells, engine, ext, tsframe): # try multiindex with dates - tsframe = self.tsframe.copy() new_index = [tsframe.index, np.arange(len(tsframe.index))] tsframe.index = MultiIndex.from_arrays(new_index) From 047d32d20640898978dbf6d9855cd6fecbbcf0d5 Mon Sep 17 00:00:00 2001 From: DanielFEvans <41120183+DanielFEvans@users.noreply.github.com> Date: Wed, 5 Jun 2019 19:44:38 +0100 Subject: [PATCH 47/51] ERR: include original error message for missing required dependencies (#26665) --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/__init__.py | 8 +++++--- pandas/tests/test_base.py | 27 +++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 4018418294963..8fd9f07442810 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -82,7 +82,7 @@ Other Enhancements - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) - :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`) - :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`) -- +- Error message for missing required imports now includes the original ImportError's text (:issue:`23868`) .. 
_whatsnew_0250.api_breaking: diff --git a/pandas/__init__.py b/pandas/__init__.py index 4c494b4a62e39..11ea3047bb62a 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -10,11 +10,13 @@ try: __import__(dependency) except ImportError as e: - missing_dependencies.append(dependency) + missing_dependencies.append((dependency, e)) if missing_dependencies: - raise ImportError( - "Missing required dependencies {0}".format(missing_dependencies)) + msg = "Unable to import required dependencies:" + for dependency, e in missing_dependencies: + msg += "\n{0}: {1}".format(dependency, str(e)) + raise ImportError(msg) del hard_dependencies, dependency, missing_dependencies # numpy compat diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 3b4f85e680f6e..f8319999682e8 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1,7 +1,9 @@ from datetime import datetime, timedelta +from importlib import reload from io import StringIO import re import sys +from unittest.mock import patch import numpy as np import pytest @@ -1341,3 +1343,28 @@ def test_to_numpy_dtype(as_series): expected = np.array(['2000-01-01T05', '2001-01-01T05'], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) + + +@patch("builtins.__import__") +def test_missing_required_dependency(mock_import): + def mock_import_fail(name, *args, **kwargs): + if name == "numpy": + raise ImportError("cannot import name numpy") + elif name == "pytz": + raise ImportError("cannot import name some_dependency") + elif name == "dateutil": + raise ImportError("cannot import name some_other_dependency") + else: + return __import__(name, *args, **kwargs) + + mock_import.side_effect = mock_import_fail + + expected_msg = ( + "Unable to import required dependencies:" + "\nnumpy: cannot import name numpy" + "\npytz: cannot import name some_dependency" + "\ndateutil: cannot import name some_other_dependency" + ) + + with pytest.raises(ImportError, match=expected_msg): + reload(pd) From 1d7ad5fd7577f3da1c8eb19cf547f62d392405d0 Mon Sep 17 00:00:00 2001 From: nathalier Date: Wed, 5 Jun 2019 20:06:13 +0100 Subject: [PATCH 48/51] BUG: fix TypeError for invalid integer dates %Y%m%d with errors='ignore' (# GH 26583) (#26585) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/_libs/tslibs/strptime.pyx | 6 +++--- pandas/tests/indexes/datetimes/test_tools.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 8fd9f07442810..02ee275bab364 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -537,6 +537,7 @@ Datetimelike - Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when error is set to coerce (:issue:`26122`) - Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`) - Bug in :func:`to_datetime` which raises unhandled ``OverflowError`` when called with mix of invalid dates and ``NaN`` values with ``format='%Y%m%d'`` and ``error='coerce'`` (:issue:`25512`) +- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'`` Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index af3d3fa646a12..d93858cff5e05 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -140,13 +140,13 @@ def 
array_strptime(object[:] values, object fmt, iresult[i] = NPY_NAT continue raise ValueError("time data %r does not match " - "format %r (match)" % (values[i], fmt)) + "format %r (match)" % (val, fmt)) if len(val) != found.end(): if is_coerce: iresult[i] = NPY_NAT continue raise ValueError("unconverted data remains: %s" % - values[i][found.end():]) + val[found.end():]) # search else: @@ -156,7 +156,7 @@ def array_strptime(object[:] values, object fmt, iresult[i] = NPY_NAT continue raise ValueError("time data %r does not match format " - "%r (search)" % (values[i], fmt)) + "%r (search)" % (val, fmt)) iso_year = -1 year = 1900 diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index c507c31ee54dd..ea33e563b31be 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -133,6 +133,25 @@ def test_to_datetime_format_integer(self, cache): result = to_datetime(s, format='%Y%m', cache=cache) assert_series_equal(result, expected) + @pytest.mark.parametrize('int_date, expected', [ + # valid date, length == 8 + [20121030, datetime(2012, 10, 30)], + # short valid date, length == 6 + [199934, datetime(1999, 3, 4)], + # long integer date partially parsed to datetime(2012,1,1), length > 8 + [2012010101, 2012010101], + # invalid date partially parsed to datetime(2012,9,9), length == 8 + [20129930, 20129930], + # short integer date partially parsed to datetime(2012,9,9), length < 8 + [2012993, 2012993], + # short invalid date, length == 4 + [2121, 2121]]) + def test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, + expected): + # GH 26583 + result = to_datetime(int_date, format='%Y%m%d', errors='ignore') + assert result == expected + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_format_microsecond(self, cache): From 30d9cf30c680596fc6e00b3e06a30d2fc62bad69 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 5 Jun 2019 22:30:45 +0200 Subject: [PATCH 49/51] Revert "ERR: include original error message for missing required dependencies (#26665)" This reverts commit 047d32d20640898978dbf6d9855cd6fecbbcf0d5. --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/__init__.py | 8 +++----- pandas/tests/test_base.py | 27 --------------------------- 3 files changed, 4 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 02ee275bab364..1fb9b5ae695a0 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -82,7 +82,7 @@ Other Enhancements - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) - :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`) - :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`) -- Error message for missing required imports now includes the original ImportError's text (:issue:`23868`) +- .. 
_whatsnew_0250.api_breaking: diff --git a/pandas/__init__.py b/pandas/__init__.py index 11ea3047bb62a..4c494b4a62e39 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -10,13 +10,11 @@ try: __import__(dependency) except ImportError as e: - missing_dependencies.append((dependency, e)) + missing_dependencies.append(dependency) if missing_dependencies: - msg = "Unable to import required dependencies:" - for dependency, e in missing_dependencies: - msg += "\n{0}: {1}".format(dependency, str(e)) - raise ImportError(msg) + raise ImportError( + "Missing required dependencies {0}".format(missing_dependencies)) del hard_dependencies, dependency, missing_dependencies # numpy compat diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index f8319999682e8..3b4f85e680f6e 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1,9 +1,7 @@ from datetime import datetime, timedelta -from importlib import reload from io import StringIO import re import sys -from unittest.mock import patch import numpy as np import pytest @@ -1343,28 +1341,3 @@ def test_to_numpy_dtype(as_series): expected = np.array(['2000-01-01T05', '2001-01-01T05'], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) - - -@patch("builtins.__import__") -def test_missing_required_dependency(mock_import): - def mock_import_fail(name, *args, **kwargs): - if name == "numpy": - raise ImportError("cannot import name numpy") - elif name == "pytz": - raise ImportError("cannot import name some_dependency") - elif name == "dateutil": - raise ImportError("cannot import name some_other_dependency") - else: - return __import__(name, *args, **kwargs) - - mock_import.side_effect = mock_import_fail - - expected_msg = ( - "Unable to import required dependencies:" - "\nnumpy: cannot import name numpy" - "\npytz: cannot import name some_dependency" - "\ndateutil: cannot import name some_other_dependency" - ) - - with pytest.raises(ImportError, match=expected_msg): - reload(pd) From f8b4c57ad1e4f1a105905c53ffcf40a5dc5080c3 Mon Sep 17 00:00:00 2001 From: AlexTereshenkov <50622389+AlexTereshenkov@users.noreply.github.com> Date: Wed, 5 Jun 2019 22:37:54 +0100 Subject: [PATCH 50/51] Remove redundant check arr_or_dtype is None (#26655) --- pandas/core/dtypes/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4029e6f4bfdb5..52011d53d22cd 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1931,8 +1931,6 @@ def _is_dtype_type(arr_or_dtype, condition): if issubclass(arr_or_dtype, ExtensionDtype): arr_or_dtype = arr_or_dtype.type return condition(np.dtype(arr_or_dtype).type) - elif arr_or_dtype is None: - return condition(type(None)) # if we have an array-like if hasattr(arr_or_dtype, 'dtype'): From 891a419a5155e6b42c0696a81cf853b6f3febbf7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 5 Jun 2019 16:48:47 -0500 Subject: [PATCH 51/51] filter warning in repr (#26669) --- pandas/core/sparse/frame.py | 5 +++++ pandas/core/sparse/series.py | 10 ++++++---- pandas/tests/sparse/test_format.py | 13 +++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 0320da6d9a48d..67ecbcbea67f9 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -242,6 +242,11 @@ def _init_spmatrix(self, data, index, columns, dtype=None, def to_coo(self): return SparseFrameAccessor(self).to_coo() + def __repr__(self): + with 
warnings.catch_warnings(): + warnings.filterwarnings("ignore", "Sparse") + return super().__repr__() + def __getstate__(self): # pickling return dict(_typ=self._typ, _subtyp=self._subtyp, _data=self._data, diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 3814d8bb66635..3e3bae6444082 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -214,10 +214,12 @@ def as_sparse_array(self, kind=None, fill_value=None, copy=False): fill_value=fill_value, kind=kind, copy=copy) def __repr__(self): - series_rep = Series.__repr__(self) - rep = '{series}\n{index!r}'.format(series=series_rep, - index=self.sp_index) - return rep + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "Sparse") + series_rep = Series.__repr__(self) + rep = '{series}\n{index!r}'.format(series=series_rep, + index=self.sp_index) + return rep def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): diff --git a/pandas/tests/sparse/test_format.py b/pandas/tests/sparse/test_format.py index 37c2acc587cf6..7ed8c48fce333 100644 --- a/pandas/tests/sparse/test_format.py +++ b/pandas/tests/sparse/test_format.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np import pytest @@ -133,3 +135,14 @@ def test_sparse_repr_after_set(self): repr(sdf) tm.assert_sp_frame_equal(sdf, res) + + +def test_repr_no_warning(): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + df = pd.SparseDataFrame({"A": [1, 2]}) + s = df['A'] + + with tm.assert_produces_warning(None): + repr(df) + repr(s)
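Taken together, patch 51 means that printing a sparse structure no longer emits the "Sparse" deprecation warnings its repr would otherwise trigger. A quick way to confirm that behaviour, assuming a build that includes this patch (this mirrors the new ``test_repr_no_warning`` test added above):

    import warnings
    import pandas as pd

    with warnings.catch_warnings():
        # Constructing a SparseDataFrame is itself deprecated, so silence
        # the FutureWarning here, exactly as the new test does.
        warnings.simplefilter("ignore", FutureWarning)
        df = pd.SparseDataFrame({"A": [1, 2]})
        s = df["A"]

    with warnings.catch_warnings():
        warnings.simplefilter("error")  # turn any warning into an exception
        repr(df)  # silent: __repr__ now filters the "Sparse" warnings internally
        repr(s)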