pandas-dev
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 37 additions & 5 deletions
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
Lines changed: 0 additions & 25 deletions
diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst
Lines changed: 11 additions & 11 deletions
diff --git a/pandas/conftest.py b/pandas/conftest.py
Lines changed: 18 additions & 1 deletion
diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py
Lines changed: 15 additions & 46 deletions
diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py
Lines changed: 1 addition & 0 deletions
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
Lines changed: 2 additions & 2 deletions
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
Lines changed: 8 additions & 4 deletions
@@ -56,12 +56,44 @@ repos:
     -   id: incorrect-sphinx-directives
         name: Check for incorrect Sphinx directives
         language: pygrep
-        entry: >-
-            \.\. (autosummary|contents|currentmodule|deprecated
-            |function|image|important|include|ipython|literalinclude
-            |math|module|note|raw|seealso|toctree|versionadded
-            |versionchanged|warning):[^:]
+        entry: |
+            (?x)
+            # Check for cases of e.g. .. warning: instead of .. warning::
+            \.\.\ (
+                autosummary|contents|currentmodule|deprecated|
+                function|image|important|include|ipython|literalinclude|
+                math|module|note|raw|seealso|toctree|versionadded|
+                versionchanged|warning
+            ):[^:]
         files: \.(py|pyx|rst)$
+    -   id: non-standard-imports
+        name: Check for non-standard imports
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
+            from\ pandas\.core\.common\ import|
+            from\ pandas\.core\ import\ common|
+
+            # Check for imports from collections.abc instead of `from collections import abc`
+            from\ collections\.abc\ import|
+
+            from\ numpy\ import\ nan
+        types: [python]
+    -   id: non-standard-imports-in-tests
+        name: Check for non-standard imports in test suite
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas._testing instead of `import pandas._testing as tm`
+            from\ pandas\._testing\ import|
+            from\ pandas\ import\ _testing\ as\ tm|
+
+            # No direct imports from conftest
+            conftest\ import|
+            import\ conftest
+        types: [python]
+        files: ^pandas/tests/
     -   id: incorrect-code-directives
         name: Check for incorrect code block or IPython directives
         language: pygrep
 
@@ -110,31 +110,6 @@ fi
 ### PATTERNS ###
 if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
 
-    # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
-    # Check for imports from collections.abc instead of `from collections import abc`
-    MSG='Check for non-standard imports' ; echo $MSG
-    invgrep -R --include="*.py*" -E "from pandas.core.common import" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "from pandas.core import common" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "from collections.abc import" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "from numpy import nan" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # Checks for test suite
-    # Check for imports from pandas._testing instead of `import pandas._testing as tm`
-    invgrep -R --include="*.py*" -E "from pandas._testing import" pandas/tests
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "from pandas import _testing as tm" pandas/tests
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # No direct imports from conftest
-    invgrep -R --include="*.py*" -E "conftest import" pandas/tests
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "import conftest" pandas/tests
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Check for use of exec' ; echo $MSG
     invgrep -R --include="*.py*" -E "[^a-zA-Z0-9_]exec\(" pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
@@ -66,15 +66,15 @@ How to manipulate textual data?
     <ul class="task-bullet">
         <li>
 
-Make all name characters lowercase
+Make all name characters lowercase.
 
 .. ipython:: python
 
     titanic["Name"].str.lower()
 
 To make each of the strings in the ``Name`` column lowercase, select the ``Name`` column
-(see :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and
-apply the ``lower`` method. As such, each of the strings is converted element wise.
+(see the :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and
+apply the ``lower`` method. As such, each of the strings is converted element-wise.
 
 .. raw:: html
 
@@ -86,15 +86,15 @@ having a ``dt`` accessor, a number of
 specialized string methods are available when using the ``str``
 accessor. These methods have in general matching names with the
 equivalent built-in string methods for single elements, but are applied
-element-wise (remember :ref:`element wise calculations <10min_tut_05_columns>`?)
+element-wise (remember :ref:`element-wise calculations <10min_tut_05_columns>`?)
 on each of the values of the columns.
 
 .. raw:: html
 
     <ul class="task-bullet">
         <li>
 
-Create a new column ``Surname`` that contains the surname of the Passengers by extracting the part before the comma.
+Create a new column ``Surname`` that contains the surname of the passengers by extracting the part before the comma.
 
 .. ipython:: python
 
@@ -135,7 +135,7 @@ More information on extracting parts of strings is available in the user guide s
     <ul class="task-bullet">
         <li>
 
-Extract the passenger data about the Countesses on board of the Titanic.
+Extract the passenger data about the countesses on board of the Titanic.
 
 .. ipython:: python
 
@@ -145,15 +145,15 @@ Extract the passenger data about the Countesses on board of the Titanic.
 
     titanic[titanic["Name"].str.contains("Countess")]
 
-(*Interested in her story? See *\ `Wikipedia <https://en.wikipedia.org/wiki/No%C3%ABl_Leslie,_Countess_of_Rothes>`__\ *!*)
+(*Interested in her story? See* `Wikipedia <https://en.wikipedia.org/wiki/No%C3%ABl_Leslie,_Countess_of_Rothes>`__\ *!*)
 
 The string method :meth:`Series.str.contains` checks for each of the values in the
 column ``Name`` if the string contains the word ``Countess`` and returns
-for each of the values ``True`` (``Countess`` is part of the name) of
+for each of the values ``True`` (``Countess`` is part of the name) or
 ``False`` (``Countess`` is not part of the name). This output can be used
 to subselect the data using conditional (boolean) indexing introduced in
 the :ref:`subsetting of data tutorial <10min_tut_03_subset>`. As there was
-only one Countess on the Titanic, we get one row as a result.
+only one countess on the Titanic, we get one row as a result.
 
 .. raw:: html
 
@@ -220,7 +220,7 @@ we can do a selection using the ``loc`` operator, introduced in the
     <ul class="task-bullet">
         <li>
 
-In the "Sex" column, replace values of "male" by "M" and values of "female" by "F"
+In the "Sex" column, replace values of "male" by "M" and values of "female" by "F".
 
 .. ipython:: python
 
@@ -256,7 +256,7 @@ a ``dictionary`` to define the mapping ``{from : to}``.
         <h4>REMEMBER</h4>
 
 -  String methods are available using the ``str`` accessor.
--  String methods work element wise and can be used for conditional
+-  String methods work element-wise and can be used for conditional
    indexing.
 -  The ``replace`` method is a convenient method to convert values
    according to a given dictionary.
 
@@ -34,7 +34,7 @@
 import pandas.util._test_decorators as td
 
 import pandas as pd
-from pandas import DataFrame
+from pandas import DataFrame, Series
 import pandas._testing as tm
 from pandas.core import ops
 from pandas.core.indexes.api import Index, MultiIndex
@@ -529,6 +529,23 @@ def series_with_simple_index(index):
     return _create_series(index)
 
 
+@pytest.fixture
+def series_with_multilevel_index():
+    """
+    Fixture with a Series with a 2-level MultiIndex.
+    """
+    arrays = [
+        ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
+        ["one", "two", "one", "two", "one", "two", "one", "two"],
+    ]
+    tuples = zip(*arrays)
+    index = MultiIndex.from_tuples(tuples)
+    data = np.random.randn(8)
+    ser = Series(data, index=index)
+    ser[3] = np.NaN
+    return ser
+
+
 _narrow_dtypes = [
     np.float16,
     np.float32,
 
@@ -31,7 +31,7 @@
 
 from pandas.core.dtypes.cast import is_nested_object
 from pandas.core.dtypes.common import is_dict_like, is_list_like
-from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
+from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries
 
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
@@ -621,58 +621,27 @@ def aggregate(obj, arg: AggFuncType, *args, **kwargs):
         # set the final keys
         keys = list(arg.keys())
 
-        # combine results
-
-        def is_any_series() -> bool:
-            # return a boolean if we have *any* nested series
-            return any(isinstance(r, ABCSeries) for r in results.values())
-
-        def is_any_frame() -> bool:
-            # return a boolean if we have *any* nested series
-            return any(isinstance(r, ABCDataFrame) for r in results.values())
-
-        if isinstance(results, list):
-            return concat(results, keys=keys, axis=1, sort=True), True
-
-        elif is_any_frame():
-            # we have a dict of DataFrames
-            # return a MI DataFrame
+        # Avoid making two isinstance calls in all and any below
+        is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
 
+        # combine results
+        if all(is_ndframe):
             keys_to_use = [k for k in keys if not results[k].empty]
             # Have to check, if at least one DataFrame is not empty.
             keys_to_use = keys_to_use if keys_to_use != [] else keys
-            return (
-                concat([results[k] for k in keys_to_use], keys=keys_to_use, axis=1),
-                True,
+            axis = 0 if isinstance(obj, ABCSeries) else 1
+            result = concat({k: results[k] for k in keys_to_use}, axis=axis)
+        elif any(is_ndframe):
+            # There is a mix of NDFrames and scalars
+            raise ValueError(
+                "cannot perform both aggregation "
+                "and transformation operations "
+                "simultaneously"
             )
+        else:
+            from pandas import Series
 
-        elif isinstance(obj, ABCSeries) and is_any_series():
-
-            # we have a dict of Series
-            # return a MI Series
-            try:
-                result = concat(results)
-            except TypeError as err:
-                # we want to give a nice error here if
-                # we have non-same sized objects, so
-                # we don't automatically broadcast
-
-                raise ValueError(
-                    "cannot perform both aggregation "
-                    "and transformation operations "
-                    "simultaneously"
-                ) from err
-
-            return result, True
-
-        # fall thru
-        from pandas import DataFrame, Series
-
-        try:
-            result = DataFrame(results)
-        except ValueError:
             # we have a dict of scalars
-
             # GH 36212 use name only if obj is a series
             if obj.ndim == 1:
                 obj = cast("Series", obj)
 
@@ -53,6 +53,7 @@ def _check(cls, inst) -> bool:
     },
 )
 
+ABCNDFrame = create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe"))
 ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))
 ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))
 
 
@@ -7442,9 +7442,9 @@ def _gotitem(
 
     >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
             A    B
-    max   NaN  8.0
-    min   1.0  2.0
     sum  12.0  NaN
+    min   1.0  2.0
+    max   NaN  8.0
 
     Aggregate different functions over the columns and rename the index of the resulting
     DataFrame.
 
@@ -220,7 +220,7 @@ def _outer_indexer(self, left, right):
 
     _typ = "index"
     _data: Union[ExtensionArray, np.ndarray]
-    _id: _Identity
+    _id: Optional[_Identity] = None
     _name: Label = None
     # MultiIndex.levels previously allowed setting the index name. We
     # don't allow this anymore, and raise if it happens rather than
@@ -541,10 +541,14 @@ def is_(self, other) -> bool:
         --------
         Index.identical : Works like ``Index.is_`` but also checks metadata.
         """
-        try:
-            return self._id is other._id
-        except AttributeError:
+        if self is other:
+            return True
+        elif not hasattr(other, "_id"):
             return False
+        elif com.any_none(self._id, other._id):
+            return False
+        else:
+            return self._id is other._id
 
     def _reset_identity(self) -> None:
         """
Original file line number	Diff line number	Diff line change
`@@ -53,6 +53,7 @@ def _check(cls, inst) -> bool:`
`53`	`53`	`},`
`54`	`54`	`)`
`55`	`55`
	`56`	`+ABCNDFrame = create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe"))`
`56`	`57`	`ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))`
`57`	`58`	`ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))`
`58`	`59`