Skip to content

Commit 080c7e9

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into 33457
2 parents 9164c83 + 1b7e3a6 commit 080c7e9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+1868
-1953
lines changed

.pre-commit-config.yaml

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,44 @@ repos:
5656
- id: incorrect-sphinx-directives
5757
name: Check for incorrect Sphinx directives
5858
language: pygrep
59-
entry: >-
60-
\.\. (autosummary|contents|currentmodule|deprecated
61-
|function|image|important|include|ipython|literalinclude
62-
|math|module|note|raw|seealso|toctree|versionadded
63-
|versionchanged|warning):[^:]
59+
entry: |
60+
(?x)
61+
# Check for cases of e.g. .. warning: instead of .. warning::
62+
\.\.\ (
63+
autosummary|contents|currentmodule|deprecated|
64+
function|image|important|include|ipython|literalinclude|
65+
math|module|note|raw|seealso|toctree|versionadded|
66+
versionchanged|warning
67+
):[^:]
6468
files: \.(py|pyx|rst)$
69+
- id: non-standard-imports
70+
name: Check for non-standard imports
71+
language: pygrep
72+
entry: |
73+
(?x)
74+
# Check for imports from pandas.core.common instead of `import pandas.core.common as com`
75+
from\ pandas\.core\.common\ import|
76+
from\ pandas\.core\ import\ common|
77+
78+
# Check for imports from collections.abc instead of `from collections import abc`
79+
from\ collections\.abc\ import|
80+
81+
from\ numpy\ import\ nan
82+
types: [python]
83+
- id: non-standard-imports-in-tests
84+
name: Check for non-standard imports in test suite
85+
language: pygrep
86+
entry: |
87+
(?x)
88+
# Check for imports from pandas._testing instead of `import pandas._testing as tm`
89+
from\ pandas\._testing\ import|
90+
from\ pandas\ import\ _testing\ as\ tm|
91+
92+
# No direct imports from conftest
93+
conftest\ import|
94+
import\ conftest
95+
types: [python]
96+
files: ^pandas/tests/
6597
- id: incorrect-code-directives
6698
name: Check for incorrect code block or IPython directives
6799
language: pygrep

ci/code_checks.sh

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -110,31 +110,6 @@ fi
110110
### PATTERNS ###
111111
if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
112112

113-
# Check for imports from pandas.core.common instead of `import pandas.core.common as com`
114-
# Check for imports from collections.abc instead of `from collections import abc`
115-
MSG='Check for non-standard imports' ; echo $MSG
116-
invgrep -R --include="*.py*" -E "from pandas.core.common import" pandas
117-
RET=$(($RET + $?)) ; echo $MSG "DONE"
118-
invgrep -R --include="*.py*" -E "from pandas.core import common" pandas
119-
RET=$(($RET + $?)) ; echo $MSG "DONE"
120-
invgrep -R --include="*.py*" -E "from collections.abc import" pandas
121-
RET=$(($RET + $?)) ; echo $MSG "DONE"
122-
invgrep -R --include="*.py*" -E "from numpy import nan" pandas
123-
RET=$(($RET + $?)) ; echo $MSG "DONE"
124-
125-
# Checks for test suite
126-
# Check for imports from pandas._testing instead of `import pandas._testing as tm`
127-
invgrep -R --include="*.py*" -E "from pandas._testing import" pandas/tests
128-
RET=$(($RET + $?)) ; echo $MSG "DONE"
129-
invgrep -R --include="*.py*" -E "from pandas import _testing as tm" pandas/tests
130-
RET=$(($RET + $?)) ; echo $MSG "DONE"
131-
132-
# No direct imports from conftest
133-
invgrep -R --include="*.py*" -E "conftest import" pandas/tests
134-
RET=$(($RET + $?)) ; echo $MSG "DONE"
135-
invgrep -R --include="*.py*" -E "import conftest" pandas/tests
136-
RET=$(($RET + $?)) ; echo $MSG "DONE"
137-
138113
MSG='Check for use of exec' ; echo $MSG
139114
invgrep -R --include="*.py*" -E "[^a-zA-Z0-9_]exec\(" pandas
140115
RET=$(($RET + $?)) ; echo $MSG "DONE"

doc/source/getting_started/intro_tutorials/10_text_data.rst

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,15 @@ How to manipulate textual data?
6666
<ul class="task-bullet">
6767
<li>
6868

69-
Make all name characters lowercase
69+
Make all name characters lowercase.
7070

7171
.. ipython:: python
7272
7373
titanic["Name"].str.lower()
7474
7575
To make each of the strings in the ``Name`` column lowercase, select the ``Name`` column
76-
(see :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and
77-
apply the ``lower`` method. As such, each of the strings is converted element wise.
76+
(see the :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and
77+
apply the ``lower`` method. As such, each of the strings is converted element-wise.
7878

7979
.. raw:: html
8080

@@ -86,15 +86,15 @@ having a ``dt`` accessor, a number of
8686
specialized string methods are available when using the ``str``
8787
accessor. These methods generally have names matching the
8888
equivalent built-in string methods for single elements, but are applied
89-
element-wise (remember :ref:`element wise calculations <10min_tut_05_columns>`?)
89+
element-wise (remember :ref:`element-wise calculations <10min_tut_05_columns>`?)
9090
on each of the values of the columns.
9191

9292
.. raw:: html
9393

9494
<ul class="task-bullet">
9595
<li>
9696

97-
Create a new column ``Surname`` that contains the surname of the Passengers by extracting the part before the comma.
97+
Create a new column ``Surname`` that contains the surname of the passengers by extracting the part before the comma.
9898

9999
.. ipython:: python
100100
@@ -135,7 +135,7 @@ More information on extracting parts of strings is available in the user guide s
135135
<ul class="task-bullet">
136136
<li>
137137

138-
Extract the passenger data about the Countesses on board of the Titanic.
138+
Extract the passenger data about the countesses on board the Titanic.
139139

140140
.. ipython:: python
141141
@@ -145,15 +145,15 @@ Extract the passenger data about the Countesses on board of the Titanic.
145145
146146
titanic[titanic["Name"].str.contains("Countess")]
147147
148-
(*Interested in her story? See *\ `Wikipedia <https://en.wikipedia.org/wiki/No%C3%ABl_Leslie,_Countess_of_Rothes>`__\ *!*)
148+
(*Interested in her story? See* `Wikipedia <https://en.wikipedia.org/wiki/No%C3%ABl_Leslie,_Countess_of_Rothes>`__\ *!*)
149149

150150
The string method :meth:`Series.str.contains` checks for each of the values in the
151151
column ``Name`` if the string contains the word ``Countess`` and returns
152-
for each of the values ``True`` (``Countess`` is part of the name) of
152+
for each of the values ``True`` (``Countess`` is part of the name) or
153153
``False`` (``Countess`` is not part of the name). This output can be used
154154
to subselect the data using conditional (boolean) indexing introduced in
155155
the :ref:`subsetting of data tutorial <10min_tut_03_subset>`. As there was
156-
only one Countess on the Titanic, we get one row as a result.
156+
only one countess on the Titanic, we get one row as a result.
157157

158158
.. raw:: html
159159

@@ -220,7 +220,7 @@ we can do a selection using the ``loc`` operator, introduced in the
220220
<ul class="task-bullet">
221221
<li>
222222

223-
In the "Sex" column, replace values of "male" by "M" and values of "female" by "F"
223+
In the "Sex" column, replace values of "male" by "M" and values of "female" by "F".
224224

225225
.. ipython:: python
226226
@@ -256,7 +256,7 @@ a ``dictionary`` to define the mapping ``{from : to}``.
256256
<h4>REMEMBER</h4>
257257

258258
- String methods are available using the ``str`` accessor.
259-
- String methods work element wise and can be used for conditional
259+
- String methods work element-wise and can be used for conditional
260260
indexing.
261261
- The ``replace`` method is a convenient method to convert values
262262
according to a given dictionary.

pandas/conftest.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
import pandas.util._test_decorators as td
3535

3636
import pandas as pd
37-
from pandas import DataFrame
37+
from pandas import DataFrame, Series
3838
import pandas._testing as tm
3939
from pandas.core import ops
4040
from pandas.core.indexes.api import Index, MultiIndex
@@ -529,6 +529,23 @@ def series_with_simple_index(index):
529529
return _create_series(index)
530530

531531

532+
@pytest.fixture
533+
def series_with_multilevel_index():
534+
"""
535+
Fixture with a Series with a 2-level MultiIndex.
536+
"""
537+
arrays = [
538+
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
539+
["one", "two", "one", "two", "one", "two", "one", "two"],
540+
]
541+
tuples = zip(*arrays)
542+
index = MultiIndex.from_tuples(tuples)
543+
data = np.random.randn(8)
544+
ser = Series(data, index=index)
545+
ser[3] = np.NaN
546+
return ser
547+
548+
532549
_narrow_dtypes = [
533550
np.float16,
534551
np.float32,

pandas/core/aggregation.py

Lines changed: 15 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
from pandas.core.dtypes.cast import is_nested_object
3333
from pandas.core.dtypes.common import is_dict_like, is_list_like
34-
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
34+
from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries
3535

3636
from pandas.core.base import DataError, SpecificationError
3737
import pandas.core.common as com
@@ -621,58 +621,27 @@ def aggregate(obj, arg: AggFuncType, *args, **kwargs):
621621
# set the final keys
622622
keys = list(arg.keys())
623623

624-
# combine results
625-
626-
def is_any_series() -> bool:
627-
# return a boolean if we have *any* nested series
628-
return any(isinstance(r, ABCSeries) for r in results.values())
629-
630-
def is_any_frame() -> bool:
631-
# return a boolean if we have *any* nested DataFrame
632-
return any(isinstance(r, ABCDataFrame) for r in results.values())
633-
634-
if isinstance(results, list):
635-
return concat(results, keys=keys, axis=1, sort=True), True
636-
637-
elif is_any_frame():
638-
# we have a dict of DataFrames
639-
# return a MI DataFrame
624+
# Avoid making two isinstance calls in all and any below
625+
is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
640626

627+
# combine results
628+
if all(is_ndframe):
641629
keys_to_use = [k for k in keys if not results[k].empty]
642630
# Check whether at least one result is non-empty; if none is, keep all keys.
643631
keys_to_use = keys_to_use if keys_to_use != [] else keys
644-
return (
645-
concat([results[k] for k in keys_to_use], keys=keys_to_use, axis=1),
646-
True,
632+
axis = 0 if isinstance(obj, ABCSeries) else 1
633+
result = concat({k: results[k] for k in keys_to_use}, axis=axis)
634+
elif any(is_ndframe):
635+
# There is a mix of NDFrames and scalars
636+
raise ValueError(
637+
"cannot perform both aggregation "
638+
"and transformation operations "
639+
"simultaneously"
647640
)
641+
else:
642+
from pandas import Series
648643

649-
elif isinstance(obj, ABCSeries) and is_any_series():
650-
651-
# we have a dict of Series
652-
# return a MI Series
653-
try:
654-
result = concat(results)
655-
except TypeError as err:
656-
# we want to give a nice error here if
657-
# we have non-same sized objects, so
658-
# we don't automatically broadcast
659-
660-
raise ValueError(
661-
"cannot perform both aggregation "
662-
"and transformation operations "
663-
"simultaneously"
664-
) from err
665-
666-
return result, True
667-
668-
# fall thru
669-
from pandas import DataFrame, Series
670-
671-
try:
672-
result = DataFrame(results)
673-
except ValueError:
674644
# we have a dict of scalars
675-
676645
# GH 36212 use name only if obj is a series
677646
if obj.ndim == 1:
678647
obj = cast("Series", obj)

pandas/core/dtypes/generic.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def _check(cls, inst) -> bool:
5353
},
5454
)
5555

56+
ABCNDFrame = create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe"))
5657
ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))
5758
ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))
5859

pandas/core/frame.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7442,9 +7442,9 @@ def _gotitem(
74427442
74437443
>>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
74447444
A B
7445-
max NaN 8.0
7446-
min 1.0 2.0
74477445
sum 12.0 NaN
7446+
min 1.0 2.0
7447+
max NaN 8.0
74487448
74497449
Aggregate different functions over the columns and rename the index of the resulting
74507450
DataFrame.

pandas/core/indexes/base.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ def _outer_indexer(self, left, right):
220220

221221
_typ = "index"
222222
_data: Union[ExtensionArray, np.ndarray]
223-
_id: _Identity
223+
_id: Optional[_Identity] = None
224224
_name: Label = None
225225
# MultiIndex.levels previously allowed setting the index name. We
226226
# don't allow this anymore, and raise if it happens rather than
@@ -541,10 +541,14 @@ def is_(self, other) -> bool:
541541
--------
542542
Index.identical : Works like ``Index.is_`` but also checks metadata.
543543
"""
544-
try:
545-
return self._id is other._id
546-
except AttributeError:
544+
if self is other:
545+
return True
546+
elif not hasattr(other, "_id"):
547547
return False
548+
elif com.any_none(self._id, other._id):
549+
return False
550+
else:
551+
return self._id is other._id
548552

549553
def _reset_identity(self) -> None:
550554
"""

0 commit comments

Comments
 (0)