Skip to content

Commit b47ed01

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into 33457
2 parents 080c7e9 + 4aa41b8 commit b47ed01

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+634
-543
lines changed

ci/code_checks.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
178178
RET=$(($RET + $?)) ; echo $MSG "DONE"
179179

180180
MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
181-
check_namespace "Series"
182-
RET=$(($RET + $?))
183-
check_namespace "DataFrame"
184-
RET=$(($RET + $?))
181+
for class in "Series" "DataFrame" "Index"; do
182+
check_namespace ${class}
183+
RET=$(($RET + $?))
184+
done
185185
echo $MSG "DONE"
186186
fi
187187

doc/source/whatsnew/v1.2.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,7 @@ Indexing
517517
- Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`)
518518
- Bug in :meth:`DataFrame.__getitem__` and :meth:`DataFrame.loc.__getitem__` with :class:`IntervalIndex` columns and a numeric indexer (:issue:`26490`)
519519
- Bug in :meth:`Series.loc.__getitem__` with a non-unique :class:`MultiIndex` and an empty-list indexer (:issue:`13691`)
520+
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` with a level named "0" (:issue:`37194`)
520521

521522
Missing
522523
^^^^^^^
@@ -589,6 +590,7 @@ Reshaping
589590
- Bug in :func:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
590591
- Bug in :meth:`DataFrame.agg` with ``func={'name':<FUNC>}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`)
591592
- Bug in :meth:`Series.transform` where it would give incorrect results or raise when the argument ``func`` was a dictionary (:issue:`35811`)
593+
- Bug in :func:`join` returning a non-deterministic level order for the resulting :class:`MultiIndex` (:issue:`36910`)
592594
-
593595

594596
Sparse

pandas/core/frame.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8547,6 +8547,7 @@ def count(self, axis=0, level=None, numeric_only=False):
85478547
See Also
85488548
--------
85498549
Series.count: Number of non-NA elements in a Series.
8550+
DataFrame.value_counts: Count unique combinations of columns.
85508551
DataFrame.shape: Number of DataFrame rows and columns (including NA
85518552
elements).
85528553
DataFrame.isna: Boolean same-sized DataFrame showing places of NA

pandas/core/generic.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3684,7 +3684,9 @@ class animal locomotion
36843684
index = self.index
36853685
if isinstance(index, MultiIndex):
36863686
try:
3687-
loc, new_index = self.index.get_loc_level(key, drop_level=drop_level)
3687+
loc, new_index = self.index._get_loc_level(
3688+
key, level=0, drop_level=drop_level
3689+
)
36883690
except TypeError as e:
36893691
raise TypeError(f"Expected label or tuple of labels, got {key}") from e
36903692
else:

pandas/core/indexes/base.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1556,12 +1556,19 @@ def droplevel(self, level=0):
15561556

15571557
levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
15581558

1559-
if len(level) == 0:
1559+
return self._drop_level_numbers(levnums)
1560+
1561+
def _drop_level_numbers(self, levnums: List[int]):
1562+
"""
1563+
Drop MultiIndex levels by level _number_, not name.
1564+
"""
1565+
1566+
if len(levnums) == 0:
15601567
return self
1561-
if len(level) >= self.nlevels:
1568+
if len(levnums) >= self.nlevels:
15621569
raise ValueError(
1563-
f"Cannot remove {len(level)} levels from an index with {self.nlevels} "
1564-
"levels: at least one level must be left."
1570+
f"Cannot remove {len(levnums)} levels from an index with "
1571+
f"{self.nlevels} levels: at least one level must be left."
15651572
)
15661573
# The two checks above guarantee that here self is a MultiIndex
15671574
self = cast("MultiIndex", self)
@@ -3590,8 +3597,12 @@ def _join_multi(self, other, how, return_indexers=True):
35903597
from pandas.core.reshape.merge import restore_dropped_levels_multijoin
35913598

35923599
# figure out join names
3593-
self_names = set(com.not_none(*self.names))
3594-
other_names = set(com.not_none(*other.names))
3600+
self_names_list = list(com.not_none(*self.names))
3601+
other_names_list = list(com.not_none(*other.names))
3602+
self_names_order = self_names_list.index
3603+
other_names_order = other_names_list.index
3604+
self_names = set(self_names_list)
3605+
other_names = set(other_names_list)
35953606
overlap = self_names & other_names
35963607

35973608
# need at least 1 in common
@@ -3601,8 +3612,8 @@ def _join_multi(self, other, how, return_indexers=True):
36013612
if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
36023613

36033614
# Drop the non-matching levels from left and right respectively
3604-
ldrop_names = list(self_names - overlap)
3605-
rdrop_names = list(other_names - overlap)
3615+
ldrop_names = sorted(self_names - overlap, key=self_names_order)
3616+
rdrop_names = sorted(other_names - overlap, key=other_names_order)
36063617

36073618
# if only the order differs
36083619
if not len(ldrop_names + rdrop_names):

pandas/core/indexes/multi.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2864,16 +2864,29 @@ def get_loc_level(self, key, level=0, drop_level: bool = True):
28642864
>>> mi.get_loc_level(['b', 'e'])
28652865
(1, None)
28662866
"""
2867+
if not isinstance(level, (list, tuple)):
2868+
level = self._get_level_number(level)
2869+
else:
2870+
level = [self._get_level_number(lev) for lev in level]
2871+
return self._get_loc_level(key, level=level, drop_level=drop_level)
2872+
2873+
def _get_loc_level(
2874+
self, key, level: Union[int, List[int]] = 0, drop_level: bool = True
2875+
):
2876+
"""
2877+
get_loc_level but with `level` known to be positional, not name-based.
2878+
"""
2879+
28672880
# different name to distinguish from maybe_droplevels
28682881
def maybe_mi_droplevels(indexer, levels, drop_level: bool):
28692882
if not drop_level:
28702883
return self[indexer]
28712884
# kludge around
28722885
orig_index = new_index = self[indexer]
2873-
levels = [self._get_level_number(i) for i in levels]
2886+
28742887
for i in sorted(levels, reverse=True):
28752888
try:
2876-
new_index = new_index.droplevel(i)
2889+
new_index = new_index._drop_level_numbers([i])
28772890
except ValueError:
28782891

28792892
# no dropping here
@@ -2887,7 +2900,7 @@ def maybe_mi_droplevels(indexer, levels, drop_level: bool):
28872900
)
28882901
result = None
28892902
for lev, k in zip(level, key):
2890-
loc, new_index = self.get_loc_level(k, level=lev)
2903+
loc, new_index = self._get_loc_level(k, level=lev)
28912904
if isinstance(loc, slice):
28922905
mask = np.zeros(len(self), dtype=bool)
28932906
mask[loc] = True
@@ -2897,8 +2910,6 @@ def maybe_mi_droplevels(indexer, levels, drop_level: bool):
28972910

28982911
return result, maybe_mi_droplevels(result, level, drop_level)
28992912

2900-
level = self._get_level_number(level)
2901-
29022913
# kludge for #1796
29032914
if isinstance(key, list):
29042915
key = tuple(key)
@@ -2963,7 +2974,8 @@ def partial_selection(key, indexer=None):
29632974
indexer = self._get_level_indexer(key, level=level)
29642975
return indexer, maybe_mi_droplevels(indexer, [level], drop_level)
29652976

2966-
def _get_level_indexer(self, key, level=0, indexer=None):
2977+
def _get_level_indexer(self, key, level: int = 0, indexer=None):
2978+
# `level` kwarg is _always_ positional, never name
29672979
# return an indexer, boolean array or a slice showing where the key is
29682980
# in the totality of values
29692981
# if the indexer is provided, then use this
@@ -3767,13 +3779,13 @@ def maybe_droplevels(index, key):
37673779
if isinstance(key, tuple):
37683780
for _ in key:
37693781
try:
3770-
index = index.droplevel(0)
3782+
index = index._drop_level_numbers([0])
37713783
except ValueError:
37723784
# we have dropped too much, so back out
37733785
return original_index
37743786
else:
37753787
try:
3776-
index = index.droplevel(0)
3788+
index = index._drop_level_numbers([0])
37773789
except ValueError:
37783790
pass
37793791

pandas/io/parsers.py

Lines changed: 48 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -794,10 +794,8 @@ def __init__(self, f, engine=None, **kwds):
794794

795795
_validate_skipfooter(kwds)
796796

797-
if kwds.get("dialect") is not None:
798-
dialect = kwds["dialect"]
799-
if dialect in csv.list_dialects():
800-
dialect = csv.get_dialect(dialect)
797+
dialect = _extract_dialect(kwds)
798+
if dialect is not None:
801799
kwds = _merge_with_dialect_properties(dialect, kwds)
802800

803801
if kwds.get("header", "infer") == "infer":
@@ -3739,6 +3737,50 @@ def _refine_defaults_read(
37393737
return kwds
37403738

37413739

3740+
def _extract_dialect(kwds: Dict[str, Any]) -> Optional[csv.Dialect]:
3741+
"""
3742+
Extract concrete csv dialect instance.
3743+
3744+
Returns
3745+
-------
3746+
csv.Dialect or None
3747+
"""
3748+
if kwds.get("dialect") is None:
3749+
return None
3750+
3751+
dialect = kwds["dialect"]
3752+
if dialect in csv.list_dialects():
3753+
dialect = csv.get_dialect(dialect)
3754+
3755+
_validate_dialect(dialect)
3756+
3757+
return dialect
3758+
3759+
3760+
MANDATORY_DIALECT_ATTRS = (
3761+
"delimiter",
3762+
"doublequote",
3763+
"escapechar",
3764+
"skipinitialspace",
3765+
"quotechar",
3766+
"quoting",
3767+
)
3768+
3769+
3770+
def _validate_dialect(dialect: csv.Dialect) -> None:
3771+
"""
3772+
Validate csv dialect instance.
3773+
3774+
Raises
3775+
------
3776+
ValueError
3777+
If incorrect dialect is provided.
3778+
"""
3779+
for param in MANDATORY_DIALECT_ATTRS:
3780+
if not hasattr(dialect, param):
3781+
raise ValueError(f"Invalid dialect {dialect} provided")
3782+
3783+
37423784
def _merge_with_dialect_properties(
37433785
dialect: csv.Dialect,
37443786
defaults: Dict[str, Any],
@@ -3757,30 +3799,11 @@ def _merge_with_dialect_properties(
37573799
-------
37583800
kwds : dict
37593801
Updated keyword arguments, merged with dialect parameters.
3760-
3761-
Raises
3762-
------
3763-
ValueError
3764-
If incorrect dialect is provided.
37653802
"""
37663803
kwds = defaults.copy()
37673804

3768-
# Any valid dialect should have these attributes.
3769-
# If any are missing, we will raise automatically.
3770-
mandatory_dialect_attrs = (
3771-
"delimiter",
3772-
"doublequote",
3773-
"escapechar",
3774-
"skipinitialspace",
3775-
"quotechar",
3776-
"quoting",
3777-
)
3778-
3779-
for param in mandatory_dialect_attrs:
3780-
try:
3781-
dialect_val = getattr(dialect, param)
3782-
except AttributeError as err:
3783-
raise ValueError(f"Invalid dialect {dialect} provided") from err
3805+
for param in MANDATORY_DIALECT_ATTRS:
3806+
dialect_val = getattr(dialect, param)
37843807

37853808
parser_default = _parser_defaults[param]
37863809
provided = kwds.get(param, parser_default)

0 commit comments

Comments
 (0)