Commit 78cd01e

Merge branch 'master' of https://github.com/pandas-dev/pandas into cln-comp
2 parents: dcbfe4e + e131b21

15 files changed: +269 -142 lines changed

pandas/_libs/lib.pyx

Lines changed: 5 additions & 4 deletions
@@ -125,7 +125,7 @@ def is_scalar(val: object) -> bool:
         - Interval
         - DateOffset
         - Fraction
-        - Number
+        - Number.

     Returns
     -------
@@ -867,9 +867,10 @@ def is_list_like(obj: object, allow_sets: bool = True):

     Parameters
     ----------
-    obj : The object to check
-    allow_sets : boolean, default True
-        If this parameter is False, sets will not be considered list-like
+    obj : object
+        The object to check.
+    allow_sets : bool, default True
+        If this parameter is False, sets will not be considered list-like.

         .. versionadded:: 0.24.0
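For reference, the two helpers whose docstrings are touched here are exposed publicly via pandas.api.types; a quick illustrative sketch (example values are mine, not part of the commit):

# Illustrative usage of is_scalar / is_list_like
from pandas.api.types import is_list_like, is_scalar

is_scalar(3.14)                         # True: plain numerics are scalars
is_scalar([1, 2, 3])                    # False: containers are not scalars

is_list_like([1, 2, 3])                 # True: lists are list-like
is_list_like("abc")                     # False: strings are deliberately excluded
is_list_like({1, 2})                    # True by default
is_list_like({1, 2}, allow_sets=False)  # False when sets are opted out (0.24.0+)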

pandas/core/arrays/interval.py

Lines changed: 6 additions & 6 deletions
@@ -260,9 +260,9 @@ def _from_factorized(cls, values, original):
         Whether the intervals are closed on the left-side, right-side, both
         or neither.
     copy : bool, default False
-        copy the data
+        Copy the data.
     dtype : dtype or None, default None
-        If None, dtype will be inferred
+        If None, dtype will be inferred.

         .. versionadded:: 0.23.0

@@ -383,16 +383,16 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
     Parameters
     ----------
     data : array-like (1-dimensional)
-        Array of tuples
+        Array of tuples.
     closed : {'left', 'right', 'both', 'neither'}, default 'right'
         Whether the intervals are closed on the left-side, right-side, both
         or neither.
     copy : bool, default False
-        by-default copy the data, this is compat only and ignored
+        By-default copy the data, this is compat only and ignored.
     dtype : dtype or None, default None
-        If None, dtype will be inferred
+        If None, dtype will be inferred.

-        ..versionadded:: 0.23.0
+        .. versionadded:: 0.23.0

     Returns
     -------
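For context on the docstrings edited above, an illustrative sketch of the IntervalArray alternate constructors (toy values, assuming the public pd.arrays.IntervalArray namespace):

# Three equivalent ways to spell the intervals (0, 1], (1, 2], (2, 3]
import pandas as pd

ia_breaks = pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3], closed="right")
ia_arrays = pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
ia_tuples = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])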

pandas/core/dtypes/concat.py

Lines changed: 6 additions & 5 deletions
@@ -185,13 +185,14 @@ def concat_categorical(to_concat, axis=0):


 def union_categoricals(to_union, sort_categories=False, ignore_order=False):
     """
-    Combine list-like of Categorical-like, unioning categories. All
-    categories must have the same dtype.
+    Combine list-like of Categorical-like, unioning categories.
+
+    All categories must have the same dtype.

     Parameters
     ----------
-    to_union : list-like of Categorical, CategoricalIndex,
-        or Series with dtype='category'
+    to_union : list-like
+        Categorical, CategoricalIndex, or Series with dtype='category'.
     sort_categories : bool, default False
         If true, resulting categories will be lexsorted, otherwise
         they will be ordered as they appear in the data.
@@ -201,7 +202,7 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False):

     Returns
     -------
-    result : Categorical
+    Categorical

     Raises
     ------
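A small usage sketch of the function whose docstring is reworded above (inputs invented for illustration):

# union_categoricals combines categories across inputs
import pandas as pd
from pandas.api.types import union_categoricals

a = pd.Categorical(["b", "c"])
b = pd.Categorical(["a", "b"])

# Categories are combined; by default they keep their order of appearance.
union_categoricals([a, b])                        # categories: ['b', 'c', 'a']
union_categoricals([a, b], sort_categories=True)  # categories: ['a', 'b', 'c']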

pandas/core/groupby/generic.py

Lines changed: 14 additions & 10 deletions
@@ -655,16 +655,17 @@ def value_counts(
         rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))

         # multi-index components
-        labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
+        codes = self.grouper.recons_codes
+        codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
         levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
         names = self.grouper.names + [self._selection_name]

         if dropna:
-            mask = labels[-1] != -1
+            mask = codes[-1] != -1
             if mask.all():
                 dropna = False
             else:
-                out, labels = out[mask], [label[mask] for label in labels]
+                out, codes = out[mask], [level_codes[mask] for level_codes in codes]

         if normalize:
             out = out.astype("float")
@@ -680,11 +681,11 @@ def value_counts(
         if sort and bins is None:
             cat = ids[inc][mask] if dropna else ids[inc]
             sorter = np.lexsort((out if ascending else -out, cat))
-            out, labels[-1] = out[sorter], labels[-1][sorter]
+            out, codes[-1] = out[sorter], codes[-1][sorter]

         if bins is None:
             mi = MultiIndex(
-                levels=levels, codes=labels, names=names, verify_integrity=False
+                levels=levels, codes=codes, names=names, verify_integrity=False
             )

             if is_integer_dtype(out):
@@ -694,14 +695,14 @@ def value_counts(
         # for compat. with libgroupby.value_counts need to ensure every
         # bin is present at every index level, null filled with zeros
         diff = np.zeros(len(out), dtype="bool")
-        for lab in labels[:-1]:
-            diff |= np.r_[True, lab[1:] != lab[:-1]]
+        for level_codes in codes[:-1]:
+            diff |= np.r_[True, level_codes[1:] != level_codes[:-1]]

         ncat, nbin = diff.sum(), len(levels[-1])

         left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)]

-        right = [diff.cumsum() - 1, labels[-1]]
+        right = [diff.cumsum() - 1, codes[-1]]

         _, idx = _get_join_indexers(left, right, sort=False, how="left")
         out = np.where(idx != -1, out[idx], 0)
@@ -711,7 +712,10 @@ def value_counts(
             out, left[-1] = out[sorter], left[-1][sorter]

         # build the multi-index w/ full levels
-        codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
+        def build_codes(lev_codes: np.ndarray) -> np.ndarray:
+            return np.repeat(lev_codes[diff], nbin)
+
+        codes = [build_codes(lev_codes) for lev_codes in codes[:-1]]
         codes.append(left[-1])

         mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)
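The rename tracks the MultiIndex API, where the per-level integer positions are called codes rather than labels. A toy standalone sketch of that levels/codes pairing (not the value_counts internals):

# Levels hold the unique values; codes index into them, one array per level.
import pandas as pd

levels = [["a", "b"], [1, 2, 3]]
codes = [[0, 0, 1], [0, 2, 1]]  # integer positions into each level

mi = pd.MultiIndex(levels=levels, codes=codes, verify_integrity=False)
mi.tolist()  # [('a', 1), ('a', 3), ('b', 2)]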
@@ -758,7 +762,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):
                 )
             )
         filled = getattr(self, fill_method)(limit=limit)
-        fill_grp = filled.groupby(self.grouper.labels)
+        fill_grp = filled.groupby(self.grouper.codes)
         shifted = fill_grp.shift(periods=periods, freq=freq)

         return (filled / shifted) - 1

pandas/core/groupby/groupby.py

Lines changed: 14 additions & 3 deletions
@@ -899,10 +899,21 @@ def _python_agg_general(self, func, *args, **kwargs):
         output = {}
         for name, obj in self._iterate_slices():
             try:
-                result, counts = self.grouper.agg_series(obj, f)
+                # if this function is invalid for this dtype, we will ignore it.
+                func(obj[:0])
             except TypeError:
                 continue
-            else:
+            except AssertionError:
+                raise
+            except Exception:
+                # Our function depends on having a non-empty argument
+                # See test_groupby_agg_err_catching
+                pass
+
+            result, counts = self.grouper.agg_series(obj, f)
+            if result is not None:
+                # TODO: only 3 test cases get None here, do something
+                # in those cases
                 output[name] = self._try_cast(result, obj, numeric_only=True)

         if len(output) == 0:
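The new flow probes the aggregation function on an empty slice so dtype-incompatible columns can be skipped up front rather than relying on errors raised mid-aggregation. A simplified standalone sketch of that probe pattern (agg_by_column and mean_of_numeric are hypothetical names, not pandas API):

# Sketch of the "probe on an empty slice" idea; not the pandas internals.
import pandas as pd


def mean_of_numeric(col: pd.Series) -> float:
    # Raises TypeError for non-numeric input, even on an empty slice,
    # which is what the probe relies on.
    if not pd.api.types.is_numeric_dtype(col):
        raise TypeError(f"cannot aggregate dtype {col.dtype}")
    return float(col.mean())


def agg_by_column(df: pd.DataFrame, func) -> dict:
    output = {}
    for name in df.columns:
        col = df[name]
        try:
            # Probe on an empty slice: invalid dtypes fail fast here.
            func(col[:0])
        except TypeError:
            continue
        except Exception:
            # The function may genuinely need non-empty input; fall
            # through and let the real computation decide.
            pass
        output[name] = func(col)
    return output


agg_by_column(pd.DataFrame({"num": [1.0, 2.0, 3.0], "txt": list("abc")}), mean_of_numeric)
# {'num': 2.0} -- the object-dtype column is skipped by the probe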
@@ -2338,7 +2349,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0
                 )
             )
         filled = getattr(self, fill_method)(limit=limit)
-        fill_grp = filled.groupby(self.grouper.labels)
+        fill_grp = filled.groupby(self.grouper.codes)
         shifted = fill_grp.shift(periods=periods, freq=freq)
         return (filled / shifted) - 1
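The labels-to-codes switch here is internal; GroupBy.pct_change itself behaves as before. For reference, a tiny example of the method on invented data:

# Percent change versus the previous row within each group; the first
# row of each group has no predecessor, hence NaN.
import pandas as pd

df = pd.DataFrame({"g": ["a", "a", "b", "b"], "x": [1.0, 2.0, 10.0, 15.0]})
df.groupby("g")["x"].pct_change()
# 0    NaN
# 1    1.0
# 2    NaN
# 3    0.5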

pandas/core/groupby/grouper.py

Lines changed: 32 additions & 34 deletions
@@ -3,7 +3,7 @@
 split-apply-combine paradigm.
 """

-from typing import Tuple
+from typing import Optional, Tuple
 import warnings

 import numpy as np
@@ -21,6 +21,7 @@
 )
 from pandas.core.dtypes.generic import ABCSeries

+from pandas._typing import FrameOrSeries
 import pandas.core.algorithms as algorithms
 from pandas.core.arrays import Categorical, ExtensionArray
 import pandas.core.common as com
@@ -228,10 +229,10 @@ class Grouping:
     ----------
     index : Index
     grouper :
-    obj :
+    obj Union[DataFrame, Series]:
     name :
     level :
-    observed : boolean, default False
+    observed : bool, default False
         If we are a Categorical, use the observed values
     in_axis : if the Grouping is a column in self.obj and hence among
         Groupby.exclusions list
@@ -240,25 +241,22 @@ class Grouping:
     -------
     **Attributes**:
       * indices : dict of {group -> index_list}
-      * labels : ndarray, group labels
-      * ids : mapping of label -> group
-      * counts : array of group counts
+      * codes : ndarray, group codes
       * group_index : unique groups
       * groups : dict of {group -> label_list}
     """

     def __init__(
         self,
-        index,
+        index: Index,
         grouper=None,
-        obj=None,
+        obj: Optional[FrameOrSeries] = None,
         name=None,
         level=None,
-        sort=True,
-        observed=False,
-        in_axis=False,
+        sort: bool = True,
+        observed: bool = False,
+        in_axis: bool = False,
     ):
-
         self.name = name
         self.level = level
         self.grouper = _convert_grouper(index, grouper)
@@ -290,12 +288,12 @@ def __init__(
             if self.name is None:
                 self.name = index.names[level]

-            self.grouper, self._labels, self._group_index = index._get_grouper_for_level(  # noqa: E501
+            self.grouper, self._codes, self._group_index = index._get_grouper_for_level(  # noqa: E501
                 self.grouper, level
             )

         # a passed Grouper like, directly get the grouper in the same way
-        # as single grouper groupby, use the group_info to get labels
+        # as single grouper groupby, use the group_info to get codes
         elif isinstance(self.grouper, Grouper):
             # get the new grouper; we already have disambiguated
             # what key/level refer to exactly, don't need to
@@ -308,7 +306,7 @@ def __init__(
             self.grouper = grouper._get_grouper()

         else:
-            if self.grouper is None and self.name is not None:
+            if self.grouper is None and self.name is not None and self.obj is not None:
                 self.grouper = self.obj[self.name]

             elif isinstance(self.grouper, (list, tuple)):
@@ -324,7 +322,7 @@ def __init__(

                 # we make a CategoricalIndex out of the cat grouper
                 # preserving the categories / ordered attributes
-                self._labels = self.grouper.codes
+                self._codes = self.grouper.codes
                 if observed:
                     codes = algorithms.unique1d(self.grouper.codes)
                     codes = codes[codes != -1]
@@ -380,11 +378,11 @@ def __repr__(self):
     def __iter__(self):
         return iter(self.indices)

-    _labels = None
-    _group_index = None
+    _codes = None  # type: np.ndarray
+    _group_index = None  # type: Index

     @property
-    def ngroups(self):
+    def ngroups(self) -> int:
         return len(self.group_index)

     @cache_readonly
@@ -397,38 +395,38 @@ def indices(self):
         return values._reverse_indexer()

     @property
-    def labels(self):
-        if self._labels is None:
-            self._make_labels()
-        return self._labels
+    def codes(self) -> np.ndarray:
+        if self._codes is None:
+            self._make_codes()
+        return self._codes

     @cache_readonly
-    def result_index(self):
+    def result_index(self) -> Index:
         if self.all_grouper is not None:
             return recode_from_groupby(self.all_grouper, self.sort, self.group_index)
         return self.group_index

     @property
-    def group_index(self):
+    def group_index(self) -> Index:
         if self._group_index is None:
-            self._make_labels()
+            self._make_codes()
         return self._group_index

-    def _make_labels(self):
-        if self._labels is None or self._group_index is None:
+    def _make_codes(self) -> None:
+        if self._codes is None or self._group_index is None:
             # we have a list of groupers
             if isinstance(self.grouper, BaseGrouper):
-                labels = self.grouper.label_info
+                codes = self.grouper.codes_info
                 uniques = self.grouper.result_index
             else:
-                labels, uniques = algorithms.factorize(self.grouper, sort=self.sort)
+                codes, uniques = algorithms.factorize(self.grouper, sort=self.sort)
                 uniques = Index(uniques, name=self.name)
-            self._labels = labels
+            self._codes = codes
             self._group_index = uniques

     @cache_readonly
-    def groups(self):
-        return self.index.groupby(Categorical.from_codes(self.labels, self.group_index))
+    def groups(self) -> dict:
+        return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))


 def _get_grouper(
@@ -678,7 +676,7 @@ def _is_label_like(val):
     return isinstance(val, (str, tuple)) or (val is not None and is_scalar(val))


-def _convert_grouper(axis, grouper):
+def _convert_grouper(axis: Index, grouper):
     if isinstance(grouper, dict):
         return grouper.get
     elif isinstance(grouper, Series):
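Grouping now uses the same codes/uniques vocabulary as the public API: the codes are integer positions into the unique group values. A brief standalone illustration (toy values, not the internal class):

# factorize splits values into integer codes plus the unique values;
# Categorical.from_codes reverses that split.
import pandas as pd

values = pd.Series(["b", "a", "b", "c"])

codes, uniques = pd.factorize(values, sort=True)
codes    # array([1, 0, 1, 2])  -- positions into uniques
uniques  # Index(['a', 'b', 'c'], dtype='object')

pd.Categorical.from_codes(codes, categories=uniques)
# ['b', 'a', 'b', 'c'] with categories ['a', 'b', 'c']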
