Skip to content

Commit f41f40f

Browse files
committed
Merge branch 'master' into api-ri
2 parents 31ecb65 + 22f8ee8 commit f41f40f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+1036
-569
lines changed

.github/workflows/ci.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,20 @@ jobs:
176176
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
177177
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
178178
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
179+
180+
pytest pandas/tests/api/
181+
pytest pandas/tests/base/
182+
pytest pandas/tests/computation/
183+
pytest pandas/tests/config/
184+
pytest pandas/tests/dtypes/
185+
pytest pandas/tests/generic/
186+
pytest pandas/tests/indexes/
187+
pytest pandas/tests/libs/
188+
pytest pandas/tests/plotting/
189+
pytest pandas/tests/scalar/
190+
pytest pandas/tests/strings/
191+
pytest pandas/tests/tools/
192+
pytest pandas/tests/tseries/
193+
pytest pandas/tests/tslibs/
194+
pytest pandas/tests/util/
195+
pytest pandas/tests/window/

asv_bench/benchmarks/frame_methods.py

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
date_range,
1212
isnull,
1313
period_range,
14+
timedelta_range,
1415
)
1516

1617
from .pandas_vb_common import tm
@@ -52,6 +53,7 @@ def setup(self):
5253
N = 10 ** 3
5354
self.df = DataFrame(np.random.randn(N * 10, N))
5455
self.idx = np.arange(4 * N, 7 * N)
56+
self.idx_cols = np.random.randint(0, N, N)
5557
self.df2 = DataFrame(
5658
{
5759
c: {
@@ -68,6 +70,9 @@ def time_reindex_axis0(self):
6870
self.df.reindex(self.idx)
6971

7072
def time_reindex_axis1(self):
73+
self.df.reindex(columns=self.idx_cols)
74+
75+
def time_reindex_axis1_missing(self):
7176
self.df.reindex(columns=self.idx)
7277

7378
def time_reindex_both_axes(self):
@@ -351,15 +356,42 @@ def time_isnull_obj(self):
351356

352357
class Fillna:
353358

354-
params = ([True, False], ["pad", "bfill"])
355-
param_names = ["inplace", "method"]
356-
357-
def setup(self, inplace, method):
358-
values = np.random.randn(10000, 100)
359-
values[::2] = np.nan
360-
self.df = DataFrame(values)
361-
362-
def time_frame_fillna(self, inplace, method):
359+
params = (
360+
[True, False],
361+
["pad", "bfill"],
362+
[
363+
"float64",
364+
"float32",
365+
"object",
366+
"Int64",
367+
"Float64",
368+
"datetime64[ns]",
369+
"datetime64[ns, tz]",
370+
"timedelta64[ns]",
371+
],
372+
)
373+
param_names = ["inplace", "method", "dtype"]
374+
375+
def setup(self, inplace, method, dtype):
376+
N, M = 10000, 100
377+
if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"):
378+
data = {
379+
"datetime64[ns]": date_range("2011-01-01", freq="H", periods=N),
380+
"datetime64[ns, tz]": date_range(
381+
"2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
382+
),
383+
"timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"),
384+
}
385+
self.df = DataFrame({f"col_{i}": data[dtype] for i in range(M)})
386+
self.df[::2] = None
387+
else:
388+
values = np.random.randn(N, M)
389+
values[::2] = np.nan
390+
if dtype == "Int64":
391+
values = values.round()
392+
self.df = DataFrame(values, dtype=dtype)
393+
394+
def time_frame_fillna(self, inplace, method, dtype):
363395
self.df.fillna(inplace=inplace, method=method)
364396

365397

doc/source/reference/style.rst

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ Style application
3535
Styler.applymap
3636
Styler.where
3737
Styler.format
38-
Styler.set_precision
3938
Styler.set_td_classes
4039
Styler.set_table_styles
4140
Styler.set_table_attributes
@@ -44,7 +43,6 @@ Style application
4443
Styler.set_caption
4544
Styler.set_properties
4645
Styler.set_uuid
47-
Styler.set_na_rep
4846
Styler.clear
4947
Styler.pipe
5048

@@ -53,9 +51,9 @@ Builtin styles
5351
.. autosummary::
5452
:toctree: api/
5553

54+
Styler.highlight_null
5655
Styler.highlight_max
5756
Styler.highlight_min
58-
Styler.highlight_null
5957
Styler.background_gradient
6058
Styler.bar
6159

doc/source/whatsnew/v1.3.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ Other enhancements
135135
- :meth:`.Styler.set_tooltips_class` and :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`)
136136
- :meth:`.Styler.apply` now accepts ndarray function returns more consistently, i.e. for every supported value of ``axis``: ``0``, ``1`` or ``None`` (:issue:`39359`)
137137
- :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if incorrectly formatted CSS is passed on render (:issue:`39660`)
138+
- Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`)
138139
- :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raise a helpful error message when the indexer has too many dimensions (:issue:`35349`)
139140
- :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.
140141
- Add support for parsing ``ISO 8601``-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`)
@@ -364,6 +365,8 @@ Deprecations
364365
- Deprecated :meth:`core.window.ewm.ExponentialMovingWindow.vol` (:issue:`39220`)
365366
- Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`)
366367
- Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`)
368+
- Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favour of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`)
369+
- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like; will raise if any function fails on a column in a future version (:issue:`40211`)
367370

368371
.. ---------------------------------------------------------------------------
369372
@@ -375,6 +378,7 @@ Performance improvements
375378
- Performance improvement in :meth:`IntervalIndex.isin` (:issue:`38353`)
376379
- Performance improvement in :meth:`Series.mean` for nullable data types (:issue:`34814`)
377380
- Performance improvement in :meth:`Series.isin` for nullable data types (:issue:`38340`)
381+
- Performance improvement in :meth:`DataFrame.fillna` with ``method="pad|backfill"`` for nullable floating and nullable integer dtypes (:issue:`39953`)
378382
- Performance improvement in :meth:`DataFrame.corr` for ``method="kendall"`` (:issue:`28329`)
379383
- Performance improvement in :meth:`core.window.rolling.Rolling.corr` and :meth:`core.window.rolling.Rolling.cov` (:issue:`39388`)
380384
- Performance improvement in :meth:`core.window.rolling.RollingGroupby.corr`, :meth:`core.window.rolling.RollingGroupby.cov`, :meth:`core.window.expanding.ExpandingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.cov` (:issue:`39591`)

pandas/_libs/algos.pyx

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -597,10 +597,11 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
597597

598598
@cython.boundscheck(False)
599599
@cython.wraparound(False)
600-
def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
600+
def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
601601
cdef:
602602
Py_ssize_t i, N
603603
algos_t val
604+
uint8_t prev_mask
604605
int lim, fill_count = 0
605606

606607
N = len(values)
@@ -612,15 +613,18 @@ def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
612613
lim = validate_limit(N, limit)
613614

614615
val = values[0]
616+
prev_mask = mask[0]
615617
for i in range(N):
616618
if mask[i]:
617619
if fill_count >= lim:
618620
continue
619621
fill_count += 1
620622
values[i] = val
623+
mask[i] = prev_mask
621624
else:
622625
fill_count = 0
623626
val = values[i]
627+
prev_mask = mask[i]
624628

625629

626630
@cython.boundscheck(False)
@@ -739,10 +743,11 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
739743

740744
@cython.boundscheck(False)
741745
@cython.wraparound(False)
742-
def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
746+
def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
743747
cdef:
744748
Py_ssize_t i, N
745749
algos_t val
750+
uint8_t prev_mask
746751
int lim, fill_count = 0
747752

748753
N = len(values)
@@ -754,15 +759,18 @@ def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
754759
lim = validate_limit(N, limit)
755760

756761
val = values[N - 1]
762+
prev_mask = mask[N - 1]
757763
for i in range(N - 1, -1, -1):
758764
if mask[i]:
759765
if fill_count >= lim:
760766
continue
761767
fill_count += 1
762768
values[i] = val
769+
mask[i] = prev_mask
763770
else:
764771
fill_count = 0
765772
val = values[i]
773+
prev_mask = mask[i]
766774

767775

768776
@cython.boundscheck(False)

pandas/_libs/internals.pyx

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,24 @@ cimport numpy as cnp
1515
from numpy cimport (
1616
NPY_INT64,
1717
int64_t,
18+
ndarray,
1819
)
1920

2021
cnp.import_array()
2122

2223
from pandas._libs.algos import ensure_int64
24+
from pandas._libs.util cimport is_integer_object
2325

2426

2527
@cython.final
2628
cdef class BlockPlacement:
2729
# __slots__ = '_as_slice', '_as_array', '_len'
2830
cdef:
2931
slice _as_slice
30-
object _as_array
32+
ndarray _as_array # Note: this still allows `None`
3133
bint _has_slice, _has_array, _is_known_slice_like
3234

33-
def __init__(self, val):
35+
def __cinit__(self, val):
3436
cdef:
3537
slice slc
3638

@@ -39,7 +41,7 @@ cdef class BlockPlacement:
3941
self._has_slice = False
4042
self._has_array = False
4143

42-
if isinstance(val, int):
44+
if is_integer_object(val):
4345
slc = slice(val, val + 1, 1)
4446
self._as_slice = slc
4547
self._has_slice = True
@@ -160,12 +162,12 @@ cdef class BlockPlacement:
160162
np.concatenate([self.as_array] + [o.as_array for o in others])
161163
)
162164

163-
cdef iadd(self, other):
165+
cdef BlockPlacement iadd(self, other):
164166
cdef:
165167
slice s = self._ensure_has_slice()
166168
Py_ssize_t other_int, start, stop, step, l
167169

168-
if isinstance(other, int) and s is not None:
170+
if is_integer_object(other) and s is not None:
169171
other_int = <Py_ssize_t>other
170172

171173
if other_int == 0:
@@ -438,13 +440,13 @@ def get_blkno_placements(blknos, group: bool = True):
438440
"""
439441
Parameters
440442
----------
441-
blknos : array of int64
443+
blknos : np.ndarray[int64]
442444
group : bool, default True
443445
444446
Returns
445447
-------
446448
iterator
447-
yield (BlockPlacement, blkno)
449+
yield (blkno, BlockPlacement)
448450
"""
449451
blknos = ensure_int64(blknos)
450452

pandas/_testing/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -560,18 +560,18 @@ def makeCustomIndex(
560560
names = [names]
561561

562562
# specific 1D index type requested?
563-
idx_func = {
563+
idx_func_dict: dict[str, Callable[..., Index]] = {
564564
"i": makeIntIndex,
565565
"f": makeFloatIndex,
566566
"s": makeStringIndex,
567567
"u": makeUnicodeIndex,
568568
"dt": makeDateIndex,
569569
"td": makeTimedeltaIndex,
570570
"p": makePeriodIndex,
571-
}.get(idx_type)
571+
}
572+
idx_func = idx_func_dict.get(idx_type)
572573
if idx_func:
573-
# error: Cannot call function of unknown type
574-
idx = idx_func(nentries) # type: ignore[operator]
574+
idx = idx_func(nentries)
575575
# but we need to fill in the name
576576
if names:
577577
idx.name = names[0]

pandas/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ def add_imports(doctest_namespace):
190190
# ----------------------------------------------------------------
191191
# Common arguments
192192
# ----------------------------------------------------------------
193-
@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis {repr(x)}")
193+
@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis={repr(x)}")
194194
def axis(request):
195195
"""
196196
Fixture for returning the axis numbers of a DataFrame.

pandas/core/apply.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Union,
1616
cast,
1717
)
18+
import warnings
1819

1920
import numpy as np
2021

@@ -267,6 +268,7 @@ def transform_dict_like(self, func):
267268
func = self.normalize_dictlike_arg("transform", obj, func)
268269

269270
results: Dict[Hashable, FrameOrSeriesUnion] = {}
271+
failed_names = []
270272
for name, how in func.items():
271273
colg = obj._gotitem(name, ndim=1)
272274
try:
@@ -277,10 +279,20 @@ def transform_dict_like(self, func):
277279
"No transform functions were provided",
278280
}:
279281
raise err
280-
282+
else:
283+
failed_names.append(name)
281284
# combine results
282285
if not results:
283286
raise ValueError("Transform function failed")
287+
if len(failed_names) > 0:
288+
warnings.warn(
289+
f"{failed_names} did not transform successfully. "
290+
f"Allowing for partial failure is deprecated, this will raise "
291+
f"a ValueError in a future version of pandas."
292+
f"Drop these columns/ops to avoid this warning.",
293+
FutureWarning,
294+
stacklevel=4,
295+
)
284296
return concat(results, axis=1)
285297

286298
def transform_str_or_callable(self, func) -> FrameOrSeriesUnion:

0 commit comments

Comments
 (0)