Skip to content

Commit 946492f

Browse files
authored
Merge branch 'main' into fix/resample-interpolate-fails-with-inplace-true-58690-remove-inplace-option
2 parents 9ff7ec9 + bbe0e53 commit 946492f

24 files changed

+418
-59
lines changed

ci/code_checks.sh

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7171
-i ES01 `# For now it is ok if docstrings are missing the extended summary` \
7272
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
7373
-i "pandas.DataFrame.max RT03" \
74-
-i "pandas.DataFrame.mean RT03,SA01" \
75-
-i "pandas.DataFrame.median RT03,SA01" \
74+
-i "pandas.DataFrame.mean RT03" \
75+
-i "pandas.DataFrame.median RT03" \
7676
-i "pandas.DataFrame.min RT03" \
7777
-i "pandas.DataFrame.plot PR02" \
7878
-i "pandas.Grouper PR02" \
@@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8484
-i "pandas.MultiIndex.get_loc_level PR07" \
8585
-i "pandas.MultiIndex.levshape SA01" \
8686
-i "pandas.MultiIndex.names SA01" \
87-
-i "pandas.MultiIndex.nlevels SA01" \
8887
-i "pandas.MultiIndex.remove_unused_levels RT03,SA01" \
8988
-i "pandas.MultiIndex.reorder_levels RT03,SA01" \
9089
-i "pandas.MultiIndex.set_levels RT03,SA01" \
@@ -465,7 +464,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
465464
-i "pandas.io.stata.StataReader.variable_labels RT03,SA01" \
466465
-i "pandas.io.stata.StataWriter.write_file SA01" \
467466
-i "pandas.json_normalize RT03,SA01" \
468-
-i "pandas.merge PR07" \
469467
-i "pandas.merge_asof PR07,RT03" \
470468
-i "pandas.period_range RT03,SA01" \
471469
-i "pandas.plotting.andrews_curves RT03,SA01" \

doc/source/user_guide/missing_data.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ this behaviour and include NA values in the calculation, use ``skipna=False``.
353353
Dropping missing data
354354
~~~~~~~~~~~~~~~~~~~~~
355355

356-
:meth:`~DataFrame.dropna` dropa rows or columns with missing data.
356+
:meth:`~DataFrame.dropna` drops rows or columns with missing data.
357357

358358
.. ipython:: python
359359

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,8 +545,10 @@ I/O
545545
- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
546546
- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
547547
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
548+
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)
548549
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
549550
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
551+
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
550552

551553
Period
552554
^^^^^^
@@ -597,6 +599,7 @@ Other
597599
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
598600
- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
599601
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
602+
- Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
600603
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
601604
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow the use of the ``tan`` function. (:issue:`55091`)
602605
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)

pandas/core/_numba/executor.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,19 @@
1414

1515
from pandas.compat._optional import import_optional_dependency
1616

17+
from pandas.core.util.numba_ import jit_user_function
18+
1719

1820
@functools.cache
1921
def generate_apply_looper(func, nopython=True, nogil=True, parallel=False):
2022
if TYPE_CHECKING:
2123
import numba
2224
else:
2325
numba = import_optional_dependency("numba")
24-
nb_compat_func = numba.extending.register_jitable(func)
26+
nb_compat_func = jit_user_function(func)
2527

2628
@numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
27-
def nb_looper(values, axis):
29+
def nb_looper(values, axis, *args):
2830
# Operate on the first row/col in order to get
2931
# the output shape
3032
if axis == 0:
@@ -33,7 +35,7 @@ def nb_looper(values, axis):
3335
else:
3436
first_elem = values[0]
3537
dim0 = values.shape[0]
36-
res0 = nb_compat_func(first_elem)
38+
res0 = nb_compat_func(first_elem, *args)
3739
# Use np.asarray to get shape for
3840
# https://github.com/numba/numba/issues/4202#issuecomment-1185981507
3941
buf_shape = (dim0,) + np.atleast_1d(np.asarray(res0)).shape
@@ -44,11 +46,11 @@ def nb_looper(values, axis):
4446
if axis == 1:
4547
buff[0] = res0
4648
for i in numba.prange(1, values.shape[0]):
47-
buff[i] = nb_compat_func(values[i])
49+
buff[i] = nb_compat_func(values[i], *args)
4850
else:
4951
buff[:, 0] = res0
5052
for j in numba.prange(1, values.shape[1]):
51-
buff[:, j] = nb_compat_func(values[:, j])
53+
buff[:, j] = nb_compat_func(values[:, j], *args)
5254
return buff
5355

5456
return nb_looper

pandas/core/apply.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@
5151
from pandas.core._numba.executor import generate_apply_looper
5252
import pandas.core.common as com
5353
from pandas.core.construction import ensure_wrapped_if_datetimelike
54+
from pandas.core.util.numba_ import (
55+
get_jit_arguments,
56+
prepare_function_arguments,
57+
)
5458

5559
if TYPE_CHECKING:
5660
from collections.abc import (
@@ -70,7 +74,6 @@
7074
from pandas.core.resample import Resampler
7175
from pandas.core.window.rolling import BaseWindow
7276

73-
7477
ResType = dict[int, Any]
7578

7679

@@ -997,17 +1000,20 @@ def wrapper(*args, **kwargs):
9971000
return wrapper
9981001

9991002
if engine == "numba":
1000-
engine_kwargs = {} if engine_kwargs is None else engine_kwargs
1001-
1003+
args, kwargs = prepare_function_arguments(
1004+
self.func, # type: ignore[arg-type]
1005+
self.args,
1006+
self.kwargs,
1007+
)
10021008
# error: Argument 1 to "__call__" of "_lru_cache_wrapper" has
10031009
# incompatible type "Callable[..., Any] | str | list[Callable
10041010
# [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
10051011
# list[Callable[..., Any] | str]]"; expected "Hashable"
10061012
nb_looper = generate_apply_looper(
10071013
self.func, # type: ignore[arg-type]
1008-
**engine_kwargs,
1014+
**get_jit_arguments(engine_kwargs, kwargs),
10091015
)
1010-
result = nb_looper(self.values, self.axis)
1016+
result = nb_looper(self.values, self.axis, *args)
10111017
# If we made the result 2-D, squeeze it back to 1-D
10121018
result = np.squeeze(result)
10131019
else:
@@ -1148,21 +1154,23 @@ def generate_numba_apply_func(
11481154
# Currently the parallel argument doesn't get passed through here
11491155
# (it's disabled) since the dicts in numba aren't thread-safe.
11501156
@numba.jit(nogil=nogil, nopython=nopython, parallel=parallel)
1151-
def numba_func(values, col_names, df_index):
1157+
def numba_func(values, col_names, df_index, *args):
11521158
results = {}
11531159
for j in range(values.shape[1]):
11541160
# Create the series
11551161
ser = Series(
11561162
values[:, j], index=df_index, name=maybe_cast_str(col_names[j])
11571163
)
1158-
results[j] = jitted_udf(ser)
1164+
results[j] = jitted_udf(ser, *args)
11591165
return results
11601166

11611167
return numba_func
11621168

11631169
def apply_with_numba(self) -> dict[int, Any]:
1170+
func = cast(Callable, self.func)
1171+
args, kwargs = prepare_function_arguments(func, self.args, self.kwargs)
11641172
nb_func = self.generate_numba_apply_func(
1165-
cast(Callable, self.func), **self.engine_kwargs
1173+
func, **get_jit_arguments(self.engine_kwargs, kwargs)
11661174
)
11671175
from pandas.core._numba.extensions import set_numba_data
11681176

@@ -1177,7 +1185,7 @@ def apply_with_numba(self) -> dict[int, Any]:
11771185
# Convert from numba dict to regular dict
11781186
# Our isinstance checks in the df constructor don't pass for numbas typed dict
11791187
with set_numba_data(index) as index, set_numba_data(columns) as columns:
1180-
res = dict(nb_func(self.values, columns, index))
1188+
res = dict(nb_func(self.values, columns, index, *args))
11811189
return res
11821190

11831191
@property
@@ -1285,7 +1293,7 @@ def generate_numba_apply_func(
12851293
jitted_udf = numba.extending.register_jitable(func)
12861294

12871295
@numba.jit(nogil=nogil, nopython=nopython, parallel=parallel)
1288-
def numba_func(values, col_names_index, index):
1296+
def numba_func(values, col_names_index, index, *args):
12891297
results = {}
12901298
# Currently the parallel argument doesn't get passed through here
12911299
# (it's disabled) since the dicts in numba aren't thread-safe.
@@ -1297,15 +1305,17 @@ def numba_func(values, col_names_index, index):
12971305
index=col_names_index,
12981306
name=maybe_cast_str(index[i]),
12991307
)
1300-
results[i] = jitted_udf(ser)
1308+
results[i] = jitted_udf(ser, *args)
13011309

13021310
return results
13031311

13041312
return numba_func
13051313

13061314
def apply_with_numba(self) -> dict[int, Any]:
1315+
func = cast(Callable, self.func)
1316+
args, kwargs = prepare_function_arguments(func, self.args, self.kwargs)
13071317
nb_func = self.generate_numba_apply_func(
1308-
cast(Callable, self.func), **self.engine_kwargs
1318+
func, **get_jit_arguments(self.engine_kwargs, kwargs)
13091319
)
13101320

13111321
from pandas.core._numba.extensions import set_numba_data
@@ -1316,7 +1326,7 @@ def apply_with_numba(self) -> dict[int, Any]:
13161326
set_numba_data(self.obj.index) as index,
13171327
set_numba_data(self.columns) as columns,
13181328
):
1319-
res = dict(nb_func(self.values, columns, index))
1329+
res = dict(nb_func(self.values, columns, index, *args))
13201330

13211331
return res
13221332

pandas/core/generic.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12571,7 +12571,7 @@ def make_doc(name: str, ndim: int) -> str:
1257112571
elif name == "median":
1257212572
base_doc = _num_doc
1257312573
desc = "Return the median of the values over the requested axis."
12574-
see_also = ""
12574+
see_also = _stat_func_see_also
1257512575
examples = """
1257612576
1257712577
Examples
@@ -12612,7 +12612,7 @@ def make_doc(name: str, ndim: int) -> str:
1261212612
elif name == "mean":
1261312613
base_doc = _num_doc
1261412614
desc = "Return the mean of the values over the requested axis."
12615-
see_also = ""
12615+
see_also = _stat_func_see_also
1261612616
examples = """
1261712617
1261812618
Examples
@@ -12760,6 +12760,7 @@ def make_doc(name: str, ndim: int) -> str:
1276012760
a 0.0
1276112761
dtype: float64"""
1276212762
kwargs = {"min_count": ""}
12763+
1276312764
elif name == "kurt":
1276412765
base_doc = _num_doc
1276512766
desc = (

pandas/core/indexes/multi.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,13 @@ def nlevels(self) -> int:
10311031
"""
10321032
Integer number of levels in this MultiIndex.
10331033
1034+
See Also
1035+
--------
1036+
MultiIndex.levels : Get the levels of the MultiIndex.
1037+
MultiIndex.codes : Get the codes of the MultiIndex.
1038+
MultiIndex.from_arrays : Convert arrays to MultiIndex.
1039+
MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
1040+
10341041
Examples
10351042
--------
10361043
>>> mi = pd.MultiIndex.from_arrays([["a"], ["b"], ["c"]])

0 commit comments

Comments
 (0)