Skip to content

Commit 1938311

Browse files
Merge remote-tracking branch 'upstream/master' into future-1
2 parents a0adc2e + 52f04db commit 1938311

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

58 files changed

+512
-659
lines changed

asv_bench/benchmarks/algos/isin.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,6 @@ def setup(self, dtype):
5050

5151
elif dtype in ["category[object]", "category[int]"]:
5252
# Note: sizes are different in this case than others
53-
np.random.seed(1234)
54-
5553
n = 5 * 10 ** 5
5654
sample_size = 100
5755

@@ -99,7 +97,6 @@ class IsinAlmostFullWithRandomInt:
9997
def setup(self, dtype, exponent, title):
10098
M = 3 * 2 ** (exponent - 2)
10199
# 0.77-the maximal share of occupied buckets
102-
np.random.seed(42)
103100
self.series = Series(np.random.randint(0, M, M)).astype(dtype)
104101

105102
values = np.random.randint(0, M, M).astype(dtype)
@@ -132,7 +129,6 @@ class IsinWithRandomFloat:
132129
param_names = ["dtype", "size", "title"]
133130

134131
def setup(self, dtype, size, title):
135-
np.random.seed(42)
136132
self.values = np.random.rand(size)
137133
self.series = Series(self.values).astype(dtype)
138134
np.random.shuffle(self.values)
@@ -179,7 +175,6 @@ class IsinWithArange:
179175

180176
def setup(self, dtype, M, offset_factor):
181177
offset = int(M * offset_factor)
182-
np.random.seed(42)
183178
tmp = Series(np.random.randint(offset, M + offset, 10 ** 6))
184179
self.series = tmp.astype(dtype)
185180
self.values = np.arange(M).astype(dtype)
@@ -290,10 +285,8 @@ def setup(self, dtype, MaxNumber, series_type):
290285
raise NotImplementedError
291286

292287
if series_type == "random_hits":
293-
np.random.seed(42)
294288
array = np.random.randint(0, MaxNumber, N)
295289
if series_type == "random_misses":
296-
np.random.seed(42)
297290
array = np.random.randint(0, MaxNumber, N) + MaxNumber
298291
if series_type == "monotone_hits":
299292
array = np.repeat(np.arange(MaxNumber), N // MaxNumber)
@@ -322,7 +315,6 @@ def setup(self, dtype, series_type):
322315
raise NotImplementedError
323316

324317
if series_type == "random":
325-
np.random.seed(42)
326318
vals = np.random.randint(0, 10 * N, N)
327319
if series_type == "monotone":
328320
vals = np.arange(N)

asv_bench/benchmarks/frame_ctor.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ class FromDictwithTimestamp:
6767

6868
def setup(self, offset):
6969
N = 10 ** 3
70-
np.random.seed(1234)
7170
idx = date_range(Timestamp("1/1/1900"), freq=offset, periods=N)
7271
df = DataFrame(np.random.randn(N, 10), index=idx)
7372
self.d = df.to_dict()

asv_bench/benchmarks/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -507,11 +507,11 @@ def time_frame_agg(self, dtype, method):
507507
self.df.groupby("key").agg(method)
508508

509509

510-
class CumminMax:
510+
class Cumulative:
511511
param_names = ["dtype", "method"]
512512
params = [
513513
["float64", "int64", "Float64", "Int64"],
514-
["cummin", "cummax"],
514+
["cummin", "cummax", "cumsum"],
515515
]
516516

517517
def setup(self, dtype, method):

asv_bench/benchmarks/hash_functions.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ class NumericSeriesIndexingShuffled:
6767

6868
def setup(self, index, N):
6969
vals = np.array(list(range(55)) + [54] + list(range(55, N - 1)))
70-
np.random.seed(42)
7170
np.random.shuffle(vals)
7271
indices = index(vals)
7372
self.data = pd.Series(np.arange(N), index=indices)

asv_bench/benchmarks/indexing.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -368,17 +368,14 @@ def setup(self):
368368
self.df = DataFrame(index=range(self.N))
369369

370370
def time_insert(self):
371-
np.random.seed(1234)
372371
for i in range(100):
373372
self.df.insert(0, i, np.random.randn(self.N), allow_duplicates=True)
374373

375374
def time_assign_with_setitem(self):
376-
np.random.seed(1234)
377375
for i in range(100):
378376
self.df[i] = np.random.randn(self.N)
379377

380378
def time_assign_list_like_with_setitem(self):
381-
np.random.seed(1234)
382379
self.df[list(range(100))] = np.random.randn(self.N, 100)
383380

384381
def time_assign_list_of_columns_concat(self):

asv_bench/benchmarks/series_methods.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,6 @@ class Mode:
145145
param_names = ["N", "dtype"]
146146

147147
def setup(self, N, dtype):
148-
np.random.seed(42)
149148
self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype)
150149

151150
def time_mode(self, N, dtype):

ci/code_checks.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
7777
invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
7878
RET=$(($RET + $?)) ; echo $MSG "DONE"
7979

80+
MSG='Check for unnecessary random seeds in asv benchmarks' ; echo $MSG
81+
invgrep -R --exclude pandas_vb_common.py -E 'np.random.seed' asv_bench/benchmarks/
82+
RET=$(($RET + $?)) ; echo $MSG "DONE"
83+
8084
fi
8185

8286
### CODE ###

doc/source/whatsnew/v1.3.0.rst

Lines changed: 59 additions & 75 deletions
Large diffs are not rendered by default.

pandas/_libs/groupby.pyx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -247,24 +247,24 @@ def group_cumsum(numeric[:, ::1] out,
247247
for j in range(K):
248248
val = values[i, j]
249249

250+
# For floats, use Kahan summation to reduce floating-point
251+
# error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
250252
if numeric == float32_t or numeric == float64_t:
251253
if val == val:
252254
y = val - compensation[lab, j]
253255
t = accum[lab, j] + y
254256
compensation[lab, j] = t - accum[lab, j] - y
255257
accum[lab, j] = t
256-
out[i, j] = accum[lab, j]
258+
out[i, j] = t
257259
else:
258260
out[i, j] = NaN
259261
if not skipna:
260262
accum[lab, j] = NaN
261263
break
262264
else:
263-
y = val - compensation[lab, j]
264-
t = accum[lab, j] + y
265-
compensation[lab, j] = t - accum[lab, j] - y
265+
t = val + accum[lab, j]
266266
accum[lab, j] = t
267-
out[i, j] = accum[lab, j]
267+
out[i, j] = t
268268

269269

270270
@cython.boundscheck(False)

pandas/compat/numpy/function.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,9 @@
1515
methods that are spread throughout the codebase. This module will make it
1616
easier to adjust to future upstream changes in the analogous numpy signatures.
1717
"""
18-
from typing import (
19-
Any,
20-
Dict,
21-
Optional,
22-
Union,
23-
)
18+
from __future__ import annotations
19+
20+
from typing import Any
2421

2522
from numpy import ndarray
2623

@@ -41,7 +38,7 @@ def __init__(
4138
self,
4239
defaults,
4340
fname=None,
44-
method: Optional[str] = None,
41+
method: str | None = None,
4542
max_fname_arg_count=None,
4643
):
4744
self.fname = fname
@@ -55,7 +52,7 @@ def __call__(
5552
kwargs,
5653
fname=None,
5754
max_fname_arg_count=None,
58-
method: Optional[str] = None,
55+
method: str | None = None,
5956
) -> None:
6057
if args or kwargs:
6158
fname = self.fname if fname is None else fname
@@ -119,7 +116,7 @@ def validate_argmax_with_skipna(skipna, args, kwargs):
119116
return skipna
120117

121118

122-
ARGSORT_DEFAULTS: Dict[str, Optional[Union[int, str]]] = {}
119+
ARGSORT_DEFAULTS: dict[str, int | str | None] = {}
123120
ARGSORT_DEFAULTS["axis"] = -1
124121
ARGSORT_DEFAULTS["kind"] = "quicksort"
125122
ARGSORT_DEFAULTS["order"] = None
@@ -132,7 +129,7 @@ def validate_argmax_with_skipna(skipna, args, kwargs):
132129

133130
# two different signatures of argsort, this second validation for when the
134131
# `kind` param is supported
135-
ARGSORT_DEFAULTS_KIND: Dict[str, Optional[int]] = {}
132+
ARGSORT_DEFAULTS_KIND: dict[str, int | None] = {}
136133
ARGSORT_DEFAULTS_KIND["axis"] = -1
137134
ARGSORT_DEFAULTS_KIND["order"] = None
138135
validate_argsort_kind = CompatValidator(
@@ -155,7 +152,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs):
155152
return ascending
156153

157154

158-
CLIP_DEFAULTS: Dict[str, Any] = {"out": None}
155+
CLIP_DEFAULTS: dict[str, Any] = {"out": None}
159156
validate_clip = CompatValidator(
160157
CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
161158
)
@@ -176,7 +173,7 @@ def validate_clip_with_axis(axis, args, kwargs):
176173
return axis
177174

178175

179-
CUM_FUNC_DEFAULTS: Dict[str, Any] = {}
176+
CUM_FUNC_DEFAULTS: dict[str, Any] = {}
180177
CUM_FUNC_DEFAULTS["dtype"] = None
181178
CUM_FUNC_DEFAULTS["out"] = None
182179
validate_cum_func = CompatValidator(
@@ -201,7 +198,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name):
201198
return skipna
202199

203200

204-
ALLANY_DEFAULTS: Dict[str, Optional[bool]] = {}
201+
ALLANY_DEFAULTS: dict[str, bool | None] = {}
205202
ALLANY_DEFAULTS["dtype"] = None
206203
ALLANY_DEFAULTS["out"] = None
207204
ALLANY_DEFAULTS["keepdims"] = False
@@ -224,28 +221,28 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name):
224221
MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1
225222
)
226223

227-
RESHAPE_DEFAULTS: Dict[str, str] = {"order": "C"}
224+
RESHAPE_DEFAULTS: dict[str, str] = {"order": "C"}
228225
validate_reshape = CompatValidator(
229226
RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1
230227
)
231228

232-
REPEAT_DEFAULTS: Dict[str, Any] = {"axis": None}
229+
REPEAT_DEFAULTS: dict[str, Any] = {"axis": None}
233230
validate_repeat = CompatValidator(
234231
REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1
235232
)
236233

237-
ROUND_DEFAULTS: Dict[str, Any] = {"out": None}
234+
ROUND_DEFAULTS: dict[str, Any] = {"out": None}
238235
validate_round = CompatValidator(
239236
ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1
240237
)
241238

242-
SORT_DEFAULTS: Dict[str, Optional[Union[int, str]]] = {}
239+
SORT_DEFAULTS: dict[str, int | str | None] = {}
243240
SORT_DEFAULTS["axis"] = -1
244241
SORT_DEFAULTS["kind"] = "quicksort"
245242
SORT_DEFAULTS["order"] = None
246243
validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs")
247244

248-
STAT_FUNC_DEFAULTS: Dict[str, Optional[Any]] = {}
245+
STAT_FUNC_DEFAULTS: dict[str, Any | None] = {}
249246
STAT_FUNC_DEFAULTS["dtype"] = None
250247
STAT_FUNC_DEFAULTS["out"] = None
251248

@@ -279,13 +276,13 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name):
279276
MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1
280277
)
281278

282-
STAT_DDOF_FUNC_DEFAULTS: Dict[str, Optional[bool]] = {}
279+
STAT_DDOF_FUNC_DEFAULTS: dict[str, bool | None] = {}
283280
STAT_DDOF_FUNC_DEFAULTS["dtype"] = None
284281
STAT_DDOF_FUNC_DEFAULTS["out"] = None
285282
STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False
286283
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs")
287284

288-
TAKE_DEFAULTS: Dict[str, Optional[str]] = {}
285+
TAKE_DEFAULTS: dict[str, str | None] = {}
289286
TAKE_DEFAULTS["out"] = None
290287
TAKE_DEFAULTS["mode"] = "raise"
291288
validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
@@ -392,7 +389,7 @@ def validate_resampler_func(method: str, args, kwargs) -> None:
392389
raise TypeError("too many arguments passed in")
393390

394391

395-
def validate_minmax_axis(axis: Optional[int], ndim: int = 1) -> None:
392+
def validate_minmax_axis(axis: int | None, ndim: int = 1) -> None:
396393
"""
397394
Ensure that the axis argument passed to min, max, argmin, or argmax is zero
398395
or None, as otherwise it will be incorrectly ignored.

0 commit comments

Comments
 (0)