Skip to content

Commit 986f97a

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas
2 parents 1cd53fa + 178acf6 commit 986f97a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+894
-696
lines changed

.pre-commit-config.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,28 @@ repos:
9999
language: pygrep
100100
entry: (\.\. code-block ::|\.\. ipython ::)
101101
files: \.(py|pyx|rst)$
102+
- id: unwanted-patterns-strings-to-concatenate
103+
name: Check for use of not concatenated strings
104+
language: python
105+
entry: ./scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
106+
files: \.(py|pyx|pxd|pxi)$
107+
- id: unwanted-patterns-strings-with-wrong-placed-whitespace
108+
name: Check for strings with wrong placed spaces
109+
language: python
110+
entry: ./scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
111+
files: \.(py|pyx|pxd|pxi)$
112+
- id: unwanted-patterns-private-import-across-module
113+
name: Check for import of private attributes across modules
114+
language: python
115+
entry: ./scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
116+
types: [python]
117+
exclude: ^(asv_bench|pandas/_vendored|pandas/tests|doc)/
118+
- id: unwanted-patterns-private-function-across-module
119+
name: Check for use of private functions across modules
120+
language: python
121+
entry: ./scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
122+
types: [python]
123+
exclude: ^(asv_bench|pandas/_vendored|pandas/tests|doc)/
102124
- repo: https://github.com/asottile/yesqa
103125
rev: v1.2.2
104126
hooks:

ci/code_checks.sh

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -73,38 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
7373
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
7474
RET=$(($RET + $?)) ; echo $MSG "DONE"
7575

76-
MSG='Check for use of not concatenated strings' ; echo $MSG
77-
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
78-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" --format="##[error]{source_path}:{line_number}:{msg}" .
79-
else
80-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" .
81-
fi
82-
RET=$(($RET + $?)) ; echo $MSG "DONE"
83-
84-
MSG='Check for strings with wrong placed spaces' ; echo $MSG
85-
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
86-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" --format="##[error]{source_path}:{line_number}:{msg}" .
87-
else
88-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" .
89-
fi
90-
RET=$(($RET + $?)) ; echo $MSG "DONE"
91-
92-
MSG='Check for import of private attributes across modules' ; echo $MSG
93-
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
94-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
95-
else
96-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
97-
fi
98-
RET=$(($RET + $?)) ; echo $MSG "DONE"
99-
100-
MSG='Check for use of private functions across modules' ; echo $MSG
101-
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
102-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
103-
else
104-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
105-
fi
106-
RET=$(($RET + $?)) ; echo $MSG "DONE"
107-
10876
fi
10977

11078
### PATTERNS ###

doc/source/user_guide/io.rst

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5686,7 +5686,7 @@ ignored.
56865686
dtypes: float64(1), int64(1)
56875687
memory usage: 15.3 MB
56885688
5689-
Given the next test set:
5689+
The following test functions will be used below to compare the performance of several IO methods:
56905690

56915691
.. code-block:: python
56925692
@@ -5791,7 +5791,7 @@ Given the next test set:
57915791
def test_parquet_read():
57925792
pd.read_parquet("test.parquet")
57935793
5794-
When writing, the top-three functions in terms of speed are ``test_feather_write``, ``test_hdf_fixed_write`` and ``test_hdf_fixed_write_compress``.
5794+
When writing, the top three functions in terms of speed are ``test_feather_write``, ``test_hdf_fixed_write`` and ``test_hdf_fixed_write_compress``.
57955795

57965796
.. code-block:: ipython
57975797
@@ -5825,7 +5825,7 @@ When writing, the top-three functions in terms of speed are ``test_feather_write
58255825
In [13]: %timeit test_parquet_write(df)
58265826
67.6 ms ± 706 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
58275827
5828-
When reading, the top three are ``test_feather_read``, ``test_pickle_read`` and
5828+
When reading, the top three functions in terms of speed are ``test_feather_read``, ``test_pickle_read`` and
58295829
``test_hdf_fixed_read``.
58305830

58315831

@@ -5862,8 +5862,7 @@ When reading, the top three are ``test_feather_read``, ``test_pickle_read`` and
58625862
24.4 ms ± 146 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
58635863
58645864
5865-
For this test case ``test.pkl.compress``, ``test.parquet`` and ``test.feather`` took the least space on disk.
5866-
Space on disk (in bytes)
5865+
The files ``test.pkl.compress``, ``test.parquet`` and ``test.feather`` took the least space on disk (in bytes).
58675866

58685867
.. code-block:: none
58695868

doc/source/whatsnew/v1.2.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,7 @@ Datetimelike
374374
- Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`)
375375
- :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`)
376376
- Bug in :meth:`DatetimeIndex.equals` and :meth:`TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`)
377+
- Bug in :meth:`TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`)
377378

378379
Timedelta
379380
^^^^^^^^^
@@ -402,6 +403,7 @@ Numeric
402403
- Bug in :class:`DataFrame` arithmetic ops incorrectly accepting keyword arguments (:issue:`36843`)
403404
- Bug in :class:`IntervalArray` comparisons with :class:`Series` not returning :class:`Series` (:issue:`36908`)
404405
- Bug in :class:`DataFrame` allowing arithmetic operations with list of array-likes with undefined results. Behavior changed to raising ``ValueError`` (:issue:`36702`)
406+
- Bug in :meth:`DataFrame.std` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`37392`)
405407

406408
Conversion
407409
^^^^^^^^^^

pandas/_libs/window/aggregations.pyx

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ cdef:
5858
cdef inline int int_max(int a, int b): return a if a >= b else b
5959
cdef inline int int_min(int a, int b): return a if a <= b else b
6060

61-
cdef bint is_monotonic_start_end_bounds(
61+
cdef bint is_monotonic_increasing_start_end_bounds(
6262
ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
6363
):
6464
return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
@@ -143,9 +143,11 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
143143
int64_t s, e
144144
int64_t nobs = 0, i, j, N = len(values)
145145
ndarray[float64_t] output
146-
bint is_monotonic_bounds
146+
bint is_monotonic_increasing_bounds
147147

148-
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
148+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
149+
start, end
150+
)
149151
output = np.empty(N, dtype=float)
150152

151153
with nogil:
@@ -154,7 +156,7 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
154156
s = start[i]
155157
e = end[i]
156158

157-
if i == 0 or not is_monotonic_bounds:
159+
if i == 0 or not is_monotonic_increasing_bounds:
158160

159161
# setup
160162

@@ -173,9 +175,10 @@ def roll_sum(ndarray[float64_t] values, ndarray[int64_t] start,
173175

174176
output[i] = calc_sum(minp, nobs, sum_x)
175177

176-
if not is_monotonic_bounds:
177-
for j in range(s, e):
178-
remove_sum(values[j], &nobs, &sum_x, &compensation_remove)
178+
if not is_monotonic_increasing_bounds:
179+
nobs = 0
180+
sum_x = 0.0
181+
compensation_remove = 0.0
179182

180183
return output
181184

@@ -244,9 +247,11 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
244247
int64_t s, e
245248
Py_ssize_t nobs = 0, i, j, neg_ct = 0, N = len(values)
246249
ndarray[float64_t] output
247-
bint is_monotonic_bounds
250+
bint is_monotonic_increasing_bounds
248251

249-
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
252+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
253+
start, end
254+
)
250255
output = np.empty(N, dtype=float)
251256

252257
with nogil:
@@ -255,7 +260,7 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
255260
s = start[i]
256261
e = end[i]
257262

258-
if i == 0 or not is_monotonic_bounds:
263+
if i == 0 or not is_monotonic_increasing_bounds:
259264

260265
# setup
261266
for j in range(s, e):
@@ -276,10 +281,11 @@ def roll_mean(ndarray[float64_t] values, ndarray[int64_t] start,
276281

277282
output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
278283

279-
if not is_monotonic_bounds:
280-
for j in range(s, e):
281-
val = values[j]
282-
remove_mean(val, &nobs, &sum_x, &neg_ct, &compensation_remove)
284+
if not is_monotonic_increasing_bounds:
285+
nobs = 0
286+
neg_ct = 0
287+
sum_x = 0.0
288+
compensation_remove = 0.0
283289
return output
284290

285291
# ----------------------------------------------------------------------
@@ -367,10 +373,12 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,
367373
int64_t s, e
368374
Py_ssize_t i, j, N = len(values)
369375
ndarray[float64_t] output
370-
bint is_monotonic_bounds
376+
bint is_monotonic_increasing_bounds
371377

372378
minp = max(minp, 1)
373-
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
379+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
380+
start, end
381+
)
374382
output = np.empty(N, dtype=float)
375383

376384
with nogil:
@@ -382,7 +390,7 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,
382390

383391
# Over the first window, observations can only be added
384392
# never removed
385-
if i == 0 or not is_monotonic_bounds:
393+
if i == 0 or not is_monotonic_increasing_bounds:
386394

387395
for j in range(s, e):
388396
add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add)
@@ -403,10 +411,11 @@ def roll_var(ndarray[float64_t] values, ndarray[int64_t] start,
403411

404412
output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
405413

406-
if not is_monotonic_bounds:
407-
for j in range(s, e):
408-
remove_var(values[j], &nobs, &mean_x, &ssqdm_x,
409-
&compensation_remove)
414+
if not is_monotonic_increasing_bounds:
415+
nobs = 0.0
416+
mean_x = 0.0
417+
ssqdm_x = 0.0
418+
compensation_remove = 0.0
410419

411420
return output
412421

@@ -486,10 +495,12 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
486495
int64_t nobs = 0, i, j, N = len(values)
487496
int64_t s, e
488497
ndarray[float64_t] output
489-
bint is_monotonic_bounds
498+
bint is_monotonic_increasing_bounds
490499

491500
minp = max(minp, 3)
492-
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
501+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
502+
start, end
503+
)
493504
output = np.empty(N, dtype=float)
494505

495506
with nogil:
@@ -501,7 +512,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
501512

502513
# Over the first window, observations can only be added
503514
# never removed
504-
if i == 0 or not is_monotonic_bounds:
515+
if i == 0 or not is_monotonic_increasing_bounds:
505516

506517
for j in range(s, e):
507518
val = values[j]
@@ -524,10 +535,11 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
524535

525536
output[i] = calc_skew(minp, nobs, x, xx, xxx)
526537

527-
if not is_monotonic_bounds:
528-
for j in range(s, e):
529-
val = values[j]
530-
remove_skew(val, &nobs, &x, &xx, &xxx)
538+
if not is_monotonic_increasing_bounds:
539+
nobs = 0
540+
x = 0.0
541+
xx = 0.0
542+
xxx = 0.0
531543

532544
return output
533545

@@ -611,10 +623,12 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
611623
float64_t x = 0, xx = 0, xxx = 0, xxxx = 0
612624
int64_t nobs = 0, i, j, s, e, N = len(values)
613625
ndarray[float64_t] output
614-
bint is_monotonic_bounds
626+
bint is_monotonic_increasing_bounds
615627

616628
minp = max(minp, 4)
617-
is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
629+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
630+
start, end
631+
)
618632
output = np.empty(N, dtype=float)
619633

620634
with nogil:
@@ -626,7 +640,7 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
626640

627641
# Over the first window, observations can only be added
628642
# never removed
629-
if i == 0 or not is_monotonic_bounds:
643+
if i == 0 or not is_monotonic_increasing_bounds:
630644

631645
for j in range(s, e):
632646
add_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx)
@@ -646,9 +660,12 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
646660

647661
output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)
648662

649-
if not is_monotonic_bounds:
650-
for j in range(s, e):
651-
remove_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx)
663+
if not is_monotonic_increasing_bounds:
664+
nobs = 0
665+
x = 0.0
666+
xx = 0.0
667+
xxx = 0.0
668+
xxxx = 0.0
652669

653670
return output
654671

pandas/core/arrays/timedeltas.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -381,14 +381,12 @@ def sum(
381381
nv.validate_sum(
382382
(), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
383383
)
384-
if not self.size and (self.ndim == 1 or axis is None):
385-
return NaT
386384

387385
result = nanops.nansum(
388-
self._data, axis=axis, skipna=skipna, min_count=min_count
386+
self._ndarray, axis=axis, skipna=skipna, min_count=min_count
389387
)
390-
if is_scalar(result):
391-
return Timedelta(result)
388+
if axis is None or self.ndim == 1:
389+
return self._box_func(result)
392390
return self._from_backing_data(result)
393391

394392
def std(
@@ -403,13 +401,11 @@ def std(
403401
nv.validate_stat_ddof_func(
404402
(), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std"
405403
)
406-
if not len(self):
407-
return NaT
408-
if not skipna and self._hasnans:
409-
return NaT
410404

411-
result = nanops.nanstd(self._data, axis=axis, skipna=skipna, ddof=ddof)
412-
return Timedelta(result)
405+
result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
406+
if axis is None or self.ndim == 1:
407+
return self._box_func(result)
408+
return self._from_backing_data(result)
413409

414410
# ----------------------------------------------------------------
415411
# Rendering Methods

pandas/core/base.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1201,6 +1201,16 @@ def factorize(self, sort: bool = False, na_sentinel: Optional[int] = -1):
12011201
>>> ser.searchsorted([1, 3], side='right')
12021202
array([1, 3])
12031203
1204+
>>> ser = pd.Series(pd.to_datetime(['3/11/2000', '3/12/2000', '3/13/2000']))
1205+
>>> ser
1206+
0 2000-03-11
1207+
1 2000-03-12
1208+
2 2000-03-13
1209+
dtype: datetime64[ns]
1210+
1211+
>>> ser.searchsorted('3/14/2000')
1212+
3
1213+
12041214
>>> ser = pd.Categorical(
12051215
... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
12061216
... )

0 commit comments

Comments
 (0)