Commit a7fdb62

Merge remote-tracking branch 'upstream/master' into remove_tests_multiple
2 parents f93999a + b104954 commit a7fdb62

19 files changed (+686, -652 lines)

doc/source/whatsnew/v1.0.0.rst

Lines changed: 1 addition & 0 deletions
@@ -307,6 +307,7 @@ Other
 - Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`)
 - Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`)
+- Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`)
 - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)

 .. _whatsnew_1000.contributors:
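
The whatsnew entry above is the user-facing side of the change implemented in pandas/core/algorithms.py below. A minimal sketch of the behavior it describes, assuming a pandas build that includes this fix:

    import pandas as pd

    s = pd.Series([True, False, True, True])
    # Previously this raised a TypeError because numpy forbids `-` on
    # boolean arrays; with the fix the result is the element-wise XOR
    # with the shifted values, returned as object dtype with NaN in slot 0.
    print(s.diff())
    # 0      NaN
    # 1     True
    # 2     True
    # 3    False
    # dtype: object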

pandas/core/algorithms.py

Lines changed: 4 additions & 0 deletions
@@ -1910,6 +1910,7 @@ def diff(arr, n: int, axis: int = 0):
     dtype = arr.dtype

     is_timedelta = False
+    is_bool = False
     if needs_i8_conversion(arr):
         dtype = np.float64
         arr = arr.view("i8")
@@ -1918,6 +1919,7 @@

     elif is_bool_dtype(dtype):
         dtype = np.object_
+        is_bool = True

     elif is_integer_dtype(dtype):
         dtype = np.float64
@@ -1959,6 +1961,8 @@
             result = res - lag
             result[mask] = na
             out_arr[res_indexer] = result
+        elif is_bool:
+            out_arr[res_indexer] = arr[res_indexer] ^ arr[lag_indexer]
         else:
             out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
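
The new `is_bool` branch above sidesteps integer subtraction, which numpy refuses for bool arrays, by XOR-ing the shifted slices. A minimal numpy-only sketch of that branch for the 1-D, n=1 case (the names `arr` and `out` here are illustrative, not the pandas internals):

    import numpy as np

    arr = np.array([True, False, True, True])
    out = np.empty(arr.shape, dtype=object)  # diff() promotes bool to object
    out[:1] = np.nan                         # positions with no prior value
    # Equivalent of `out_arr[res_indexer] = arr[res_indexer] ^ arr[lag_indexer]`
    out[1:] = arr[1:] ^ arr[:-1]
    print(out)  # [nan True True False]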

pandas/core/arrays/interval.py

Lines changed: 4 additions & 4 deletions
@@ -259,7 +259,7 @@ def _from_factorized(cls, values, original):
 closed : {'left', 'right', 'both', 'neither'}, default 'right'
     Whether the intervals are closed on the left-side, right-side, both
     or neither.
-copy : boolean, default False
+copy : bool, default False
     copy the data
 dtype : dtype or None, default None
     If None, dtype will be inferred
@@ -315,7 +315,7 @@ def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
 closed : {'left', 'right', 'both', 'neither'}, default 'right'
     Whether the intervals are closed on the left-side, right-side, both
     or neither.
-copy : boolean, default False
+copy : bool, default False
     Copy the data.
 dtype : dtype, optional
     If None, dtype will be inferred.
@@ -387,7 +387,7 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
 closed : {'left', 'right', 'both', 'neither'}, default 'right'
     Whether the intervals are closed on the left-side, right-side, both
     or neither.
-copy : boolean, default False
+copy : bool, default False
     by-default copy the data, this is compat only and ignored
 dtype : dtype or None, default None
     If None, dtype will be inferred
@@ -811,7 +811,7 @@ def value_counts(self, dropna=True):

 Parameters
 ----------
-dropna : boolean, default True
+dropna : bool, default True
     Don't include counts of NaN.

 Returns
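
The docstring fixes above touch the `copy` and `dtype` parameters of the `IntervalArray` constructors. A small usage sketch of `from_breaks` (the printed repr is approximate):

    import pandas as pd

    # Build intervals from consecutive breakpoints; copy=False avoids an
    # extra copy of the input and dtype=None lets the dtype be inferred,
    # as the docstrings above describe.
    arr = pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3], closed="right", copy=False)
    print(arr)
    # <IntervalArray>
    # [(0, 1], (1, 2], (2, 3]]
    # Length: 3, closed: right, dtype: interval[int64]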

pandas/core/dtypes/cast.py

Lines changed: 9 additions & 4 deletions
@@ -339,7 +339,7 @@ def maybe_promote(dtype, fill_value=np.nan):
     # if we passed an array here, determine the fill value by dtype
     if isinstance(fill_value, np.ndarray):
         if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)):
-            fill_value = iNaT
+            fill_value = fill_value.dtype.type("NaT", "ns")
         else:

             # we need to change to object type as our
@@ -350,9 +350,14 @@ def maybe_promote(dtype, fill_value=np.nan):

     # returns tuple of (dtype, fill_value)
     if issubclass(dtype.type, np.datetime64):
-        fill_value = tslibs.Timestamp(fill_value).value
+        fill_value = tslibs.Timestamp(fill_value).to_datetime64()
     elif issubclass(dtype.type, np.timedelta64):
-        fill_value = tslibs.Timedelta(fill_value).value
+        fv = tslibs.Timedelta(fill_value)
+        if fv is NaT:
+            # NaT has no `to_timedelta64` method
+            fill_value = np.timedelta64("NaT", "ns")
+        else:
+            fill_value = fv.to_timedelta64()
     elif is_datetime64tz_dtype(dtype):
         if isna(fill_value):
             fill_value = NaT
@@ -393,7 +398,7 @@ def maybe_promote(dtype, fill_value=np.nan):
         dtype = np.float64
         fill_value = np.nan
     elif is_datetime_or_timedelta_dtype(dtype):
-        fill_value = iNaT
+        fill_value = dtype.type("NaT", "ns")
     else:
         dtype = np.object_
         fill_value = np.nan
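
The change above swaps the bare integer `iNaT` sentinel for dtype-aware NaT scalars, so the promoted fill value keeps its datetime64/timedelta64 meaning instead of looking like a plain int64. A small numpy-only illustration of that distinction (not the pandas internals):

    import numpy as np

    int_sentinel = np.iinfo(np.int64).min       # what iNaT is under the hood
    typed_nat = np.datetime64("NaT", "ns")      # what the new code produces

    print(np.array([int_sentinel]).dtype)   # int64 -- the NaT meaning is lost
    print(np.array([typed_nat]).dtype)      # datetime64[ns]
    print(np.isnat(typed_nat))              # True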

pandas/core/frame.py

Lines changed: 28 additions & 26 deletions
@@ -304,7 +304,7 @@ class DataFrame(NDFrame):
 Parameters
 ----------
 data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
-    Dict can contain Series, arrays, constants, or list-like objects
+    Dict can contain Series, arrays, constants, or list-like objects.

     .. versionchanged:: 0.23.0
        If data is a dict, column order follows insertion-order for
@@ -316,14 +316,14 @@ class DataFrame(NDFrame):

 index : Index or array-like
     Index to use for resulting frame. Will default to RangeIndex if
-    no indexing information part of input data and no index provided
+    no indexing information part of input data and no index provided.
 columns : Index or array-like
     Column labels to use for resulting frame. Will default to
-    RangeIndex (0, 1, 2, ..., n) if no column labels are provided
+    RangeIndex (0, 1, 2, ..., n) if no column labels are provided.
 dtype : dtype, default None
-    Data type to force. Only a single dtype is allowed. If None, infer
+    Data type to force. Only a single dtype is allowed. If None, infer.
 copy : bool, default False
-    Copy data from inputs. Only affects DataFrame / 2d ndarray input
+    Copy data from inputs. Only affects DataFrame / 2d ndarray input.

 See Also
 --------
@@ -1544,20 +1544,20 @@ def from_records(
 data : ndarray (structured dtype), list of tuples, dict, or DataFrame
 index : str, list of fields, array-like
     Field of array to use as the index, alternately a specific set of
-    input labels to use
+    input labels to use.
 exclude : sequence, default None
-    Columns or fields to exclude
+    Columns or fields to exclude.
 columns : sequence, default None
     Column names to use. If the passed data do not have names
     associated with them, this argument provides names for the
     columns. Otherwise this argument indicates the order of the columns
     in the result (any names not found in the data will become all-NA
-    columns)
+    columns).
 coerce_float : bool, default False
     Attempt to convert values of non-string, non-numeric objects (like
-    decimal.Decimal) to floating point, useful for SQL result sets
+    decimal.Decimal) to floating point, useful for SQL result sets.
 nrows : int, default None
-    Number of rows to read if data is an iterator
+    Number of rows to read if data is an iterator.

 Returns
 -------
@@ -2118,8 +2118,8 @@ def to_parquet(
     .. versionadded:: 0.24.0

 partition_cols : list, optional, default None
-    Column names by which to partition the dataset
-    Columns are partitioned in the order they are given
+    Column names by which to partition the dataset.
+    Columns are partitioned in the order they are given.

     .. versionadded:: 0.24.0

@@ -3460,9 +3460,9 @@ def insert(self, loc, column, value, allow_duplicates=False):
 Parameters
 ----------
 loc : int
-    Insertion index. Must verify 0 <= loc <= len(columns)
+    Insertion index. Must verify 0 <= loc <= len(columns).
 column : str, number, or hashable object
-    label of the inserted column
+    Label of the inserted column.
 value : int, Series, or array-like
 allow_duplicates : bool, optional
 """
@@ -3681,9 +3681,9 @@ def lookup(self, row_labels, col_labels):
 Parameters
 ----------
 row_labels : sequence
-    The row labels to use for lookup
+    The row labels to use for lookup.
 col_labels : sequence
-    The column labels to use for lookup
+    The column labels to use for lookup.

 Returns
 -------
@@ -4770,14 +4770,14 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False):
 ----------
 subset : column label or sequence of labels, optional
     Only consider certain columns for identifying duplicates, by
-    default use all of the columns
+    default use all of the columns.
 keep : {'first', 'last', False}, default 'first'
     Determines which duplicates (if any) to keep.
     - ``first`` : Drop duplicates except for the first occurrence.
     - ``last`` : Drop duplicates except for the last occurrence.
     - False : Drop all duplicates.
 inplace : bool, default False
-    Whether to drop duplicates in place or to return a copy
+    Whether to drop duplicates in place or to return a copy.

 Returns
 -------
@@ -4805,7 +4805,7 @@ def duplicated(self, subset=None, keep="first"):
 ----------
 subset : column label or sequence of labels, optional
     Only consider certain columns for identifying duplicates, by
-    default use all of the columns
+    default use all of the columns.
 keep : {'first', 'last', False}, default 'first'
     Determines which duplicates (if any) to mark.

@@ -6233,9 +6233,9 @@ def unstack(self, level=-1, fill_value=None):
 Parameters
 ----------
 level : int, str, or list of these, default -1 (last level)
-    Level(s) of index to unstack, can pass level name
+    Level(s) of index to unstack, can pass level name.
 fill_value : int, string or dict
-    Replace NaN with this value if the unstack produces missing values
+    Replace NaN with this value if the unstack produces missing values.

 Returns
 -------
@@ -7368,7 +7368,8 @@ def corr(self, method="pearson", min_periods=1):
     * callable: callable with input two 1d ndarrays
       and returning a float. Note that the returned matrix from corr
       will have 1 along the diagonals and will be symmetric
-      regardless of the callable's behavior
+      regardless of the callable's behavior.
+
       .. versionadded:: 0.24.0

 min_periods : int, optional
@@ -7572,7 +7573,7 @@ def corrwith(self, other, axis=0, drop=False, method="pearson"):
     * kendall : Kendall Tau correlation coefficient
     * spearman : Spearman rank correlation
     * callable: callable with input two 1d ndarrays
-      and returning a float
+      and returning a float.

       .. versionadded:: 0.24.0

@@ -7948,7 +7949,7 @@ def idxmin(self, axis=0, skipna=True):
 ----------
 axis : {0 or 'index', 1 or 'columns'}, default 0
     The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise
-skipna : boolean, default True
+skipna : bool, default True
     Exclude NA/null values. If an entire row/column is NA, the result
     will be NA.

@@ -7985,7 +7986,7 @@ def idxmax(self, axis=0, skipna=True):
 ----------
 axis : {0 or 'index', 1 or 'columns'}, default 0
     The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise
-skipna : boolean, default True
+skipna : bool, default True
     Exclude NA/null values. If an entire row/column is NA, the result
     will be NA.

@@ -8037,8 +8038,8 @@ def mode(self, axis=0, numeric_only=False, dropna=True):
     The axis to iterate over while searching for the mode:

     * 0 or 'index' : get mode of each column
-    * 1 or 'columns' : get mode of each row
+    * 1 or 'columns' : get mode of each row.
+
 numeric_only : bool, default False
     If True, only apply to numeric columns.
 dropna : bool, default True
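
The `corr` docstring touched above notes that `method` may be a callable taking two 1-D ndarrays and returning a float, with the diagonal forced to 1 regardless of what the callable returns. A short sketch; `histogram_intersection` is just an illustrative choice of callable, and the printed values are approximate:

    import numpy as np
    import pandas as pd

    def histogram_intersection(a, b):
        # Any float-valued function of two 1-D arrays can be passed as `method`.
        return np.minimum(a / a.sum(), b / b.sum()).sum()

    df = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [3.0, 2.0, 1.0]})
    print(df.corr(method=histogram_intersection))
    #           a         b
    # a  1.000000  0.666667
    # b  0.666667  1.000000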

pandas/core/groupby/generic.py

Lines changed: 1 addition & 1 deletion
@@ -709,7 +709,7 @@ def filter(self, func, dropna=True, *args, **kwargs):
 f : function
     Function to apply to each subframe. Should return True or False.
 dropna : Drop groups that do not pass the filter. True by default;
-    if False, groups that evaluate False are filled with NaNs.
+    If False, groups that evaluate False are filled with NaNs.

 Returns
 -------