Skip to content

Commit 9e55818

Browse files
committed
Merge remote-tracking branch 'upstream/master' into issue-19860
2 parents bef65fc + 2be2ba5 commit 9e55818

28 files changed

+299
-385
lines changed

ci/doctests.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then
2121

2222
# DataFrame / Series docstrings
2323
pytest --doctest-modules -v pandas/core/frame.py \
24-
-k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_records -to_stata -transform"
24+
-k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata -transform"
2525

2626
if [ $? -ne "0" ]; then
2727
RET=1

ci/lint.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ if [ "$LINT" ]; then
2020

2121
# pandas/_libs/src is C code, so no need to search there.
2222
echo "Linting *.py"
23-
flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C406,C408,C409,C410,E402,E731,E741,W503
23+
flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=C406,C408,C409,E402,E731,E741,W503
2424
if [ $? -ne "0" ]; then
2525
RET=1
2626
fi
2727
echo "Linting *.py DONE"
2828

2929
echo "Linting setup.py"
30-
flake8 setup.py --ignore=C406,C408,C409,C410,E402,E731,E741,W503
30+
flake8 setup.py --ignore=E402,E731,E741,W503
3131
if [ $? -ne "0" ]; then
3232
RET=1
3333
fi
@@ -41,21 +41,21 @@ if [ "$LINT" ]; then
4141
echo "Linting asv_bench/benchmarks/*.py DONE"
4242

4343
echo "Linting scripts/*.py"
44-
flake8 scripts --filename=*.py --ignore=C406,C408,C409,C410,E402,E731,E741,W503
44+
flake8 scripts --filename=*.py --ignore=C408,E402,E731,E741,W503
4545
if [ $? -ne "0" ]; then
4646
RET=1
4747
fi
4848
echo "Linting scripts/*.py DONE"
4949

5050
echo "Linting doc scripts"
51-
flake8 doc/make.py doc/source/conf.py --ignore=C406,C408,C409,C410,E402,E731,E741,W503
51+
flake8 doc/make.py doc/source/conf.py --ignore=E402,E731,E741,W503
5252
if [ $? -ne "0" ]; then
5353
RET=1
5454
fi
5555
echo "Linting doc scripts DONE"
5656

5757
echo "Linting *.pyx"
58-
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C407,C411
58+
flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
5959
if [ $? -ne "0" ]; then
6060
RET=1
6161
fi

doc/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2352,6 +2352,7 @@ Computations / Descriptive Stats
23522352
Resampler.std
23532353
Resampler.sum
23542354
Resampler.var
2355+
Resampler.quantile
23552356

23562357
Style
23572358
-----

doc/source/groupby.rst

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,8 @@ consider the following ``DataFrame``:
106106
.. versionadded:: 0.20
107107

108108
A string passed to ``groupby`` may refer to either a column or an index level.
109-
If a string matches both a column name and an index level name then a warning is
110-
issued and the column takes precedence. This will result in an ambiguity error
111-
in a future version.
109+
If a string matches both a column name and an index level name, a
110+
``ValueError`` will be raised.
112111

113112
.. ipython:: python
114113
@@ -389,7 +388,7 @@ This is mainly syntactic sugar for the alternative and much more verbose:
389388
Additionally this method avoids recomputing the internal grouping information
390389
derived from the passed key.
391390

392-
.. _groupby.iterating:
391+
.. _groupby.iterating-label:
393392

394393
Iterating through groups
395394
------------------------
@@ -415,8 +414,7 @@ In the case of grouping by multiple keys, the group name will be a tuple:
415414
...: print(group)
416415
...:
417416

418-
It's standard Python-fu but remember you can unpack the tuple in the for loop
419-
statement if you wish: ``for (k1, k2), group in grouped:``.
417+
See :ref:`timeseries.iterating-label`.
420418

421419
Selecting a group
422420
-----------------

doc/source/timeseries.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,24 @@ regularity will result in a ``DatetimeIndex``, although frequency is lost:
703703
704704
ts2[[0, 2, 6]].index
705705
706+
.. _timeseries.iterating-label:
707+
708+
Iterating through groups
709+
------------------------
710+
711+
With the ``Resampler`` object in hand, iterating through the grouped data is very
712+
natural and functions similarly to :py:func:`itertools.groupby`:
713+
714+
.. ipython:: python
715+
716+
resampled = df.resample('H')
717+
718+
for name, group in resampled:
719+
print(name)
720+
print(group)
721+
722+
See :ref:`groupby.iterating-label`.
723+
706724
.. _timeseries.components:
707725

708726
Time/Date Components

doc/source/whatsnew/v0.24.0.txt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ Pandas has gained the ability to hold integer dtypes with missing values. This l
4242
Here is an example of the usage.
4343

4444
We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value
45-
marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`)
45+
marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`, :issue:`22441`)
4646

4747
.. ipython:: python
4848

@@ -182,6 +182,8 @@ Other Enhancements
182182
- :func:`to_timedelta` now supports ISO-formatted timedelta strings (:issue:`21877`)
183183
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
184184
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
185+
- :class:`Resampler` is now iterable like :class:`GroupBy` (:issue:`15314`).
186+
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` method (:issue:`15023`).
185187

186188
.. _whatsnew_0240.api_breaking:
187189

@@ -521,6 +523,7 @@ Removal of prior version deprecations/changes
521523
- :meth:`Series.repeat` has renamed the ``reps`` argument to ``repeats`` (:issue:`14645`)
522524
- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`)
523525
- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`)
526+
- Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`)
524527
-
525528

526529
.. _whatsnew_0240.performance:
@@ -656,7 +659,11 @@ Indexing
656659
- Fixed ``DataFrame[np.nan]`` when columns are non-unique (:issue:`21428`)
657660
- Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`)
658661
- Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`)
659663
- Bug where mixed indexes wouldn't allow integers for ``.at`` (:issue:`19860`)
665+
- ``Float64Index.get_loc`` now raises ``KeyError`` when a boolean key is passed. (:issue:`19087`)
660667

661668
Missing
662669
^^^^^^^
@@ -677,6 +684,7 @@ I/O
677684

678685
- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
679686
- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
687+
- :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
680688
-
681689

682690
Plotting
@@ -694,7 +702,7 @@ Groupby/Resample/Rolling
694702
``SeriesGroupBy`` when the grouping variable only contains NaNs and numpy version < 1.13 (:issue:`21956`).
695703
- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'`` and a
696704
datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)
697-
-
705+
- Bug in :meth:`Resampler.apply` when passing positional arguments to applied func (:issue:`14615`).
698706

699707
Sparse
700708
^^^^^^

pandas/_libs/index.pyx

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -50,23 +50,7 @@ cpdef get_value_at(ndarray arr, object loc, object tz=None):
5050

5151

5252
cpdef object get_value_box(ndarray arr, object loc):
53-
cdef:
54-
Py_ssize_t i, sz
55-
56-
if util.is_float_object(loc):
57-
casted = int(loc)
58-
if casted == loc:
59-
loc = casted
60-
i = <Py_ssize_t> loc
61-
sz = cnp.PyArray_SIZE(arr)
62-
63-
if i < 0 and sz > 0:
64-
i += sz
65-
66-
if i >= sz or sz == 0 or i < 0:
67-
raise IndexError('index out of bounds')
68-
69-
return get_value_at(arr, i, tz=None)
53+
return get_value_at(arr, loc, tz=None)
7054

7155

7256
# Don't populate hash tables in monotonic indexes larger than this

pandas/_libs/util.pxd

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -72,23 +72,50 @@ cdef inline void set_array_not_contiguous(ndarray ao) nogil:
7272
(NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS))
7373

7474

75-
cdef inline object get_value_at(ndarray arr, object loc):
75+
cdef inline Py_ssize_t validate_indexer(ndarray arr, object loc) except -1:
76+
"""
77+
Cast the given indexer `loc` to an integer. If it is negative, i.e. a
78+
python-style indexing-from-the-end indexer, translate it to a
79+
from-the-front indexer. Raise if this is not possible.
80+
81+
Parameters
82+
----------
83+
arr : ndarray
84+
loc : object
85+
86+
Returns
87+
-------
88+
idx : Py_ssize_t
89+
90+
Raises
91+
------
92+
IndexError
93+
"""
7694
cdef:
77-
Py_ssize_t i, sz
95+
Py_ssize_t idx, size
7896
int casted
7997

8098
if is_float_object(loc):
8199
casted = int(loc)
82100
if casted == loc:
83101
loc = casted
84-
i = <Py_ssize_t> loc
85-
sz = cnp.PyArray_SIZE(arr)
86102

87-
if i < 0 and sz > 0:
88-
i += sz
89-
elif i >= sz or sz == 0:
103+
idx = <Py_ssize_t>loc
104+
size = cnp.PyArray_SIZE(arr)
105+
106+
if idx < 0 and size > 0:
107+
idx += size
108+
if idx >= size or size == 0 or idx < 0:
90109
raise IndexError('index out of bounds')
91110

111+
return idx
112+
113+
114+
cdef inline object get_value_at(ndarray arr, object loc):
115+
cdef:
116+
Py_ssize_t i
117+
118+
i = validate_indexer(arr, loc)
92119
return get_value_1d(arr, i)
93120

94121

@@ -99,19 +126,9 @@ cdef inline set_value_at_unsafe(ndarray arr, object loc, object value):
99126
flag above the loop and then eschew the check on each iteration.
100127
"""
101128
cdef:
102-
Py_ssize_t i, sz
103-
if is_float_object(loc):
104-
casted = int(loc)
105-
if casted == loc:
106-
loc = casted
107-
i = <Py_ssize_t> loc
108-
sz = cnp.PyArray_SIZE(arr)
109-
110-
if i < 0:
111-
i += sz
112-
elif i >= sz:
113-
raise IndexError('index out of bounds')
129+
Py_ssize_t i
114130

131+
i = validate_indexer(arr, loc)
115132
assign_value_1d(arr, i, value)
116133

117134

pandas/core/arrays/integer.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -409,8 +409,7 @@ def astype(self, dtype, copy=True):
409409

410410
# if we are astyping to an existing IntegerDtype we can fastpath
411411
if isinstance(dtype, _IntegerDtype):
412-
result = self._data.astype(dtype.numpy_dtype,
413-
casting='same_kind', copy=False)
412+
result = self._data.astype(dtype.numpy_dtype, copy=False)
414413
return type(self)(result, mask=self._mask, copy=False)
415414

416415
# coerce

0 commit comments

Comments
 (0)