Skip to content

Commit b52b0e2

Browse files
committed
Merge remote-tracking branch 'upstream/main' into finalize
2 parents e35b5fe + 5de2448 commit b52b0e2

File tree

94 files changed

+1030
-398
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+1030
-398
lines changed

.github/workflows/docbuild-and-upload.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ jobs:
8181
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
8282

8383
- name: Upload prod docs
84-
run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/version/${GITHUB_REF_NAME}
84+
run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/version/${GITHUB_REF_NAME:1}
8585
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
8686

8787
- name: Move docs into site directory

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,10 @@ repos:
258258
|/_testing/
259259
- id: autotyping
260260
name: autotyping
261-
entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics
261+
entry: python -m libcst.tool codemod autotyping.AutotypeCommand --none-return --scalar-return --annotate-magics --annotate-imprecise-magics --bool-param
262262
types_or: [python, pyi]
263263
files: ^pandas
264-
exclude: ^(pandas/tests|pandas/io/clipboard)
264+
exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard)
265265
language: python
266266
additional_dependencies:
267267
- autotyping==22.9.0

asv_bench/benchmarks/index_object.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,28 +19,38 @@
1919
class SetOperations:
2020

2121
params = (
22-
["datetime", "date_string", "int", "strings"],
22+
["monotonic", "non_monotonic"],
23+
["datetime", "date_string", "int", "strings", "ea_int"],
2324
["intersection", "union", "symmetric_difference"],
2425
)
25-
param_names = ["dtype", "method"]
26+
param_names = ["index_structure", "dtype", "method"]
2627

27-
def setup(self, dtype, method):
28+
def setup(self, index_structure, dtype, method):
2829
N = 10**5
2930
dates_left = date_range("1/1/2000", periods=N, freq="T")
3031
fmt = "%Y-%m-%d %H:%M:%S"
3132
date_str_left = Index(dates_left.strftime(fmt))
3233
int_left = Index(np.arange(N))
34+
ea_int_left = Index(np.arange(N), dtype="Int64")
3335
str_left = tm.makeStringIndex(N)
36+
3437
data = {
35-
"datetime": {"left": dates_left, "right": dates_left[:-1]},
36-
"date_string": {"left": date_str_left, "right": date_str_left[:-1]},
37-
"int": {"left": int_left, "right": int_left[:-1]},
38-
"strings": {"left": str_left, "right": str_left[:-1]},
38+
"datetime": dates_left,
39+
"date_string": date_str_left,
40+
"int": int_left,
41+
"strings": str_left,
42+
"ea_int": ea_int_left,
3943
}
44+
45+
if index_structure == "non_monotonic":
46+
data = {k: mi[::-1] for k, mi in data.items()}
47+
48+
data = {k: {"left": idx, "right": idx[:-1]} for k, idx in data.items()}
49+
4050
self.left = data[dtype]["left"]
4151
self.right = data[dtype]["right"]
4252

43-
def time_operation(self, dtype, method):
53+
def time_operation(self, index_structure, dtype, method):
4454
getattr(self.left, method)(self.right)
4555

4656

asv_bench/benchmarks/join_merge.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
DataFrame,
77
MultiIndex,
88
Series,
9+
array,
910
concat,
1011
date_range,
1112
merge,
@@ -411,6 +412,42 @@ def time_multiby(self, direction, tolerance):
411412
)
412413

413414

415+
class MergeMultiIndex:
416+
params = [
417+
[
418+
("int64", "int64"),
419+
("datetime64[ns]", "int64"),
420+
("Int64", "Int64"),
421+
],
422+
["left", "right", "inner", "outer"],
423+
]
424+
param_names = ["dtypes", "how"]
425+
426+
def setup(self, dtypes, how):
427+
n = 100_000
428+
offset = 50_000
429+
mi1 = MultiIndex.from_arrays(
430+
[
431+
array(np.arange(n), dtype=dtypes[0]),
432+
array(np.arange(n), dtype=dtypes[1]),
433+
]
434+
)
435+
mi2 = MultiIndex.from_arrays(
436+
[
437+
array(np.arange(offset, n + offset), dtype=dtypes[0]),
438+
array(np.arange(offset, n + offset), dtype=dtypes[1]),
439+
]
440+
)
441+
self.df1 = DataFrame({"col1": 1}, index=mi1)
442+
self.df2 = DataFrame({"col2": 2}, index=mi2)
443+
444+
def time_merge_sorted_multiindex(self, dtypes, how):
445+
# copy to avoid MultiIndex._values caching
446+
df1 = self.df1.copy()
447+
df2 = self.df2.copy()
448+
merge(df1, df2, how=how, left_index=True, right_index=True)
449+
450+
414451
class Align:
415452
def setup(self):
416453
size = 5 * 10**5

asv_bench/benchmarks/multiindex_object.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ class SetOperations:
237237

238238
params = [
239239
("monotonic", "non_monotonic"),
240-
("datetime", "int", "string"),
240+
("datetime", "int", "string", "ea_int"),
241241
("intersection", "union", "symmetric_difference"),
242242
]
243243
param_names = ["index_structure", "dtype", "method"]
@@ -255,10 +255,14 @@ def setup(self, index_structure, dtype, method):
255255
level2 = tm.makeStringIndex(N // 1000).values
256256
str_left = MultiIndex.from_product([level1, level2])
257257

258+
level2 = range(N // 1000)
259+
ea_int_left = MultiIndex.from_product([level1, Series(level2, dtype="Int64")])
260+
258261
data = {
259262
"datetime": dates_left,
260263
"int": int_left,
261264
"string": str_left,
265+
"ea_int": ea_int_left,
262266
}
263267

264268
if index_structure == "non_monotonic":
@@ -299,4 +303,36 @@ def time_unique_dups(self, dtype_val):
299303
self.midx_dups.unique()
300304

301305

306+
class Isin:
307+
params = [
308+
("string", "int", "datetime"),
309+
]
310+
param_names = ["dtype"]
311+
312+
def setup(self, dtype):
313+
N = 10**5
314+
level1 = range(1000)
315+
316+
level2 = date_range(start="1/1/2000", periods=N // 1000)
317+
dates_midx = MultiIndex.from_product([level1, level2])
318+
319+
level2 = range(N // 1000)
320+
int_midx = MultiIndex.from_product([level1, level2])
321+
322+
level2 = tm.makeStringIndex(N // 1000).values
323+
str_midx = MultiIndex.from_product([level1, level2])
324+
325+
data = {
326+
"datetime": dates_midx,
327+
"int": int_midx,
328+
"string": str_midx,
329+
}
330+
331+
self.midx = data[dtype]
332+
self.values = self.midx[:100]
333+
334+
def time_isin(self, dtype):
335+
self.midx.isin(self.values)
336+
337+
302338
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/_templates/sidebar-nav-bs.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
2-
<div class="bd-toc-item active">
2+
<div class="bd-toc-item navbar-nav">
33
{% if pagename.startswith("reference") %}
44
{{ generate_toctree_html("sidebar", maxdepth=4, collapse=True, includehidden=True, titles_only=True) }}
55
{% else %}

doc/source/whatsnew/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 1.5
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v1.5.1
2728
v1.5.0
2829

2930
Version 1.4

doc/source/whatsnew/v1.5.0.rst

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.. _whatsnew_150:
22

3-
What's new in 1.5.0 (??)
4-
------------------------
3+
What's new in 1.5.0 (September 19, 2022)
4+
----------------------------------------
55

66
These are the changes in pandas 1.5.0. See :ref:`release` for a full changelog
77
including other versions of pandas.
@@ -1214,12 +1214,10 @@ Sparse
12141214
^^^^^^
12151215
- Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``SparseDtype`` failing to retain the array's ``fill_value`` (:issue:`45691`)
12161216
- Bug in :meth:`SparseArray.unique` fails to keep original elements order (:issue:`47809`)
1217-
-
12181217

12191218
ExtensionArray
12201219
^^^^^^^^^^^^^^
12211220
- Bug in :meth:`IntegerArray.searchsorted` and :meth:`FloatingArray.searchsorted` returning inconsistent results when acting on ``np.nan`` (:issue:`45255`)
1222-
-
12231221

12241222
Styler
12251223
^^^^^^
@@ -1234,18 +1232,18 @@ Metadata
12341232
^^^^^^^^
12351233
- Fixed metadata propagation in :meth:`DataFrame.melt` (:issue:`28283`)
12361234
- Fixed metadata propagation in :meth:`DataFrame.explode` (:issue:`28283`)
1237-
-
12381235

12391236
Other
12401237
^^^^^
12411238

12421239
.. ***DO NOT USE THIS SECTION***
12431240
12441241
- Bug in :func:`.assert_index_equal` with ``names=True`` and ``check_order=False`` not checking names (:issue:`47328`)
1245-
-
12461242

12471243
.. ---------------------------------------------------------------------------
12481244
.. _whatsnew_150.contributors:
12491245

12501246
Contributors
12511247
~~~~~~~~~~~~
1248+
1249+
.. contributors:: v1.4.4..v1.5.0|HEAD

doc/source/whatsnew/v1.5.1.rst

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
.. _whatsnew_151:
2+
3+
What's new in 1.5.1 (October ??, 2022)
4+
--------------------------------------
5+
6+
These are the changes in pandas 1.5.1. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
13+
.. _whatsnew_151.regressions:
14+
15+
Fixed regressions
16+
~~~~~~~~~~~~~~~~~
17+
-
18+
-
19+
20+
.. ---------------------------------------------------------------------------
21+
22+
.. _whatsnew_151.bug_fixes:
23+
24+
Bug fixes
25+
~~~~~~~~~
26+
-
27+
-
28+
29+
.. ---------------------------------------------------------------------------
30+
31+
.. _whatsnew_151.other:
32+
33+
Other
34+
~~~~~
35+
-
36+
-
37+
38+
.. ---------------------------------------------------------------------------
39+
40+
.. _whatsnew_151.contributors:
41+
42+
Contributors
43+
~~~~~~~~~~~~

doc/source/whatsnew/v1.6.0.rst

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,12 @@ enhancement2
2828

2929
Other enhancements
3030
^^^^^^^^^^^^^^^^^^
31+
- :func:`read_sas` now supports using ``encoding='infer'`` to correctly read and use the encoding specified by the SAS file. (:issue:`48048`)
3132
- :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` now preserve nullable dtypes instead of casting to numpy dtypes (:issue:`37493`)
3233
- :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`)
3334
- :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`)
3435
- Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`)
36+
- Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`)
3537
-
3638

3739
.. ---------------------------------------------------------------------------
@@ -44,8 +46,33 @@ These are bug fixes that might have notable behavior changes.
4446

4547
.. _whatsnew_160.notable_bug_fixes.notable_bug_fix1:
4648

47-
notable_bug_fix1
48-
^^^^^^^^^^^^^^^^
49+
:meth:`.GroupBy.cumsum` and :meth:`.GroupBy.cumprod` overflow instead of lossy casting to float
50+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
51+
52+
In previous versions we cast to float when applying ``cumsum`` and ``cumprod`` which
53+
led to incorrect results even if the result could be held by ``int64`` dtype.
54+
Additionally, the aggregation now overflows, consistent with numpy and the regular
55+
:meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods when the limit of
56+
``int64`` is reached (:issue:`37493`).
57+
58+
*Old Behavior*
59+
60+
.. code-block:: ipython
61+
62+
In [1]: df = pd.DataFrame({"key": ["b"] * 7, "value": 625})
63+
In [2]: df.groupby("key")["value"].cumprod()[5]
64+
Out[2]: 5.960464477539062e+16
65+
66+
We return incorrect results with the 6th value.
67+
68+
*New Behavior*
69+
70+
.. ipython:: python
71+
72+
df = pd.DataFrame({"key": ["b"] * 7, "value": 625})
73+
df.groupby("key")["value"].cumprod()
74+
75+
We overflow with the 7th value, but the 6th value is still correct.
4976

5077
.. _whatsnew_160.notable_bug_fixes.notable_bug_fix2:
5178

@@ -103,14 +130,17 @@ Deprecations
103130

104131
Performance improvements
105132
~~~~~~~~~~~~~~~~~~~~~~~~
106-
- Performance improvement in :meth:`.DataFrameGroupBy.median` and :meth:`.SeriesGroupBy.median` for nullable dtypes (:issue:`37493`)
133+
- Performance improvement in :meth:`.DataFrameGroupBy.median`, :meth:`.SeriesGroupBy.median` and :meth:`.GroupBy.cumprod` for nullable dtypes (:issue:`37493`)
107134
- Performance improvement in :meth:`MultiIndex.argsort` and :meth:`MultiIndex.sort_values` (:issue:`48406`)
108135
- Performance improvement in :meth:`MultiIndex.union` without missing values and without duplicates (:issue:`48505`)
109136
- Performance improvement in :meth:`.DataFrameGroupBy.mean`, :meth:`.SeriesGroupBy.mean`, :meth:`.DataFrameGroupBy.var`, and :meth:`.SeriesGroupBy.var` for extension array dtypes (:issue:`37493`)
137+
- Performance improvement in :meth:`MultiIndex.isin` when ``level=None`` (:issue:`48622`)
110138
- Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`)
111139
- Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`)
140+
- Performance improvement in :func:`merge` and :meth:`DataFrame.join` when joining on a sorted :class:`MultiIndex` (:issue:`48504`)
112141
- Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`)
113142
- Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
143+
- Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
114144
- Performance improvement in ``var`` for nullable dtypes (:issue:`48379`).
115145
- Performance improvement to :func:`read_sas` with ``blank_missing=True`` (:issue:`48502`)
116146
-
@@ -150,6 +180,7 @@ Conversion
150180
^^^^^^^^^^
151181
- Bug in constructing :class:`Series` with ``int64`` dtype from a string list raising instead of casting (:issue:`44923`)
152182
- Bug in :meth:`DataFrame.eval` incorrectly raising an ``AttributeError`` when there are negative values in function call (:issue:`46471`)
183+
- Bug where any :class:`ExtensionDtype` subclass with ``kind="M"`` would be interpreted as a timezone type (:issue:`34986`)
153184
-
154185

155186
Strings
@@ -175,9 +206,12 @@ Missing
175206

176207
MultiIndex
177208
^^^^^^^^^^
209+
- Bug in :meth:`MultiIndex.set_levels` raising ``IndexError`` when setting an empty level (:issue:`48636`)
178210
- Bug in :meth:`MultiIndex.unique` losing extension array dtype (:issue:`48335`)
211+
- Bug in :meth:`MultiIndex.intersection` losing extension array (:issue:`48604`)
179212
- Bug in :meth:`MultiIndex.union` losing extension array (:issue:`48498`, :issue:`48505`)
180213
- Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`)
214+
- Bug in :meth:`MultiIndex.symmetric_difference` losing extension array (:issue:`48607`)
181215
-
182216

183217
I/O

0 commit comments

Comments
 (0)