Skip to content

Commit 17a0514

Browse files
committed
Merge remote-tracking branch 'upstream/master' into pd.Series.map_performance
2 parents e523480 + 0159cba commit 17a0514

File tree

152 files changed

+3021
-1757
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

152 files changed

+3021
-1757
lines changed

.devcontainer.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
"python.linting.pylintEnabled": false,
1818
"python.linting.mypyEnabled": true,
1919
"python.testing.pytestEnabled": true,
20-
"python.testing.cwd": "pandas/tests"
20+
"python.testing.pytestArgs": [
21+
"pandas"
22+
]
2123
},
2224

2325
// Add the IDs of extensions you want installed when the container is created in the array below.

asv_bench/benchmarks/categoricals.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def setup(self):
3434
self.values_all_int8 = np.ones(N, "int8")
3535
self.categorical = pd.Categorical(self.values, self.categories)
3636
self.series = pd.Series(self.categorical)
37+
self.intervals = pd.interval_range(0, 1, periods=N // 10)
3738

3839
def time_regular(self):
3940
pd.Categorical(self.values, self.categories)
@@ -44,6 +45,9 @@ def time_fastpath(self):
4445
def time_datetimes(self):
4546
pd.Categorical(self.datetimes)
4647

48+
def time_interval(self):
49+
pd.Categorical(self.datetimes, categories=self.datetimes)
50+
4751
def time_datetimes_with_nat(self):
4852
pd.Categorical(self.datetimes_with_nat)
4953

asv_bench/benchmarks/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from .pandas_vb_common import tm
1818

19-
method_blacklist = {
19+
method_blocklist = {
2020
"object": {
2121
"median",
2222
"prod",
@@ -403,7 +403,7 @@ class GroupByMethods:
403403
]
404404

405405
def setup(self, dtype, method, application):
406-
if method in method_blacklist.get(dtype, {}):
406+
if method in method_blocklist.get(dtype, {}):
407407
raise NotImplementedError # skip benchmark
408408
ngroups = 1000
409409
size = ngroups * 2

asv_bench/benchmarks/io/json.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import sys
2+
13
import numpy as np
24

35
from pandas import DataFrame, concat, date_range, read_json, timedelta_range
@@ -82,6 +84,7 @@ def setup(self, orient, frame):
8284
timedeltas = timedelta_range(start=1, periods=N, freq="s")
8385
datetimes = date_range(start=1, periods=N, freq="s")
8486
ints = np.random.randint(100000000, size=N)
87+
longints = sys.maxsize * np.random.randint(100000000, size=N)
8588
floats = np.random.randn(N)
8689
strings = tm.makeStringIndex(N)
8790
self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
@@ -120,6 +123,18 @@ def setup(self, orient, frame):
120123
index=index,
121124
)
122125

126+
self.df_longint_float_str = DataFrame(
127+
{
128+
"longint_1": longints,
129+
"longint_2": longints,
130+
"float_1": floats,
131+
"float_2": floats,
132+
"str_1": strings,
133+
"str_2": strings,
134+
},
135+
index=index,
136+
)
137+
123138
def time_to_json(self, orient, frame):
124139
getattr(self, frame).to_json(self.fname, orient=orient)
125140

@@ -172,6 +187,7 @@ def setup(self):
172187
timedeltas = timedelta_range(start=1, periods=N, freq="s")
173188
datetimes = date_range(start=1, periods=N, freq="s")
174189
ints = np.random.randint(100000000, size=N)
190+
longints = sys.maxsize * np.random.randint(100000000, size=N)
175191
floats = np.random.randn(N)
176192
strings = tm.makeStringIndex(N)
177193
self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
@@ -209,6 +225,17 @@ def setup(self):
209225
},
210226
index=index,
211227
)
228+
self.df_longint_float_str = DataFrame(
229+
{
230+
"longint_1": longints,
231+
"longint_2": longints,
232+
"float_1": floats,
233+
"float_2": floats,
234+
"str_1": strings,
235+
"str_2": strings,
236+
},
237+
index=index,
238+
)
212239

213240
def time_floats_with_int_idex_lines(self):
214241
self.df.to_json(self.fname, orient="records", lines=True)
@@ -225,6 +252,9 @@ def time_float_int_lines(self):
225252
def time_float_int_str_lines(self):
226253
self.df_int_float_str.to_json(self.fname, orient="records", lines=True)
227254

255+
def time_float_longint_str_lines(self):
256+
self.df_longint_float_str.to_json(self.fname, orient="records", lines=True)
257+
228258

229259
class ToJSONMem:
230260
def setup_cache(self):

ci/code_checks.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,19 +248,19 @@ fi
248248
### CODE ###
249249
if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
250250

251-
MSG='Check import. No warnings, and blacklist some optional dependencies' ; echo $MSG
251+
MSG='Check import. No warnings, and blocklist some optional dependencies' ; echo $MSG
252252
python -W error -c "
253253
import sys
254254
import pandas
255255
256-
blacklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
256+
blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
257257
'lxml', 'matplotlib', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
258258
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
259259
260260
# GH#28227 for some of these check for top-level modules, while others are
261261
# more specific (e.g. urllib.request)
262262
import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules)
263-
mods = blacklist & import_mods
263+
mods = blocklist & import_mods
264264
if mods:
265265
sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods)))
266266
sys.exit(len(mods))

doc/source/user_guide/advanced.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,9 @@ You don't have to specify all levels of the ``MultiIndex`` by passing only the
260260
first elements of the tuple. For example, you can use "partial" indexing to
261261
get all elements with ``bar`` in the first level as follows:
262262

263-
df.loc['bar']
263+
.. ipython:: python
264+
265+
df.loc['bar']
264266
265267
This is a shortcut for the slightly more verbose notation ``df.loc[('bar',),]`` (equivalent
266268
to ``df.loc['bar',]`` in this example).

doc/source/user_guide/computation.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ For example, if we have the following ``DataFrame``:
561561
df
562562
563563
and we want to use an expanding window where ``use_expanding`` is ``True`` otherwise a window of size
564-
1, we can create the following ``BaseIndexer``:
564+
1, we can create the following ``BaseIndexer`` subclass:
565565

566566
.. code-block:: ipython
567567
@@ -593,6 +593,8 @@ and we want to use an expanding window where ``use_expanding`` is ``True`` other
593593
3 3.0
594594
4 10.0
595595
596+
You can view other examples of ``BaseIndexer`` subclasses `here <https://github.com/pandas-dev/pandas/blob/master/pandas/core/window/indexers.py>`__.
597+
596598
.. versionadded:: 1.1
597599

598600
For some problems knowledge of the future is available for analysis. For example, this occurs when

doc/source/user_guide/visualization.rst

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,34 @@ shown by default.
11081108
11091109
plt.close('all')
11101110
1111+
1112+
Controlling the labels
1113+
~~~~~~~~~~~~~~~~~~~~~~
1114+
1115+
.. versionadded:: 1.1.0
1116+
1117+
You may set the ``xlabel`` and ``ylabel`` arguments to give the plot custom labels
1118+
for the x and y axes. By default, pandas will pick up the index name as xlabel, while leaving
1119+
it empty for ylabel.
1120+
1121+
.. ipython:: python
1122+
:suppress:
1123+
1124+
plt.figure()
1125+
1126+
.. ipython:: python
1127+
1128+
df.plot()
1129+
1130+
@savefig plot_xlabel_ylabel.png
1131+
df.plot(xlabel="new x", ylabel="new y")
1132+
1133+
.. ipython:: python
1134+
:suppress:
1135+
1136+
plt.close('all')
1137+
1138+
11111139
Scales
11121140
~~~~~~
11131141

doc/source/whatsnew/v0.14.1.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ Enhancements
131131

132132
- Implemented ``sem`` (standard error of the mean) operation for ``Series``,
133133
``DataFrame``, ``Panel``, and ``Groupby`` (:issue:`6897`)
134-
- Add ``nlargest`` and ``nsmallest`` to the ``Series`` ``groupby`` whitelist,
134+
- Add ``nlargest`` and ``nsmallest`` to the ``Series`` ``groupby`` allowlist,
135135
which means you can now use these methods on a ``SeriesGroupBy`` object
136136
(:issue:`7053`).
137137
- All offsets ``apply``, ``rollforward`` and ``rollback`` can now handle ``np.datetime64``, previously results in ``ApplyTypeError`` (:issue:`7452`)

doc/source/whatsnew/v1.1.0.rst

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@ including other versions of pandas.
1313
Enhancements
1414
~~~~~~~~~~~~
1515

16+
.. _whatsnew_110.specify_missing_labels:
17+
18+
KeyErrors raised by loc specify missing labels
19+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
20+
Previously, if labels were missing for a loc call, a KeyError was raised stating that this was no longer supported.
21+
22+
Now the error message also includes a list of the missing labels (max 10 items, display width 80 characters). See :issue:`34272`.
23+
24+
1625
.. _whatsnew_110.astype_string:
1726

1827
All dtypes can now be converted to ``StringDtype``
@@ -303,6 +312,7 @@ Other enhancements
303312
:class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`,
304313
and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`).
305314
- :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`).
315+
- :meth:`Series.plot` and :meth:`DataFrame.plot` now accept ``xlabel`` and ``ylabel`` parameters to present labels on the x and y axes (:issue:`9093`).
306316
- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable (:issue:`11704`)
307317
- Make ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`).
308318
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument (:issue:`22610`)
@@ -314,7 +324,10 @@ Other enhancements
314324
result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`).
315325
- :meth:`~pandas.io.gbq.read_gbq` now allows to disable progress bar (:issue:`33360`).
316326
- :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
317-
- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list of dict to change only some specific columns' width (:issue:`28917`).
327+
- :meth:`DataFrame.cov` and :meth:`Series.cov` now support a new parameter ``ddof`` for delta degrees of freedom, as in the corresponding numpy methods (:issue:`34611`).
328+
- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
329+
- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`)
330+
- :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similarly to :meth:`pd.concat` or :meth:`DataFrame.sort_values` (:issue:`34932`).
318331

319332
.. ---------------------------------------------------------------------------
320333
@@ -800,6 +813,9 @@ Deprecations
800813
- The ``squeeze`` keyword in the ``groupby`` function is deprecated and will be removed in a future version (:issue:`32380`)
801814
- The ``tz`` keyword in :meth:`Period.to_timestamp` is deprecated and will be removed in a future version; use ``per.to_timestamp(...).tz_localize(tz)`` instead (:issue:`34522`)
802815
- :meth:`DatetimeIndex.to_perioddelta` is deprecated and will be removed in a future version. Use ``index - index.to_period(freq).to_timestamp()`` instead (:issue:`34853`)
816+
- :meth:`util.testing.assert_almost_equal` now accepts both relative and absolute
817+
precision through the ``rtol`` and ``atol`` parameters, thus deprecating the
818+
``check_less_precise`` parameter (:issue:`13357`).
803819

804820
.. ---------------------------------------------------------------------------
805821
@@ -823,6 +839,8 @@ Performance improvements
823839
- Performance improvement for groupby methods :meth:`~pandas.core.groupby.groupby.Groupby.first`
824840
and :meth:`~pandas.core.groupby.groupby.Groupby.last` (:issue:`34178`)
825841
- Performance improvement in :func:`factorize` for nullable (integer and boolean) dtypes (:issue:`33064`).
842+
- Performance improvement when constructing :class:`Categorical` objects (:issue:`33921`)
843+
- Fixed performance regression in :func:`pandas.qcut` and :func:`pandas.cut` (:issue:`33921`)
826844
- Performance improvement in reductions (sum, prod, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`).
827845
- Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`)
828846
- Performance improvement in :class:`pandas.core.groupby.RollingGroupby` (:issue:`34052`)
@@ -844,6 +862,7 @@ Categorical
844862
- Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`)
845863
- Bug where :class:`Categorical` comparison operator ``__ne__`` would incorrectly evaluate to ``False`` when either element was missing (:issue:`32276`)
846864
- :meth:`Categorical.fillna` now accepts :class:`Categorical` ``other`` argument (:issue:`32420`)
865+
- Repr of :class:`Categorical` was not distinguishing between int and str (:issue:`33676`)
847866

848867
Datetimelike
849868
^^^^^^^^^^^^
@@ -952,6 +971,7 @@ Indexing
952971
- Bug in :meth:`Series.at` when used with a :class:`MultiIndex` would raise an exception on valid inputs (:issue:`26989`)
953972
- Bug in :meth:`DataFrame.loc` with dictionary of values changes columns with dtype of ``int`` to ``float`` (:issue:`34573`)
954973
- Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` would raise an IndexingError when accessing a None value (:issue:`34318`)
974+
- Bug in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` would not preserve data types on an empty :class:`DataFrame` or :class:`Series` with a :class:`MultiIndex` (:issue:`19602`)
955975

956976
Missing
957977
^^^^^^^
@@ -1016,6 +1036,10 @@ I/O
10161036
- Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
10171037
- Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with different dtypes when reading data using an iterator. (:issue:`31544`)
10181038
- :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)
1039+
- `TypeError` exceptions raised by :meth:`read_csv` and :meth:`read_table` were showing as ``parser_f`` when an unexpected keyword argument was passed (:issue:`25648`)
1040+
- Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`)
1041+
- Bug in :meth:`ujson.encode` was raising an ``OverflowError`` with numbers larger than ``sys.maxsize`` (:issue:`34395`)
1042+
- Bug in :meth:`HDFStore.append_to_multiple` was raising a ``ValueError`` when the min_itemsize parameter is set (:issue:`11238`)
10191043

10201044
Plotting
10211045
^^^^^^^^
@@ -1026,6 +1050,7 @@ Plotting
10261050
- Bug in :meth:`DataFrame.hist` where the order of ``column`` argument was ignored (:issue:`29235`)
10271051
- Bug in :meth:`DataFrame.plot.scatter` where, when adding multiple plots with different ``cmap``, colorbars always use the first ``cmap`` (:issue:`33389`)
10281052
- Bug in :meth:`DataFrame.plot.scatter` was adding a colorbar to the plot even if the argument `c` was assigned to a column containing color names (:issue:`34316`)
1053+
- Bug in :meth:`pandas.plotting.bootstrap_plot` was causing cluttered axes and overlapping labels (:issue:`34905`)
10291054

10301055
Groupby/resample/rolling
10311056
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1047,6 +1072,7 @@ Groupby/resample/rolling
10471072
- Bug in :meth:`SeriesGroupBy.agg` where any column name was accepted in the named aggregation of ``SeriesGroupBy`` previously. The behaviour now allows only ``str`` and callables else would raise ``TypeError``. (:issue:`34422`)
10481073
- Bug in :meth:`DataFrame.groupby` lost index, when one of the ``agg`` keys referenced an empty list (:issue:`32580`)
10491074
- Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`)
1075+
- Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`)
10501076

10511077
Reshaping
10521078
^^^^^^^^^
@@ -1087,6 +1113,7 @@ Sparse
10871113
- Bug in :meth:`Series.sum` with ``SparseArray`` raises ``TypeError`` (:issue:`25777`)
10881114
- Bug where :class:`DataFrame` containing :class:`SparseArray` filled with ``NaN`` when indexed by a list-like (:issue:`27781`, :issue:`29563`)
10891115
- The repr of :class:`SparseDtype` now includes the repr of its ``fill_value`` attribute. Previously it used ``fill_value``'s string representation (:issue:`34352`)
1116+
- Bug where empty :class:`DataFrame` could not be cast to :class:`SparseDtype` (:issue:`33113`)
10901117

10911118
ExtensionArray
10921119
^^^^^^^^^^^^^^
@@ -1118,6 +1145,7 @@ Other
11181145
- :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33172`)
11191146
- Bug in :class:`Tick` comparisons raising ``TypeError`` when comparing against timedelta-like objects (:issue:`34088`)
11201147
- Bug in :class:`Tick` multiplication raising ``TypeError`` when multiplying by a float (:issue:`34486`)
1148+
- Passing a `set` as `names` argument to :func:`pandas.read_csv`, :func:`pandas.read_table`, or :func:`pandas.read_fwf` will raise ``ValueError: Names should be an ordered collection.`` (:issue:`34946`)
11211149

11221150
.. ---------------------------------------------------------------------------
11231151

environment.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ dependencies:
3737
# Dask and its dependencies (that don't install with dask)
3838
- dask-core
3939
- toolz>=0.7.3
40-
- fsspec>=0.5.1
4140
- partd>=0.3.10
4241
- cloudpickle>=0.2.1
4342

pandas/_libs/internals.pyx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ cnp.import_array()
1616
from pandas._libs.algos import ensure_int64
1717

1818

19+
@cython.final
1920
cdef class BlockPlacement:
2021
# __slots__ = '_as_slice', '_as_array', '_len'
2122
cdef:

0 commit comments

Comments
 (0)