Skip to content

Commit 5bce9ac

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into overwrite_xl
2 parents 4b73d6a + 3888a3f commit 5bce9ac

File tree

94 files changed

+1782
-883
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+1782
-883
lines changed

.github/workflows/ci.yml

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,10 @@ jobs:
154154
PANDAS_DATA_MANAGER: array
155155
run: |
156156
source activate pandas-dev
157+
157158
pytest pandas/tests/frame/methods
158159
pytest pandas/tests/frame/test_constructors.py
159-
pytest pandas/tests/frame/constructors/
160+
pytest pandas/tests/frame/test_*
160161
pytest pandas/tests/frame/test_reductions.py
161162
pytest pandas/tests/reductions/
162163
pytest pandas/tests/generic/test_generic.py
@@ -165,10 +166,30 @@ jobs:
165166
pytest pandas/tests/resample/
166167
pytest pandas/tests/reshape/merge
167168
169+
pytest pandas/tests/series/methods
170+
pytest pandas/tests/series/test_*
171+
168172
# indexing subset (temporary since other tests don't pass yet)
169173
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean
170174
pytest pandas/tests/frame/indexing/test_where.py
171175
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index
172176
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
173177
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
174178
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
179+
180+
pytest pandas/tests/api/
181+
pytest pandas/tests/base/
182+
pytest pandas/tests/computation/
183+
pytest pandas/tests/config/
184+
pytest pandas/tests/dtypes/
185+
pytest pandas/tests/generic/
186+
pytest pandas/tests/indexes/
187+
pytest pandas/tests/libs/
188+
pytest pandas/tests/plotting/
189+
pytest pandas/tests/scalar/
190+
pytest pandas/tests/strings/
191+
pytest pandas/tests/tools/
192+
pytest pandas/tests/tseries/
193+
pytest pandas/tests/tslibs/
194+
pytest pandas/tests/util/
195+
pytest pandas/tests/window/

asv_bench/benchmarks/frame_methods.py

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
date_range,
1212
isnull,
1313
period_range,
14+
timedelta_range,
1415
)
1516

1617
from .pandas_vb_common import tm
@@ -52,6 +53,7 @@ def setup(self):
5253
N = 10 ** 3
5354
self.df = DataFrame(np.random.randn(N * 10, N))
5455
self.idx = np.arange(4 * N, 7 * N)
56+
self.idx_cols = np.random.randint(0, N, N)
5557
self.df2 = DataFrame(
5658
{
5759
c: {
@@ -68,6 +70,9 @@ def time_reindex_axis0(self):
6870
self.df.reindex(self.idx)
6971

7072
def time_reindex_axis1(self):
73+
self.df.reindex(columns=self.idx_cols)
74+
75+
def time_reindex_axis1_missing(self):
7176
self.df.reindex(columns=self.idx)
7277

7378
def time_reindex_both_axes(self):
@@ -351,15 +356,42 @@ def time_isnull_obj(self):
351356

352357
class Fillna:
353358

354-
params = ([True, False], ["pad", "bfill"])
355-
param_names = ["inplace", "method"]
356-
357-
def setup(self, inplace, method):
358-
values = np.random.randn(10000, 100)
359-
values[::2] = np.nan
360-
self.df = DataFrame(values)
361-
362-
def time_frame_fillna(self, inplace, method):
359+
params = (
360+
[True, False],
361+
["pad", "bfill"],
362+
[
363+
"float64",
364+
"float32",
365+
"object",
366+
"Int64",
367+
"Float64",
368+
"datetime64[ns]",
369+
"datetime64[ns, tz]",
370+
"timedelta64[ns]",
371+
],
372+
)
373+
param_names = ["inplace", "method", "dtype"]
374+
375+
def setup(self, inplace, method, dtype):
376+
N, M = 10000, 100
377+
if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"):
378+
data = {
379+
"datetime64[ns]": date_range("2011-01-01", freq="H", periods=N),
380+
"datetime64[ns, tz]": date_range(
381+
"2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
382+
),
383+
"timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"),
384+
}
385+
self.df = DataFrame({f"col_{i}": data[dtype] for i in range(M)})
386+
self.df[::2] = None
387+
else:
388+
values = np.random.randn(N, M)
389+
values[::2] = np.nan
390+
if dtype == "Int64":
391+
values = values.round()
392+
self.df = DataFrame(values, dtype=dtype)
393+
394+
def time_frame_fillna(self, inplace, method, dtype):
363395
self.df.fillna(inplace=inplace, method=method)
364396

365397

asv_bench/benchmarks/io/json.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
DataFrame,
77
concat,
88
date_range,
9+
json_normalize,
910
read_json,
1011
timedelta_range,
1112
)
@@ -77,6 +78,27 @@ def peakmem_read_json_lines_nrows(self, index):
7778
read_json(self.fname, orient="records", lines=True, nrows=15000)
7879

7980

81+
class NormalizeJSON(BaseIO):
82+
fname = "__test__.json"
83+
params = [
84+
["split", "columns", "index", "values", "records"],
85+
["df", "df_date_idx", "df_td_int_ts", "df_int_floats", "df_int_float_str"],
86+
]
87+
param_names = ["orient", "frame"]
88+
89+
def setup(self, orient, frame):
90+
data = {
91+
"hello": ["thisisatest", 999898, "mixed types"],
92+
"nest1": {"nest2": {"nest3": "nest3_value", "nest3_int": 3445}},
93+
"nest1_list": {"nest2": ["blah", 32423, 546456.876, 92030234]},
94+
"hello2": "string",
95+
}
96+
self.data = [data for i in range(10000)]
97+
98+
def time_normalize_json(self, orient, frame):
99+
json_normalize(self.data)
100+
101+
80102
class ToJSON(BaseIO):
81103

82104
fname = "__test__.json"

doc/source/whatsnew/v1.2.4.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717

18-
-
18+
- Fixed regression in :meth:`DataFrame.sum` where passing a ``min_count`` greater than the :class:`DataFrame` shape resulted in a ``ValueError`` (:issue:`39738`)
1919
-
2020

2121
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.3.0.rst

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ Other enhancements
132132
- Disallow :class:`DataFrame` indexer for ``iloc`` for :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` (:issue:`39004`)
133133
- :meth:`Series.apply` can now accept list-like or dictionary-like arguments that aren't lists or dictionaries, e.g. ``ser.apply(np.array(["sum", "mean"]))``, which was already the case for :meth:`DataFrame.apply` (:issue:`39140`)
134134
- :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`)
135-
- :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes (:issue:`35643`, :issue:`21266`, :issue:`39317`)
135+
- :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes (:issue:`35643`, :issue:`21266`, :issue:`39317`, :issue:`39708`)
136136
- :meth:`.Styler.set_tooltips_class` and :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`)
137137
- :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`)
138138
- :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`)
@@ -182,6 +182,46 @@ Preserve dtypes in :meth:`~pandas.DataFrame.combine_first`
182182
combined.dtypes
183183
184184
185+
Try operating inplace when setting values with ``loc`` and ``iloc``
186+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
187+
188+
When setting an entire column using ``loc`` or ``iloc``, pandas will try to
189+
insert the values into the existing data rather than create an entirely new array.
190+
191+
.. ipython:: python
192+
193+
df = pd.DataFrame(range(3), columns=["A"], dtype="float64")
194+
values = df.values
195+
new = np.array([5, 6, 7], dtype="int64")
196+
df.loc[[0, 1, 2], "A"] = new
197+
198+
In both the new and old behavior, the data in ``values`` is overwritten, but in
199+
the old behavior the dtype of ``df["A"]`` changed to ``int64``.
200+
201+
*pandas 1.2.x*
202+
203+
.. code-block:: ipython
204+
205+
In [1]: df.dtypes
206+
Out[1]:
207+
A int64
208+
dtype: object
209+
In [2]: np.shares_memory(df["A"].values, new)
210+
Out[2]: False
211+
In [3]: np.shares_memory(df["A"].values, values)
212+
Out[3]: False
213+
214+
In pandas 1.3.0, ``df`` continues to share data with ``values``.
215+
216+
*pandas 1.3.0*
217+
218+
.. ipython:: python
219+
220+
df.dtypes
221+
np.shares_memory(df["A"], new)
222+
np.shares_memory(df["A"], values)
223+
224+
185225
.. _whatsnew_130.notable_bug_fixes.setitem_with_bool_casting:
186226

187227
Consistent Casting With Setting Into Boolean Series
@@ -324,6 +364,7 @@ Deprecations
324364
- Deprecated :meth:`core.window.ewm.ExponentialMovingWindow.vol` (:issue:`39220`)
325365
- Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`)
326366
- Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`)
367+
- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like; will raise if any function fails on a column in a future version (:issue:`40211`)
327368

328369
.. ---------------------------------------------------------------------------
329370
@@ -335,10 +376,12 @@ Performance improvements
335376
- Performance improvement in :meth:`IntervalIndex.isin` (:issue:`38353`)
336377
- Performance improvement in :meth:`Series.mean` for nullable data types (:issue:`34814`)
337378
- Performance improvement in :meth:`Series.isin` for nullable data types (:issue:`38340`)
379+
- Performance improvement in :meth:`DataFrame.fillna` with ``method="pad|backfill"`` for nullable floating and nullable integer dtypes (:issue:`39953`)
338380
- Performance improvement in :meth:`DataFrame.corr` for method=kendall (:issue:`28329`)
339381
- Performance improvement in :meth:`core.window.rolling.Rolling.corr` and :meth:`core.window.rolling.Rolling.cov` (:issue:`39388`)
340382
- Performance improvement in :meth:`core.window.rolling.RollingGroupby.corr`, :meth:`core.window.expanding.ExpandingGroupby.corr`, :meth:`core.window.expanding.ExpandingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.cov` (:issue:`39591`)
341383
- Performance improvement in :func:`unique` for object data type (:issue:`37615`)
384+
- Performance improvement in :func:`pd.json_normalize` for basic cases (including separators) (:issue:`40035` :issue:`15621`)
342385
- Performance improvement in :class:`core.window.rolling.ExpandingGroupby` aggregation methods (:issue:`39664`)
343386
- Performance improvement in :class:`Styler` where render times are more than 50% reduced (:issue:`39972` :issue:`39952`)
344387
- Performance improvement in :meth:`core.window.ewm.ExponentialMovingWindow.mean` with ``times`` (:issue:`39784`)
@@ -540,6 +583,8 @@ Reshaping
540583
- Bug in :meth:`DataFrame.append` returning incorrect dtypes with combinations of ``ExtensionDtype`` dtypes (:issue:`39454`)
541584
- Bug in :meth:`DataFrame.append` returning incorrect dtypes with combinations of ``datetime64`` and ``timedelta64`` dtypes (:issue:`39574`)
542585
- Bug in :meth:`DataFrame.pivot_table` returning a ``MultiIndex`` for a single value when operating on an empty ``DataFrame`` (:issue:`13483`)
586+
- Allow :class:`Index` to be passed to the :func:`numpy.all` function (:issue:`40180`)
587+
-
543588

544589
Sparse
545590
^^^^^^

pandas/_libs/algos.pyx

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -597,10 +597,11 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
597597

598598
@cython.boundscheck(False)
599599
@cython.wraparound(False)
600-
def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
600+
def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
601601
cdef:
602602
Py_ssize_t i, N
603603
algos_t val
604+
uint8_t prev_mask
604605
int lim, fill_count = 0
605606

606607
N = len(values)
@@ -612,15 +613,18 @@ def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
612613
lim = validate_limit(N, limit)
613614

614615
val = values[0]
616+
prev_mask = mask[0]
615617
for i in range(N):
616618
if mask[i]:
617619
if fill_count >= lim:
618620
continue
619621
fill_count += 1
620622
values[i] = val
623+
mask[i] = prev_mask
621624
else:
622625
fill_count = 0
623626
val = values[i]
627+
prev_mask = mask[i]
624628

625629

626630
@cython.boundscheck(False)
@@ -739,10 +743,11 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
739743

740744
@cython.boundscheck(False)
741745
@cython.wraparound(False)
742-
def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
746+
def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
743747
cdef:
744748
Py_ssize_t i, N
745749
algos_t val
750+
uint8_t prev_mask
746751
int lim, fill_count = 0
747752

748753
N = len(values)
@@ -754,15 +759,18 @@ def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
754759
lim = validate_limit(N, limit)
755760

756761
val = values[N - 1]
762+
prev_mask = mask[N - 1]
757763
for i in range(N - 1, -1, -1):
758764
if mask[i]:
759765
if fill_count >= lim:
760766
continue
761767
fill_count += 1
762768
values[i] = val
769+
mask[i] = prev_mask
763770
else:
764771
fill_count = 0
765772
val = values[i]
773+
prev_mask = mask[i]
766774

767775

768776
@cython.boundscheck(False)

pandas/_typing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@
5858
from pandas.core.internals import (
5959
ArrayManager,
6060
BlockManager,
61+
SingleArrayManager,
62+
SingleBlockManager,
6163
)
6264
from pandas.core.resample import Resampler
6365
from pandas.core.series import Series
@@ -184,3 +186,4 @@
184186

185187
# internals
186188
Manager = Union["ArrayManager", "BlockManager"]
189+
SingleManager = Union["SingleArrayManager", "SingleBlockManager"]

pandas/compat/numpy/function.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name):
212212
ALLANY_DEFAULTS["dtype"] = None
213213
ALLANY_DEFAULTS["out"] = None
214214
ALLANY_DEFAULTS["keepdims"] = False
215+
ALLANY_DEFAULTS["axis"] = None
215216
validate_all = CompatValidator(
216217
ALLANY_DEFAULTS, fname="all", method="both", max_fname_arg_count=1
217218
)

pandas/conftest.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ def add_imports(doctest_namespace):
190190
# ----------------------------------------------------------------
191191
# Common arguments
192192
# ----------------------------------------------------------------
193-
@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis {repr(x)}")
193+
@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis={repr(x)}")
194194
def axis(request):
195195
"""
196196
Fixture for returning the axis numbers of a DataFrame.
@@ -1585,6 +1585,14 @@ def indexer_sl(request):
15851585
return request.param
15861586

15871587

1588+
@pytest.fixture(params=[tm.at, tm.loc])
1589+
def indexer_al(request):
1590+
"""
1591+
Parametrize over at.__setitem__, loc.__setitem__
1592+
"""
1593+
return request.param
1594+
1595+
15881596
@pytest.fixture
15891597
def using_array_manager(request):
15901598
"""

pandas/core/apply.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Union,
1616
cast,
1717
)
18+
import warnings
1819

1920
import numpy as np
2021

@@ -267,6 +268,7 @@ def transform_dict_like(self, func):
267268
func = self.normalize_dictlike_arg("transform", obj, func)
268269

269270
results: Dict[Hashable, FrameOrSeriesUnion] = {}
271+
failed_names = []
270272
for name, how in func.items():
271273
colg = obj._gotitem(name, ndim=1)
272274
try:
@@ -277,10 +279,20 @@ def transform_dict_like(self, func):
277279
"No transform functions were provided",
278280
}:
279281
raise err
280-
282+
else:
283+
failed_names.append(name)
281284
# combine results
282285
if not results:
283286
raise ValueError("Transform function failed")
287+
if len(failed_names) > 0:
288+
warnings.warn(
289+
f"{failed_names} did not transform successfully. "
290+
f"Allowing for partial failure is deprecated, this will raise "
291+
f"a ValueError in a future version of pandas."
292+
f"Drop these columns/ops to avoid this warning.",
293+
FutureWarning,
294+
stacklevel=4,
295+
)
284296
return concat(results, axis=1)
285297

286298
def transform_str_or_callable(self, func) -> FrameOrSeriesUnion:

0 commit comments

Comments
 (0)