Skip to content

Commit 855a004

Browse files
authored
Merge branch 'main' into fix/resample-interpolate-fails-with-inplace-true-58690-remove-inplace-option
2 parents 181dbfa + c46fb76 commit 855a004

File tree

19 files changed

+107
-48
lines changed

19 files changed

+107
-48
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -504,8 +504,8 @@ Timezones
504504

505505
Numeric
506506
^^^^^^^
507+
- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`)
507508
- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`)
508-
-
509509

510510
Conversion
511511
^^^^^^^^^^
@@ -547,6 +547,7 @@ I/O
547547
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
548548
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)
549549
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
550+
- Bug in :meth:`HDFStore.get` where data of dtype ``datetime64[s]`` was not read back correctly (:issue:`59004`)
550551
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
551552
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
552553

@@ -609,6 +610,7 @@ Other
609610
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would raise a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
610611
- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
611612
- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
613+
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
612614
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
613615
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
614616
- Bug in the DataFrame Interchange Protocol implementation that was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)

pandas/core/arrays/arrow/array.py

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
from pandas._libs import lib
2020
from pandas._libs.tslibs import (
21-
NaT,
2221
Timedelta,
2322
Timestamp,
2423
timezones,
@@ -2612,17 +2611,19 @@ def _str_wrap(self, width: int, **kwargs) -> Self:
26122611
@property
26132612
def _dt_days(self) -> Self:
26142613
return type(self)(
2615-
pa.array(self._to_timedeltaarray().days, from_pandas=True, type=pa.int32())
2614+
pa.array(
2615+
self._to_timedeltaarray().components.days,
2616+
from_pandas=True,
2617+
type=pa.int32(),
2618+
)
26162619
)
26172620

26182621
@property
26192622
def _dt_hours(self) -> Self:
26202623
return type(self)(
26212624
pa.array(
2622-
[
2623-
td.components.hours if td is not NaT else None
2624-
for td in self._to_timedeltaarray()
2625-
],
2625+
self._to_timedeltaarray().components.hours,
2626+
from_pandas=True,
26262627
type=pa.int32(),
26272628
)
26282629
)
@@ -2631,10 +2632,8 @@ def _dt_hours(self) -> Self:
26312632
def _dt_minutes(self) -> Self:
26322633
return type(self)(
26332634
pa.array(
2634-
[
2635-
td.components.minutes if td is not NaT else None
2636-
for td in self._to_timedeltaarray()
2637-
],
2635+
self._to_timedeltaarray().components.minutes,
2636+
from_pandas=True,
26382637
type=pa.int32(),
26392638
)
26402639
)
@@ -2643,18 +2642,18 @@ def _dt_minutes(self) -> Self:
26432642
def _dt_seconds(self) -> Self:
26442643
return type(self)(
26452644
pa.array(
2646-
self._to_timedeltaarray().seconds, from_pandas=True, type=pa.int32()
2645+
self._to_timedeltaarray().components.seconds,
2646+
from_pandas=True,
2647+
type=pa.int32(),
26472648
)
26482649
)
26492650

26502651
@property
26512652
def _dt_milliseconds(self) -> Self:
26522653
return type(self)(
26532654
pa.array(
2654-
[
2655-
td.components.milliseconds if td is not NaT else None
2656-
for td in self._to_timedeltaarray()
2657-
],
2655+
self._to_timedeltaarray().components.milliseconds,
2656+
from_pandas=True,
26582657
type=pa.int32(),
26592658
)
26602659
)
@@ -2663,7 +2662,7 @@ def _dt_milliseconds(self) -> Self:
26632662
def _dt_microseconds(self) -> Self:
26642663
return type(self)(
26652664
pa.array(
2666-
self._to_timedeltaarray().microseconds,
2665+
self._to_timedeltaarray().components.microseconds,
26672666
from_pandas=True,
26682667
type=pa.int32(),
26692668
)
@@ -2673,7 +2672,9 @@ def _dt_microseconds(self) -> Self:
26732672
def _dt_nanoseconds(self) -> Self:
26742673
return type(self)(
26752674
pa.array(
2676-
self._to_timedeltaarray().nanoseconds, from_pandas=True, type=pa.int32()
2675+
self._to_timedeltaarray().components.nanoseconds,
2676+
from_pandas=True,
2677+
type=pa.int32(),
26772678
)
26782679
)
26792680

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13078,7 +13078,7 @@ def quantile(
1307813078

1307913079
if len(data.columns) == 0:
1308013080
# GH#23925 _get_numeric_data may have dropped all columns
13081-
cols = Index([], name=self.columns.name)
13081+
cols = self.columns[:0]
1308213082

1308313083
dtype = np.float64
1308413084
if axis == 1:

pandas/core/generic.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,6 @@
158158
Index,
159159
MultiIndex,
160160
PeriodIndex,
161-
RangeIndex,
162161
default_index,
163162
ensure_index,
164163
)
@@ -1852,7 +1851,7 @@ def _drop_labels_or_levels(self, keys, axis: AxisInt = 0):
18521851
else:
18531852
# Drop the last level of Index by replacing with
18541853
# a RangeIndex
1855-
dropped.columns = RangeIndex(dropped.columns.size)
1854+
dropped.columns = default_index(dropped.columns.size)
18561855

18571856
# Handle dropping index labels
18581857
if labels_to_drop:

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ class providing the base-class of operations.
128128
from pandas.core.indexes.api import (
129129
Index,
130130
MultiIndex,
131-
RangeIndex,
132131
default_index,
133132
)
134133
from pandas.core.internals.blocks import ensure_block_shape
@@ -1264,7 +1263,7 @@ def _set_result_index_ordered(
12641263
if self._grouper.has_dropped_na:
12651264
# Add back in any missing rows due to dropna - index here is integral
12661265
# with values referring to the row of the input so can use RangeIndex
1267-
result = result.reindex(RangeIndex(len(index)), axis=0)
1266+
result = result.reindex(default_index(len(index)), axis=0)
12681267
result = result.set_axis(index, axis=0)
12691268

12701269
return result
@@ -1334,7 +1333,7 @@ def _wrap_aggregated_output(
13341333
# enforced in __init__
13351334
result = self._insert_inaxis_grouper(result, qs=qs)
13361335
result = result._consolidate()
1337-
result.index = RangeIndex(len(result))
1336+
result.index = default_index(len(result))
13381337

13391338
else:
13401339
index = self._grouper.result_index

pandas/core/groupby/grouper.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from pandas.core.indexes.api import (
3535
Index,
3636
MultiIndex,
37+
default_index,
3738
)
3839
from pandas.core.series import Series
3940

@@ -901,7 +902,7 @@ def is_in_obj(gpr) -> bool:
901902
if len(groupings) == 0 and len(obj):
902903
raise ValueError("No group keys passed!")
903904
if len(groupings) == 0:
904-
groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp)))
905+
groupings.append(Grouping(default_index(0), np.array([], dtype=np.intp)))
905906

906907
# create the internals grouper
907908
grouper = ops.BaseGrouper(group_axis, groupings, sort=sort, dropna=dropna)

pandas/core/indexes/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def _get_combined_index(
130130
# TODO: handle index names!
131131
indexes = _get_distinct_objs(indexes)
132132
if len(indexes) == 0:
133-
index = Index([])
133+
index: Index = default_index(0)
134134
elif len(indexes) == 1:
135135
index = indexes[0]
136136
elif intersect:

pandas/core/internals/managers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def blklocs(self) -> npt.NDArray[np.intp]:
249249
def make_empty(self, axes=None) -> Self:
250250
"""return an empty BlockManager with the items axis of len 0"""
251251
if axes is None:
252-
axes = [Index([])] + self.axes[1:]
252+
axes = [default_index(0)] + self.axes[1:]
253253

254254
# preserve dtype if possible
255255
if self.ndim == 1:

pandas/core/methods/selectn.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
)
3030
from pandas.core.dtypes.dtypes import BaseMaskedDtype
3131

32+
from pandas.core.indexes.api import default_index
33+
3234
if TYPE_CHECKING:
3335
from pandas._typing import (
3436
DtypeObj,
@@ -38,6 +40,7 @@
3840

3941
from pandas import (
4042
DataFrame,
43+
Index,
4144
Series,
4245
)
4346
else:
@@ -199,8 +202,6 @@ def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel) -> No
199202
self.columns = columns
200203

201204
def compute(self, method: str) -> DataFrame:
202-
from pandas.core.api import Index
203-
204205
n = self.n
205206
frame = self.obj
206207
columns = self.columns
@@ -227,7 +228,7 @@ def get_indexer(current_indexer: Index, other_indexer: Index) -> Index:
227228
original_index = frame.index
228229
cur_frame = frame = frame.reset_index(drop=True)
229230
cur_n = n
230-
indexer = Index([], dtype=np.int64)
231+
indexer: Index = default_index(0)
231232

232233
for i, column in enumerate(columns):
233234
# For each column we apply method to cur_frame[column].

pandas/core/reshape/reshape.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
from pandas.core.indexes.api import (
4343
Index,
4444
MultiIndex,
45-
RangeIndex,
45+
default_index,
4646
)
4747
from pandas.core.reshape.concat import concat
4848
from pandas.core.series import Series
@@ -1047,7 +1047,7 @@ def stack_reshape(
10471047
if data.ndim == 1:
10481048
data.name = 0
10491049
else:
1050-
data.columns = RangeIndex(len(data.columns))
1050+
data.columns = default_index(len(data.columns))
10511051
buf.append(data)
10521052

10531053
if len(buf) > 0 and not frame.empty:

pandas/io/html.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1178,7 +1178,7 @@ def read_html(
11781178
**after** `skiprows` is applied.
11791179
11801180
This function will *always* return a list of :class:`DataFrame` *or*
1181-
it will fail, e.g., it will *not* return an empty list.
1181+
it will fail, i.e., it will *not* return an empty list.
11821182
11831183
Examples
11841184
--------

pandas/io/pytables.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2655,7 +2655,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
26552655
# reverse converts
26562656
if dtype.startswith("datetime64"):
26572657
# recreate with tz if indicated
2658-
converted = _set_tz(converted, tz)
2658+
converted = _set_tz(converted, tz, dtype)
26592659

26602660
elif dtype == "timedelta64":
26612661
converted = np.asarray(converted, dtype="m8[ns]")
@@ -3036,7 +3036,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
30363036
if dtype and dtype.startswith("datetime64"):
30373037
# reconstruct a timezone if indicated
30383038
tz = getattr(attrs, "tz", None)
3039-
ret = _set_tz(ret, tz)
3039+
ret = _set_tz(ret, tz, dtype)
30403040

30413041
elif dtype == "timedelta64":
30423042
ret = np.asarray(ret, dtype="m8[ns]")
@@ -4964,19 +4964,23 @@ def _get_tz(tz: tzinfo) -> str | tzinfo:
49644964
return zone
49654965

49664966

4967-
def _set_tz(values: npt.NDArray[np.int64], tz: str | tzinfo | None) -> DatetimeArray:
4967+
def _set_tz(
4968+
values: npt.NDArray[np.int64], tz: str | tzinfo | None, datetime64_dtype: str
4969+
) -> DatetimeArray:
49684970
"""
49694971
Coerce the values to a DatetimeArray with appropriate tz.
49704972
49714973
Parameters
49724974
----------
49734975
values : ndarray[int64]
49744976
tz : str, tzinfo, or None
4977+
datetime64_dtype : str, e.g. "datetime64[ns]", "datetime64[25s]"
49754978
"""
49764979
assert values.dtype == "i8", values.dtype
49774980
# Argument "tz" to "tz_to_dtype" has incompatible type "str | tzinfo | None";
49784981
# expected "tzinfo"
4979-
dtype = tz_to_dtype(tz=tz, unit="ns") # type: ignore[arg-type]
4982+
unit, _ = np.datetime_data(datetime64_dtype) # parsing dtype: unit, count
4983+
dtype = tz_to_dtype(tz=tz, unit=unit) # type: ignore[arg-type]
49804984
dta = DatetimeArray._from_sequence(values, dtype=dtype)
49814985
return dta
49824986

pandas/tests/extension/test_arrow.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2905,6 +2905,31 @@ def test_dt_components():
29052905
tm.assert_frame_equal(result, expected)
29062906

29072907

2908+
def test_dt_components_large_values():
2909+
ser = pd.Series(
2910+
[
2911+
pd.Timedelta("365 days 23:59:59.999000"),
2912+
None,
2913+
],
2914+
dtype=ArrowDtype(pa.duration("ns")),
2915+
)
2916+
result = ser.dt.components
2917+
expected = pd.DataFrame(
2918+
[[365, 23, 59, 59, 999, 0, 0], [None, None, None, None, None, None, None]],
2919+
columns=[
2920+
"days",
2921+
"hours",
2922+
"minutes",
2923+
"seconds",
2924+
"milliseconds",
2925+
"microseconds",
2926+
"nanoseconds",
2927+
],
2928+
dtype="int32[pyarrow]",
2929+
)
2930+
tm.assert_frame_equal(result, expected)
2931+
2932+
29082933
@pytest.mark.parametrize("skipna", [True, False])
29092934
def test_boolean_reduce_series_all_null(all_boolean_reductions, skipna):
29102935
# GH51624

pandas/tests/frame/methods/test_quantile.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -710,14 +710,14 @@ def test_quantile_empty_no_columns(self, interp_method):
710710
result = df.quantile(
711711
0.5, numeric_only=True, interpolation=interpolation, method=method
712712
)
713-
expected = Series([], index=[], name=0.5, dtype=np.float64)
713+
expected = Series([], name=0.5, dtype=np.float64)
714714
expected.index.name = "captain tightpants"
715715
tm.assert_series_equal(result, expected)
716716

717717
result = df.quantile(
718718
[0.5], numeric_only=True, interpolation=interpolation, method=method
719719
)
720-
expected = DataFrame([], index=[0.5], columns=[])
720+
expected = DataFrame([], index=[0.5])
721721
expected.columns.name = "captain tightpants"
722722
tm.assert_frame_equal(result, expected)
723723

@@ -926,3 +926,12 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis):
926926
expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
927927
)
928928
tm.assert_series_equal(result, expected)
929+
930+
931+
def test_multi_quantile_numeric_only_retains_columns():
932+
df = DataFrame(list("abc"))
933+
result = df.quantile([0.5, 0.7], numeric_only=True)
934+
expected = DataFrame(index=[0.5, 0.7])
935+
tm.assert_frame_equal(
936+
result, expected, check_index_type=True, check_column_type=True
937+
)

pandas/tests/generic/test_generic.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,19 @@ def test_get_numeric_data(self, frame_or_series):
9393
if isinstance(o, DataFrame):
9494
# preserve columns dtype
9595
expected.columns = o.columns[:0]
96-
# https://github.com/pandas-dev/pandas/issues/50862
97-
tm.assert_equal(result.reset_index(drop=True), expected)
96+
tm.assert_equal(result, expected)
9897

9998
# get the bool data
10099
arr = np.array([True, True, False, True])
101100
o = construct(frame_or_series, n, value=arr, **kwargs)
102101
result = o._get_numeric_data()
103102
tm.assert_equal(result, o)
104103

104+
def test_get_bool_data_empty_preserve_index(self):
105+
expected = Series([], dtype="bool")
106+
result = expected._get_bool_data()
107+
tm.assert_series_equal(result, expected, check_index_type=True)
108+
105109
def test_nonzero(self, frame_or_series):
106110
# GH 4633
107111
# look at the boolean/nonzero behavior for objects

0 commit comments

Comments
 (0)