
Commit

Merge branch 'main' into fix/group_by_agg_pyarrow_bool_numpy_same_type
Kei committed Apr 8, 2024
2 parents d510052 + b8a4691 commit 62a31d9
Showing 23 changed files with 86 additions and 156 deletions.
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/categoricals.py
@@ -24,7 +24,7 @@ def setup(self):
self.codes = np.tile(range(len(self.categories)), N)

self.datetimes = pd.Series(
pd.date_range("1995-01-01 00:00:00", periods=N / 10, freq="s")
pd.date_range("1995-01-01 00:00:00", periods=N // 10, freq="s")
)
self.datetimes_with_nat = self.datetimes.copy()
self.datetimes_with_nat.iloc[-1] = pd.NaT
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/timeseries.py
@@ -29,7 +29,7 @@ def setup(self, index_type):
"dst": date_range(
start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="s"
),
"repeated": date_range(start="2000", periods=N / 10, freq="s").repeat(10),
"repeated": date_range(start="2000", periods=N // 10, freq="s").repeat(10),
"tz_aware": date_range(start="2000", periods=N, freq="s", tz="US/Eastern"),
"tz_local": date_range(
start="2000", periods=N, freq="s", tz=dateutil.tz.tzlocal()
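Both benchmark edits make the same fix: `N / 10` is a float under true division, and `periods` must be an integer once the deprecation enforced later in this diff lands. A minimal sketch of the distinction:

```python
N = 100_000

print(N / 10)   # 10000.0 -- true division yields a float, now rejected as `periods`
print(N // 10)  # 10000   -- floor division yields an int, which `periods` requires
```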
1 change: 0 additions & 1 deletion doc/redirects.csv
@@ -1422,7 +1422,6 @@ reference/api/pandas.Series.transpose,pandas.Series.T
reference/api/pandas.Index.transpose,pandas.Index.T
reference/api/pandas.Index.notnull,pandas.Index.notna
reference/api/pandas.Index.tolist,pandas.Index.to_list
- reference/api/pandas.arrays.PandasArray,pandas.arrays.NumpyExtensionArray
reference/api/pandas.core.groupby.DataFrameGroupBy.backfill,pandas.core.groupby.DataFrameGroupBy.bfill
reference/api/pandas.core.groupby.GroupBy.backfill,pandas.core.groupby.DataFrameGroupBy.bfill
reference/api/pandas.core.resample.Resampler.backfill,pandas.core.resample.Resampler.bfill
8 changes: 7 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
@@ -208,20 +208,25 @@ Removal of prior version deprecations/changes
- All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`)
- Disallow constructing a :class:`arrays.SparseArray` with scalar data (:issue:`53039`)
- Disallow non-standard inputs (anything other than ``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`)
- Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`)
- Disallow units other than "s", "ms", "us", "ns" for datetime64 and timedelta64 dtypes in :func:`array` (:issue:`53817`)
- Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`)
- Removed 'fastpath' keyword in :class:`Categorical` constructor (:issue:`20110`)
- Removed alias :class:`arrays.PandasArray` for :class:`arrays.NumpyExtensionArray` (:issue:`53694`)
- Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`)
- Removed extension test classes ``BaseNoReduceTests``, ``BaseNumericReduceTests``, ``BaseBooleanReduceTests`` (:issue:`54663`)
- Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`)
- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`)
- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)
- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)
- All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`)
- All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`)
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
- Enforced deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`)
- Enforced deprecation ``all`` and ``any`` reductions with ``datetime64`` and :class:`DatetimeTZDtype` dtypes (:issue:`58029`)
- Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`)
- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range` (:issue:`56036`)
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
- Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`)
- Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`)
@@ -448,6 +453,7 @@ Other
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning :class:`RangeIndex` columns (:issue:`57293`)
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would raise a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
- Bug in the DataFrame Interchange Protocol implementation returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)

.. ***DO NOT USE THIS SECTION***
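A sketch of one of the enforced deprecations above, using the replacement spelled out in the new ``TypeError`` text added in this commit; behavior is per the whatsnew entry, not independently verified here:

```python
import pandas as pd

ser = pd.Series(pd.to_datetime(["2024-01-01", "2024-01-02"]))

# any()/all() reductions on datetime64 dtypes now raise TypeError;
# the error message suggests comparing against a sentinel instead:
print((ser != pd.Timestamp(0)).any())  # True
```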
17 changes: 0 additions & 17 deletions pandas/arrays/__init__.py
@@ -35,20 +35,3 @@
"StringArray",
"TimedeltaArray",
]


- def __getattr__(name: str) -> type[NumpyExtensionArray]:
- if name == "PandasArray":
- # GH#53694
- import warnings
-
- from pandas.util._exceptions import find_stack_level
-
- warnings.warn(
- "PandasArray has been renamed NumpyExtensionArray. Use that "
- "instead. This alias will be removed in a future version.",
- FutureWarning,
- stacklevel=find_stack_level(),
- )
- return NumpyExtensionArray
- raise AttributeError(f"module 'pandas.arrays' has no attribute '{name}'")
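With the alias and its module ``__getattr__`` hook gone, ``pd.arrays.PandasArray`` now raises ``AttributeError``; only the new name works:

```python
import numpy as np
import pandas as pd

# pd.arrays.PandasArray  # AttributeError after this change
arr = pd.arrays.NumpyExtensionArray(np.array([1, 2, 3]))
print(arr)
```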
21 changes: 0 additions & 21 deletions pandas/core/arrays/categorical.py
@@ -276,9 +276,6 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
provided).
dtype : CategoricalDtype
An instance of ``CategoricalDtype`` to use for this categorical.
- fastpath : bool
- The 'fastpath' keyword in Categorical is deprecated and will be
- removed in a future version. Use Categorical.from_codes instead.
copy : bool, default True
Whether to copy if the codes are unchanged.
@@ -391,33 +388,15 @@ def __init__(
categories=None,
ordered=None,
dtype: Dtype | None = None,
- fastpath: bool | lib.NoDefault = lib.no_default,
copy: bool = True,
) -> None:
- if fastpath is not lib.no_default:
- # GH#20110
- warnings.warn(
- "The 'fastpath' keyword in Categorical is deprecated and will "
- "be removed in a future version. Use Categorical.from_codes instead",
- DeprecationWarning,
- stacklevel=find_stack_level(),
- )
- else:
- fastpath = False

dtype = CategoricalDtype._from_values_or_dtype(
values, categories, ordered, dtype
)
# At this point, dtype is always a CategoricalDtype, but
# we may have dtype.categories be None, and we need to
# infer categories in a factorization step further below

- if fastpath:
- codes = coerce_indexer_dtype(values, dtype.categories)
- dtype = CategoricalDtype(ordered=False).update_dtype(dtype)
- super().__init__(codes, dtype)
- return

if not is_list_like(values):
# GH#38433
raise TypeError("Categorical input must be list-like")
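The removed ``fastpath`` keyword's deprecation message pointed at ``Categorical.from_codes``, which covers the same build-from-integer-codes use case:

```python
import pandas as pd

dtype = pd.CategoricalDtype(categories=["a", "b", "c", "d"])

# replaces Categorical(codes, dtype=dtype, fastpath=True)
cat = pd.Categorical.from_codes([1, 2, 3], dtype=dtype)
print(cat)  # ['b', 'c', 'd'] with categories ['a', 'b', 'c', 'd']
```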
43 changes: 19 additions & 24 deletions pandas/core/arrays/datetimelike.py
@@ -1661,20 +1661,24 @@ def _groupby_op(
dtype = self.dtype
if dtype.kind == "M":
# Adding/multiplying datetimes is not valid
if how in ["any", "all", "sum", "prod", "cumsum", "cumprod", "var", "skew"]:
if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]:
raise TypeError(f"datetime64 type does not support operation '{how}'")
if how in ["any", "all"]:
# GH#34479
raise TypeError(
f"'{how}' with datetime64 dtypes is no longer supported. "
f"Use (obj != pd.Timestamp(0)).{how}() instead."
)

elif isinstance(dtype, PeriodDtype):
# Adding/multiplying Periods is not valid
if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]:
raise TypeError(f"Period type does not support {how} operations")
if how in ["any", "all"]:
# GH#34479
- warnings.warn(
- f"'{how}' with PeriodDtype is deprecated and will raise in a "
- f"future version. Use (obj != pd.Period(0, freq)).{how}() instead.",
- FutureWarning,
- stacklevel=find_stack_level(),
+ raise TypeError(
+ f"'{how}' with PeriodDtype is no longer supported. "
+ f"Use (obj != pd.Period(0, freq)).{how}() instead."
)
else:
# timedeltas we can add but not multiply
@@ -2424,17 +2428,17 @@ def validate_periods(periods: None) -> None: ...


@overload
- def validate_periods(periods: int | float) -> int: ...
+ def validate_periods(periods: int) -> int: ...


- def validate_periods(periods: int | float | None) -> int | None:
+ def validate_periods(periods: int | None) -> int | None:
"""
If a `periods` argument is passed to the Datetime/Timedelta Array/Index
constructor, validate that it is an integer.
Parameters
----------
- periods : None, float, int
+ periods : None, int
Returns
-------
@@ -2443,22 +2447,13 @@ def validate_periods(periods: int | float | None) -> int | None:
Raises
------
TypeError
- if periods is None, float, or int
+ if periods is neither None nor an integer
"""
- if periods is not None:
- if lib.is_float(periods):
- warnings.warn(
- # GH#56036
- "Non-integer 'periods' in pd.date_range, pd.timedelta_range, "
- "pd.period_range, and pd.interval_range are deprecated and "
- "will raise in a future version.",
- FutureWarning,
- stacklevel=find_stack_level(),
- )
- periods = int(periods)
- elif not lib.is_integer(periods):
- raise TypeError(f"periods must be a number, got {periods}")
- return periods
+ if periods is not None and not lib.is_integer(periods):
+ raise TypeError(f"periods must be an integer, got {periods}")
+ # error: Incompatible return value type (got "int | integer[Any] | None",
+ # expected "int | None")
+ return periods  # type: ignore[return-value]


def _validate_inferred_freq(
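Two enforcements land in this file: ``any``/``all`` groupby reductions on ``PeriodDtype`` now raise instead of warning, and ``validate_periods`` rejects a float ``periods`` instead of silently casting it. A sketch of the latter as it surfaces through ``date_range`` (error text taken from the new code above):

```python
import pandas as pd

pd.date_range("2000-01-01", periods=10)  # int periods: fine

try:
    pd.date_range("2000-01-01", periods=10.0)  # float periods: now rejected
except TypeError as err:
    print(err)  # periods must be an integer, got 10.0
```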
16 changes: 3 additions & 13 deletions pandas/core/arrays/sparse/array.py
@@ -40,7 +40,6 @@

from pandas.core.dtypes.astype import astype_array
from pandas.core.dtypes.cast import (
- construct_1d_arraylike_from_scalar,
find_common_type,
maybe_box_datetimelike,
)
Expand Down Expand Up @@ -399,19 +398,10 @@ def __init__(
dtype = dtype.subtype

if is_scalar(data):
- warnings.warn(
- f"Constructing {type(self).__name__} with scalar data is deprecated "
- "and will raise in a future version. Pass a sequence instead.",
- FutureWarning,
- stacklevel=find_stack_level(),
+ raise TypeError(
+ f"Cannot construct {type(self).__name__} from scalar data. "
+ "Pass a sequence instead."
)
- if sparse_index is None:
- npoints = 1
- else:
- npoints = sparse_index.length
-
- data = construct_1d_arraylike_from_scalar(data, npoints, dtype=None)
- dtype = data.dtype

if dtype is not None:
dtype = pandas_dtype(dtype)
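The scalar branch now fails fast rather than broadcasting to a one-element array; under this change the broadcast must be written out as a sequence:

```python
from pandas.arrays import SparseArray

print(SparseArray([1]))  # sequence: still fine

try:
    SparseArray(1)  # scalar: TypeError after this change
except TypeError as err:
    print(err)  # Cannot construct SparseArray from scalar data. Pass a sequence instead.
```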
18 changes: 6 additions & 12 deletions pandas/core/dtypes/dtypes.py
@@ -1762,24 +1762,18 @@ def _check_fill_value(self) -> None:
val = self._fill_value
if isna(val):
if not is_valid_na_for_dtype(val, self.subtype):
- warnings.warn(
- "Allowing arbitrary scalar fill_value in SparseDtype is "
- "deprecated. In a future version, the fill_value must be "
- "a valid value for the SparseDtype.subtype.",
- FutureWarning,
- stacklevel=find_stack_level(),
+ raise ValueError(
+ # GH#53043
+ "fill_value must be a valid value for the SparseDtype.subtype"
)
else:
dummy = np.empty(0, dtype=self.subtype)
dummy = ensure_wrapped_if_datetimelike(dummy)

if not can_hold_element(dummy, val):
- warnings.warn(
- "Allowing arbitrary scalar fill_value in SparseDtype is "
- "deprecated. In a future version, the fill_value must be "
- "a valid value for the SparseDtype.subtype.",
- FutureWarning,
- stacklevel=find_stack_level(),
+ raise ValueError(
+ # GH#53043
+ "fill_value must be a valid value for the SparseDtype.subtype"
)

@property
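A sketch of the now-enforced validation, assuming ``SparseDtype`` runs ``_check_fill_value`` at construction time: a ``fill_value`` the ``subtype`` cannot hold is rejected outright.

```python
import numpy as np
from pandas import SparseDtype

SparseDtype(np.int64, fill_value=0)  # 0 is a valid int64 value

try:
    SparseDtype(np.int64, fill_value=3.1)  # 3.1 is not representable as int64
except ValueError as err:
    print(err)  # fill_value must be a valid value for the SparseDtype.subtype
```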
2 changes: 1 addition & 1 deletion pandas/core/sorting.py
@@ -577,7 +577,7 @@ def ensure_key_mapped(
if isinstance(
values, Index
): # convert to a new Index subclass, not necessarily the same
- result = Index(result)
+ result = Index(result, tupleize_cols=False)
else:
# try to revert to original type otherwise
type_of_values = type(values)
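``tupleize_cols=False`` keeps tuple-valued key results as plain object values instead of promoting them to a ``MultiIndex``, which is what made ``Index.sort_values`` raise ``TypeError`` for natsort-style keys (GH 56081). A minimal sketch with a hypothetical stand-in key:

```python
import pandas as pd

idx = pd.Index(["item10", "item2"])

def natural_key(values):
    # hypothetical stand-in for natsort.natsort_key: one tuple per label
    return [(len(s), s) for s in values]

# with the fix, the tuples sort as ordinary objects
print(idx.sort_values(key=natural_key))  # Index(['item2', 'item10'], dtype='object')
```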
8 changes: 0 additions & 8 deletions pandas/tests/api/test_api.py
@@ -395,13 +395,5 @@ def test_util_in_top_level(self):
pd.util.foo


- def test_pandas_array_alias():
- msg = "PandasArray has been renamed NumpyExtensionArray"
- with tm.assert_produces_warning(FutureWarning, match=msg):
- res = pd.arrays.PandasArray
-
- assert res is pd.arrays.NumpyExtensionArray


def test_set_module():
assert pd.DataFrame.__module__ == "pandas"
7 changes: 0 additions & 7 deletions pandas/tests/arrays/categorical/test_constructors.py
@@ -35,13 +35,6 @@


class TestCategoricalConstructors:
- def test_fastpath_deprecated(self):
- codes = np.array([1, 2, 3])
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"], ordered=False)
- msg = "The 'fastpath' keyword in Categorical is deprecated"
- with tm.assert_produces_warning(DeprecationWarning, match=msg):
- Categorical(codes, dtype=dtype, fastpath=True)

def test_categorical_from_cat_and_dtype_str_preserve_ordered(self):
# GH#49309 we should preserve orderedness in `res`
cat = Categorical([3, 1], categories=[3, 2, 1], ordered=True)
Expand Down
10 changes: 6 additions & 4 deletions pandas/tests/arrays/sparse/test_array.py
@@ -52,10 +52,11 @@ def test_set_fill_value(self):
arr.fill_value = 2
assert arr.fill_value == 2

msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
msg = "fill_value must be a valid value for the SparseDtype.subtype"
with pytest.raises(ValueError, match=msg):
# GH#53043
arr.fill_value = 3.1
- assert arr.fill_value == 3.1
+ assert arr.fill_value == 2

arr.fill_value = np.nan
assert np.isnan(arr.fill_value)
@@ -64,8 +65,9 @@ def test_set_fill_value(self):
arr.fill_value = True
assert arr.fill_value is True

- with tm.assert_produces_warning(FutureWarning, match=msg):
+ with pytest.raises(ValueError, match=msg):
arr.fill_value = 0
+ assert arr.fill_value is True

arr.fill_value = np.nan
assert np.isnan(arr.fill_value)
18 changes: 5 additions & 13 deletions pandas/tests/arrays/sparse/test_constructors.py
@@ -144,20 +144,12 @@ def test_constructor_spindex_dtype(self):
@pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])])
def test_constructor_spindex_dtype_scalar(self, sparse_index):
# scalar input
msg = "Constructing SparseArray with scalar data is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None)
exp = SparseArray([1], dtype=None)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == SparseDtype(np.int64)
assert arr.fill_value == 0
msg = "Cannot construct SparseArray from scalar data. Pass a sequence instead"
with pytest.raises(TypeError, match=msg):
SparseArray(data=1, sparse_index=sparse_index, dtype=None)

- with tm.assert_produces_warning(FutureWarning, match=msg):
- arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None)
- exp = SparseArray([1], dtype=None)
- tm.assert_sp_array_equal(arr, exp)
- assert arr.dtype == SparseDtype(np.int64)
- assert arr.fill_value == 0
+ with pytest.raises(TypeError, match=msg):
+ SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None)

def test_constructor_spindex_dtype_scalar_broadcasts(self):
arr = SparseArray(
1 change: 0 additions & 1 deletion pandas/tests/arrays/sparse/test_dtype.py
@@ -84,7 +84,6 @@ def test_nans_not_equal():
(SparseDtype("float64"), SparseDtype("float32")),
(SparseDtype("float64"), SparseDtype("float64", 0)),
(SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)),
- (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
(SparseDtype("float64"), np.dtype("float64")),
]

8 changes: 5 additions & 3 deletions pandas/tests/frame/methods/test_to_csv.py
@@ -1406,19 +1406,21 @@ def test_to_csv_categorical_and_interval(self):
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert result == expected

- def test_to_csv_warn_when_zip_tar_and_append_mode(self):
+ def test_to_csv_warn_when_zip_tar_and_append_mode(self, tmp_path):
# GH57875
df = DataFrame({"a": [1, 2, 3]})
msg = (
"zip and tar do not support mode 'a' properly. This combination will "
"result in multiple files with same name being added to the archive"
)
+ zip_path = tmp_path / "test.zip"
+ tar_path = tmp_path / "test.tar"
with tm.assert_produces_warning(
RuntimeWarning, match=msg, raise_on_extra_warnings=False
):
df.to_csv("test.zip", mode="a")
df.to_csv(zip_path, mode="a")

with tm.assert_produces_warning(
RuntimeWarning, match=msg, raise_on_extra_warnings=False
):
df.to_csv("test.tar", mode="a")
df.to_csv(tar_path, mode="a")
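The test now writes its archives through pytest's ``tmp_path`` fixture instead of dropping ``test.zip``/``test.tar`` into the working directory. A minimal illustration of the fixture (hypothetical test, not from this diff):

```python
# tmp_path is a pathlib.Path to a fresh per-test temporary directory
def test_writes_to_tmp(tmp_path):
    out = tmp_path / "out.csv"
    out.write_text("a,b\n1,2\n")
    assert out.exists()
```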
