From 225190264c9eb65be8018972b2cfe76b45bc6a39 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 10 May 2023 17:57:22 -0700 Subject: [PATCH 1/7] BUG: SparseDtype requires numpy dtype (#53160) * BUG: SparseDtype requires numpy dtype * GH ref --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/arrays/sparse/dtype.py | 3 +++ pandas/tests/arrays/sparse/test_astype.py | 16 +--------------- pandas/tests/arrays/sparse/test_dtype.py | 7 +++++++ 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 010773b2806a2..e3bfd0cf4150f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -426,6 +426,7 @@ Reshaping Sparse ^^^^^^ +- Bug in :class:`SparseDtype` constructor failing to raise ``TypeError`` when given an incompatible ``dtype`` for its subtype, which must be a ``numpy`` dtype (:issue:`53160`) - Bug in :meth:`arrays.SparseArray.map` allowed the fill value to be included in the sparse values (:issue:`52095`) - diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index dadd161ceeb38..5747ff807600d 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -91,6 +91,9 @@ def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None: dtype = pandas_dtype(dtype) if is_string_dtype(dtype): dtype = np.dtype("object") + if not isinstance(dtype, np.dtype): + # GH#53160 + raise TypeError("SparseDtype subtype must be a numpy dtype") if fill_value is None: fill_value = na_value_for_dtype(dtype) diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py index 86d69610059b3..d729a31668ade 100644 --- a/pandas/tests/arrays/sparse/test_astype.py +++ b/pandas/tests/arrays/sparse/test_astype.py @@ -3,11 +3,7 @@ from pandas._libs.sparse import IntIndex -from pandas import ( - DataFrame, - Series, - Timestamp, -) +from pandas import Timestamp import 
pandas._testing as tm from pandas.core.arrays.sparse import ( SparseArray, @@ -135,13 +131,3 @@ def test_astype_dt64_to_int64(self): arr3 = SparseArray(values, dtype=dtype) result3 = arr3.astype("int64") tm.assert_numpy_array_equal(result3, expected) - - -def test_dtype_sparse_with_fill_value_not_present_in_data(): - # GH 49987 - df = DataFrame([["a", 0], ["b", 1], ["b", 2]], columns=["A", "B"]) - result = df["A"].astype(SparseDtype("category", fill_value="c")) - expected = Series( - ["a", "b", "b"], name="A", dtype=SparseDtype("object", fill_value="c") - ) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 58fedbd3e4231..88f8577ded5b0 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -207,3 +207,10 @@ def test_repr(): result = str(SparseDtype(object, fill_value="0")) expected = "Sparse[object, '0']" assert result == expected + + +def test_sparse_dtype_subtype_must_be_numpy_dtype(): + # GH#53160 + msg = "SparseDtype subtype must be a numpy dtype" + with pytest.raises(TypeError, match=msg): + SparseDtype("category", fill_value="c") From 0a18ad7eb63ba055e63cd566084b0d970d300553 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 May 2023 18:01:48 -0700 Subject: [PATCH 2/7] API/BUG: Make `to_json` `index=` arg consistent with `orient` arg (#52143) * API/BUG: Make to_json index= consistent with orient - split and table allow index=True/False - records and values only allow index=False - index and columns only allow index=True - raise for contradictions in the latter two - see #25513 * style: lint * style: make mypy happy * review: simplify * review: clarify and consolidate branches * style: add explainer comment * doc: change error message in _json * docs: update whatsnew 2.1.0 * docs: sort whatsnew --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/generic.py | 11 ++++++----- pandas/io/json/_json.py | 19 
++++++++++++++----- pandas/tests/io/json/test_pandas.py | 23 ++++++++++++++++++++--- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index e3bfd0cf4150f..5b62883c2741e 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -92,13 +92,13 @@ Other enhancements - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide ` (:issue:`48347`) - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`) - Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`) +- Improved error handling when using :meth:`DataFrame.to_json` with incompatible ``index`` and ``orient`` arguments (:issue:`52143`) - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`) - Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"`` - :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`) - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) -- .. --------------------------------------------------------------------------- .. 
_whatsnew_210.notable_bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 017fb44413c8f..93fecc4a7b096 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2306,7 +2306,7 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = None, lines: bool_t = False, compression: CompressionOptions = "infer", - index: bool_t = True, + index: bool_t | None = None, indent: int | None = None, storage_options: StorageOptions = None, mode: Literal["a", "w"] = "w", @@ -2375,10 +2375,11 @@ def to_json( .. versionchanged:: 1.4.0 Zstandard support. - index : bool, default True - Whether to include the index values in the JSON string. Not - including the index (``index=False``) is only supported when - orient is 'split' or 'table'. + index : bool or None, default None + The index is only used when 'orient' is 'split', 'index', 'columns', + or 'table'. Of these, 'index' and 'columns' do not support + `index=False`. + indent : int, optional Length of whitespace used to indent each record. 
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 8775c65f140a8..5c2fba814375f 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -100,7 +100,7 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = ..., lines: bool = ..., compression: CompressionOptions = ..., - index: bool = ..., + index: bool | None = ..., indent: int = ..., storage_options: StorageOptions = ..., mode: Literal["a", "w"] = ..., @@ -120,7 +120,7 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = ..., lines: bool = ..., compression: CompressionOptions = ..., - index: bool = ..., + index: bool | None = ..., indent: int = ..., storage_options: StorageOptions = ..., mode: Literal["a", "w"] = ..., @@ -139,15 +139,24 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = None, lines: bool = False, compression: CompressionOptions = "infer", - index: bool = True, + index: bool | None = None, indent: int = 0, storage_options: StorageOptions = None, mode: Literal["a", "w"] = "w", ) -> str | None: - if not index and orient not in ["split", "table"]: + if orient in ["records", "values"] and index is True: raise ValueError( - "'index=False' is only valid when 'orient' is 'split' or 'table'" + "'index=True' is only valid when 'orient' is 'split', 'table', " + "'index', or 'columns'." ) + elif orient in ["index", "columns"] and index is False: + raise ValueError( + "'index=False' is only valid when 'orient' is 'split', 'table', " + "'records', or 'values'." 
+ ) + elif index is None: + # will be ignored for orient='records' and 'values' + index = True if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 788a6e97e3d0f..e93cd836fa307 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1472,17 +1472,34 @@ def test_index_false_to_json_table(self, data): assert result == expected - @pytest.mark.parametrize("orient", ["records", "index", "columns", "values"]) + @pytest.mark.parametrize("orient", ["index", "columns"]) def test_index_false_error_to_json(self, orient): - # GH 17394 + # GH 17394, 25513 # Testing error message from to_json with index=False df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) - msg = "'index=False' is only valid when 'orient' is 'split' or 'table'" + msg = ( + "'index=False' is only valid when 'orient' is 'split', " + "'table', 'records', or 'values'" + ) with pytest.raises(ValueError, match=msg): df.to_json(orient=orient, index=False) + @pytest.mark.parametrize("orient", ["records", "values"]) + def test_index_true_error_to_json(self, orient): + # GH 25513 + # Testing error message from to_json with index=True + + df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) + + msg = ( + "'index=True' is only valid when 'orient' is 'split', " + "'table', 'index', or 'columns'" + ) + with pytest.raises(ValueError, match=msg): + df.to_json(orient=orient, index=True) + @pytest.mark.parametrize("orient", ["split", "table"]) @pytest.mark.parametrize("index", [True, False]) def test_index_false_from_json_to_json(self, orient, index): From 3827cafb2390824a05bc81d1bcc045c67e27606d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 10 May 2023 18:04:35 -0700 Subject: [PATCH 3/7] DEPS: Unpin pydata-sphinx-theme (#53029) * DOC: migrate from sphinx-panels to sphinx-design * 
update css for latest pydata-sphinx-theme * add sphinx-design to env * further fix css * Convert panels to grid, address warning, remove panel dependency * Try addressing failures * Fix css * Fix more css * Just use one comparison card class * Unpin pydata-sphinx-theme * Uncomment other workflows * address some warnings * Revert "Uncomment other workflows" This reverts commit dbdfc389baa0044009d2dea542ff06228af75a2e. * Change bath to json_url * Change back to path * No / * Change version * Update doc/source/conf.py Co-authored-by: Marc Garcia --------- Co-authored-by: Joris Van den Bossche Co-authored-by: Marc Garcia --- doc/source/conf.py | 6 +++--- doc/source/versions.json | 42 ++++++++++++++++++++++++++++++++++++++++ environment.yml | 2 +- requirements-dev.txt | 2 +- web/pandas/versions.json | 42 ---------------------------------------- 5 files changed, 47 insertions(+), 47 deletions(-) create mode 100644 doc/source/versions.json delete mode 100644 web/pandas/versions.json diff --git a/doc/source/conf.py b/doc/source/conf.py index 6f7e770e5d554..66fca61c2c6e5 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -237,14 +237,14 @@ html_theme_options = { "external_links": [], - "footer_items": ["pandas_footer", "sphinx-version"], + "footer_start": ["pandas_footer", "sphinx-version"], "github_url": "https://github.com/pandas-dev/pandas", "twitter_url": "https://twitter.com/pandas_dev", - "google_analytics_id": "UA-27880019-2", + "analytics": {"google_analytics_id": "UA-27880019-2"}, "logo": {"image_dark": "https://pandas.pydata.org/static/img/pandas_white.svg"}, "navbar_end": ["version-switcher", "theme-switcher", "navbar-icon-links"], "switcher": { - "json_url": "/versions.json", + "json_url": "https://pandas.pydata.org/versions.json", "version_match": switcher_version, }, "icon_links": [ diff --git a/doc/source/versions.json b/doc/source/versions.json new file mode 100644 index 0000000000000..4be8f10a88334 --- /dev/null +++ b/doc/source/versions.json @@ 
-0,0 +1,42 @@ +[ + { + "name": "dev", + "version": "dev", + "url": "https://pandas.pydata.org/docs/dev/" + }, + { + "name": "2.0 (stable)", + "version": "2.0", + "url": "https://pandas.pydata.org/docs/" + }, + { + "name": "1.5", + "version": "1.5", + "url": "https://pandas.pydata.org/pandas-docs/version/1.5/" + }, + { + "name": "1.4", + "version": "1.4", + "url": "https://pandas.pydata.org/pandas-docs/version/1.4/" + }, + { + "name": "1.3", + "version": "1.3", + "url": "https://pandas.pydata.org/pandas-docs/version/1.3/" + }, + { + "name": "1.2", + "version": "1.2", + "url": "https://pandas.pydata.org/pandas-docs/version/1.2/" + }, + { + "name": "1.1", + "version": "1.1", + "url": "https://pandas.pydata.org/pandas-docs/version/1.1/" + }, + { + "name": "1.0", + "version": "1.0", + "url": "https://pandas.pydata.org/pandas-docs/version/1.0/" + } +] diff --git a/environment.yml b/environment.yml index 90ed7634ec74b..fb8321a9fb6a7 100644 --- a/environment.yml +++ b/environment.yml @@ -89,7 +89,7 @@ dependencies: - gitdb - natsort # DataFrame.sort_values doctest - numpydoc - - pydata-sphinx-theme<0.11 + - pydata-sphinx-theme - pytest-cython # doctest - sphinx - sphinx-design diff --git a/requirements-dev.txt b/requirements-dev.txt index d3054ee34a1f4..546116b1fa23d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -64,7 +64,7 @@ gitpython gitdb natsort numpydoc -pydata-sphinx-theme<0.11 +pydata-sphinx-theme pytest-cython sphinx sphinx-design diff --git a/web/pandas/versions.json b/web/pandas/versions.json deleted file mode 100644 index 81021e5a7c72f..0000000000000 --- a/web/pandas/versions.json +++ /dev/null @@ -1,42 +0,0 @@ -[ - { - "name": "dev", - "version": "docs/dev", - "url": "/docs/dev/" - }, - { - "name": "2.0 (stable)", - "version": "docs", - "url": "/docs/" - }, - { - "name": "1.5", - "version": "pandas-docs/version/1.5", - "url": "/pandas-docs/version/1.5/" - }, - { - "name": "1.4", - "version": "pandas-docs/version/1.4", - "url": 
"/pandas-docs/version/1.4/" - }, - { - "name": "1.3", - "version": "pandas-docs/version/1.3", - "url": "/pandas-docs/version/1.3/" - }, - { - "name": "1.2", - "version": "pandas-docs/version/1.2", - "url": "/pandas-docs/version/1.2/" - }, - { - "name": "1.1", - "version": "pandas-docs/version/1.1", - "url": "/pandas-docs/version/1.1/" - }, - { - "name": "1.0", - "version": "pandas-docs/version/1.0", - "url": "/pandas-docs/version/1.0/" - } -] From fa9bab9e1a7fd40c862411ce481dba2d6a3607b9 Mon Sep 17 00:00:00 2001 From: Stelios Petrakis <92467926+steliospetrakis02@users.noreply.github.com> Date: Thu, 11 May 2023 18:23:30 +0300 Subject: [PATCH 4/7] Add test for groupby with TimeGrouper (#53173) --- pandas/tests/groupby/test_groupby.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2e432a768af9e..7bda7c575d994 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3058,3 +3058,27 @@ def test_groupby_selection_other_methods(df): tm.assert_frame_equal( g.filter(lambda x: len(x) == 3), g_exp.filter(lambda x: len(x) == 3) ) + + +def test_groupby_with_Time_Grouper(): + idx2 = [ + to_datetime("2016-08-31 22:08:12.000"), + to_datetime("2016-08-31 22:09:12.200"), + to_datetime("2016-08-31 22:20:12.400"), + ] + + test_data = DataFrame( + {"quant": [1.0, 1.0, 3.0], "quant2": [1.0, 1.0, 3.0], "time2": idx2} + ) + + expected_output = DataFrame( + { + "time2": date_range("2016-08-31 22:08:00", periods=13, freq="1T"), + "quant": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + "quant2": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + } + ) + + df = test_data.groupby(Grouper(key="time2", freq="1T")).count().reset_index() + + tm.assert_frame_equal(df, expected_output) From fa69e1427afaec4a1f57788538095c2bcd7a5718 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Thu, 11 May 2023 14:17:25 -0600 Subject: [PATCH 5/7] DOC: Fix rst formatting in 
dev environment docs (#53187) --- doc/source/development/contributing_environment.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/contributing_environment.rst b/doc/source/development/contributing_environment.rst index 38e354d8c57d6..8bc15d6968afc 100644 --- a/doc/source/development/contributing_environment.rst +++ b/doc/source/development/contributing_environment.rst @@ -225,7 +225,7 @@ To compile pandas with meson, run:: # Build and install pandas python -m pip install -ve . --no-build-isolation -** Build options ** +**Build options** It is possible to pass options from the pip frontend to the meson backend if you would like to configure your install. Occasionally, you'll want to use this to adjust the build directory, and/or toggle debug/optimization levels. From 45eb702c2a38216231ba7c02214e7659aa0eb5ef Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 May 2023 13:24:01 -0700 Subject: [PATCH 6/7] DEPR: require SparseDtype.fill_value be compatible with SparseDtype.subtype (#53043) * DEPR: require SparseDtype.fill_value be compatible with SparseDtype.subtype * filter more specific --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/arrays/sparse/dtype.py | 52 ++++++++++++++++++------ pandas/tests/arrays/sparse/test_array.py | 28 ++++--------- pandas/tests/arrays/sparse/test_dtype.py | 16 ++++++-- pandas/tests/reshape/test_get_dummies.py | 48 +++++++++++++++------- 5 files changed, 94 insertions(+), 51 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 5b62883c2741e..63e212e40e9a3 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -261,6 +261,7 @@ Deprecations - Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`) - Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. 
In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`) - Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`) +- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) - diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 5747ff807600d..f4f87f60cc3a7 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -18,6 +18,7 @@ ExtensionDtype, register_extension_dtype, ) +from pandas.core.dtypes.cast import can_hold_element from pandas.core.dtypes.common import ( is_bool_dtype, is_object_dtype, @@ -25,11 +26,15 @@ is_string_dtype, pandas_dtype, ) +from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, isna, na_value_for_dtype, ) +from pandas.core.construction import ensure_wrapped_if_datetimelike + if TYPE_CHECKING: from pandas._typing import ( Dtype, @@ -164,18 +169,41 @@ def _check_fill_value(self): raise ValueError( f"fill_value must be a scalar. Got {self._fill_value} instead" ) - # TODO: Right now we can use Sparse boolean array - # with any fill_value. Here was an attempt - # to allow only 3 value: True, False or nan - # but plenty test has failed. - # see pull 44955 - # if self._is_boolean and not ( - # is_bool(self._fill_value) or isna(self._fill_value) - # ): - # raise ValueError( - # "fill_value must be True, False or nan " - # f"for boolean type. 
Got {self._fill_value} instead" - # ) + + # GH#23124 require fill_value and subtype to match + val = self._fill_value + if isna(val): + if not is_valid_na_for_dtype(val, self.subtype): + warnings.warn( + "Allowing arbitrary scalar fill_value in SparseDtype is " + "deprecated. In a future version, the fill_value must be " + "a valid value for the SparseDtype.subtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif isinstance(self.subtype, CategoricalDtype): + # TODO: is this even supported? It is reached in + # test_dtype_sparse_with_fill_value_not_present_in_data + if self.subtype.categories is None or val not in self.subtype.categories: + warnings.warn( + "Allowing arbitrary scalar fill_value in SparseDtype is " + "deprecated. In a future version, the fill_value must be " + "a valid value for the SparseDtype.subtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + dummy = np.empty(0, dtype=self.subtype) + dummy = ensure_wrapped_if_datetimelike(dummy) + + if not can_hold_element(dummy, val): + warnings.warn( + "Allowing arbitrary scalar fill_value in SparseDtype is " + "deprecated. In a future version, the fill_value must be " + "a valid value for the SparseDtype.subtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) @property def _is_na_fill_value(self) -> bool: diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 4a0795137f80b..b8effc3eff1d1 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -52,33 +52,21 @@ def test_set_fill_value(self): arr.fill_value = 2 assert arr.fill_value == 2 - # TODO: this seems fine? You can construct an integer - # sparsearray with NaN fill value, why not update one? 
- # coerces to int - # msg = "unable to set fill_value 3\\.1 to int64 dtype" - # with pytest.raises(ValueError, match=msg): - arr.fill_value = 3.1 + msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + arr.fill_value = 3.1 assert arr.fill_value == 3.1 - # msg = "unable to set fill_value nan to int64 dtype" - # with pytest.raises(ValueError, match=msg): arr.fill_value = np.nan assert np.isnan(arr.fill_value) arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_) arr.fill_value = True - assert arr.fill_value - - # FIXME: don't leave commented-out - # coerces to bool - # TODO: we can construct an sparse array of bool - # type and use as fill_value any value - # msg = "fill_value must be True, False or nan" - # with pytest.raises(ValueError, match=msg): - # arr.fill_value = 0 - - # msg = "unable to set fill_value nan to bool dtype" - # with pytest.raises(ValueError, match=msg): + assert arr.fill_value is True + + with tm.assert_produces_warning(FutureWarning, match=msg): + arr.fill_value = 0 + arr.fill_value = np.nan assert np.isnan(arr.fill_value) diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 88f8577ded5b0..8337a79e10243 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -1,4 +1,5 @@ import re +import warnings import numpy as np import pytest @@ -67,15 +68,22 @@ def test_nans_equal(): assert b == a -@pytest.mark.parametrize( - "a, b", - [ +with warnings.catch_warnings(): + msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated" + warnings.filterwarnings("ignore", msg, category=FutureWarning) + + tups = [ (SparseDtype("float64"), SparseDtype("float32")), (SparseDtype("float64"), SparseDtype("float64", 0)), (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)), (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)), 
(SparseDtype("float64"), np.dtype("float64")), - ], + ] + + +@pytest.mark.parametrize( + "a, b", + tups, ) def test_not_equal(a, b): assert a != b diff --git a/pandas/tests/reshape/test_get_dummies.py b/pandas/tests/reshape/test_get_dummies.py index fab9b0a5d1846..6e943863072f1 100644 --- a/pandas/tests/reshape/test_get_dummies.py +++ b/pandas/tests/reshape/test_get_dummies.py @@ -57,7 +57,10 @@ def test_get_dummies_basic(self, sparse, dtype): dtype=self.effective_dtype(dtype), ) if sparse: - expected = expected.apply(SparseArray, fill_value=0.0) + if dtype.kind == "b": + expected = expected.apply(SparseArray, fill_value=False) + else: + expected = expected.apply(SparseArray, fill_value=0.0) result = get_dummies(s_list, sparse=sparse, dtype=dtype) tm.assert_frame_equal(result, expected) @@ -142,7 +145,10 @@ def test_get_dummies_include_na(self, sparse, dtype): {"a": [1, 0, 0], "b": [0, 1, 0]}, dtype=self.effective_dtype(dtype) ) if sparse: - exp = exp.apply(SparseArray, fill_value=0.0) + if dtype.kind == "b": + exp = exp.apply(SparseArray, fill_value=False) + else: + exp = exp.apply(SparseArray, fill_value=0.0) tm.assert_frame_equal(res, exp) # Sparse dataframes do not allow nan labelled columns, see #GH8822 @@ -155,7 +161,10 @@ def test_get_dummies_include_na(self, sparse, dtype): # hack (NaN handling in assert_index_equal) exp_na.columns = res_na.columns if sparse: - exp_na = exp_na.apply(SparseArray, fill_value=0.0) + if dtype.kind == "b": + exp_na = exp_na.apply(SparseArray, fill_value=False) + else: + exp_na = exp_na.apply(SparseArray, fill_value=0.0) tm.assert_frame_equal(res_na, exp_na) res_just_na = get_dummies([np.nan], dummy_na=True, sparse=sparse, dtype=dtype) @@ -174,7 +183,7 @@ def test_get_dummies_unicode(self, sparse): {"letter_e": [True, False, False], f"letter_{eacute}": [False, True, True]} ) if sparse: - exp = exp.apply(SparseArray, fill_value=0) + exp = exp.apply(SparseArray, fill_value=False) tm.assert_frame_equal(res, exp) def 
test_dataframe_dummies_all_obj(self, df, sparse): @@ -216,7 +225,10 @@ def test_dataframe_dummies_mix_default(self, df, sparse, dtype): result = get_dummies(df, sparse=sparse, dtype=dtype) if sparse: arr = SparseArray - typ = SparseDtype(dtype, 0) + if dtype.kind == "b": + typ = SparseDtype(dtype, False) + else: + typ = SparseDtype(dtype, 0) else: arr = np.array typ = dtype @@ -296,7 +308,7 @@ def test_dataframe_dummies_subset(self, df, sparse): expected[["C"]] = df[["C"]] if sparse: cols = ["from_A_a", "from_A_b"] - expected[cols] = expected[cols].astype(SparseDtype("bool", 0)) + expected[cols] = expected[cols].astype(SparseDtype("bool", False)) tm.assert_frame_equal(result, expected) def test_dataframe_dummies_prefix_sep(self, df, sparse): @@ -314,7 +326,7 @@ def test_dataframe_dummies_prefix_sep(self, df, sparse): expected = expected[["C", "A..a", "A..b", "B..b", "B..c"]] if sparse: cols = ["A..a", "A..b", "B..b", "B..c"] - expected[cols] = expected[cols].astype(SparseDtype("bool", 0)) + expected[cols] = expected[cols].astype(SparseDtype("bool", False)) tm.assert_frame_equal(result, expected) @@ -359,7 +371,7 @@ def test_dataframe_dummies_prefix_dict(self, sparse): columns = ["from_A_a", "from_A_b", "from_B_b", "from_B_c"] expected[columns] = expected[columns].astype(bool) if sparse: - expected[columns] = expected[columns].astype(SparseDtype("bool", 0)) + expected[columns] = expected[columns].astype(SparseDtype("bool", False)) tm.assert_frame_equal(result, expected) @@ -371,7 +383,10 @@ def test_dataframe_dummies_with_na(self, df, sparse, dtype): if sparse: arr = SparseArray - typ = SparseDtype(dtype, 0) + if dtype.kind == "b": + typ = SparseDtype(dtype, False) + else: + typ = SparseDtype(dtype, 0) else: arr = np.array typ = dtype @@ -399,7 +414,10 @@ def test_dataframe_dummies_with_categorical(self, df, sparse, dtype): result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1) if sparse: arr = SparseArray - typ = SparseDtype(dtype, 0) + if 
dtype.kind == "b": + typ = SparseDtype(dtype, False) + else: + typ = SparseDtype(dtype, 0) else: arr = np.array typ = dtype @@ -456,7 +474,7 @@ def test_get_dummies_basic_drop_first(self, sparse): result = get_dummies(s_list, drop_first=True, sparse=sparse) if sparse: - expected = expected.apply(SparseArray, fill_value=0) + expected = expected.apply(SparseArray, fill_value=False) tm.assert_frame_equal(result, expected) result = get_dummies(s_series, drop_first=True, sparse=sparse) @@ -490,7 +508,7 @@ def test_get_dummies_basic_drop_first_NA(self, sparse): res = get_dummies(s_NA, drop_first=True, sparse=sparse) exp = DataFrame({"b": [0, 1, 0]}, dtype=bool) if sparse: - exp = exp.apply(SparseArray, fill_value=0) + exp = exp.apply(SparseArray, fill_value=False) tm.assert_frame_equal(res, exp) @@ -499,7 +517,7 @@ def test_get_dummies_basic_drop_first_NA(self, sparse): ["b", np.nan], axis=1 ) if sparse: - exp_na = exp_na.apply(SparseArray, fill_value=0) + exp_na = exp_na.apply(SparseArray, fill_value=False) tm.assert_frame_equal(res_na, exp_na) res_just_na = get_dummies( @@ -513,7 +531,7 @@ def test_dataframe_dummies_drop_first(self, df, sparse): result = get_dummies(df, drop_first=True, sparse=sparse) expected = DataFrame({"A_b": [0, 1, 0], "B_c": [0, 0, 1]}, dtype=bool) if sparse: - expected = expected.apply(SparseArray, fill_value=0) + expected = expected.apply(SparseArray, fill_value=False) tm.assert_frame_equal(result, expected) def test_dataframe_dummies_drop_first_with_categorical(self, df, sparse, dtype): @@ -632,7 +650,7 @@ def test_get_dummies_duplicate_columns(self, df): def test_get_dummies_all_sparse(self): df = DataFrame({"A": [1, 2]}) result = get_dummies(df, columns=["A"], sparse=True) - dtype = SparseDtype("bool", 0) + dtype = SparseDtype("bool", False) expected = DataFrame( { "A_1": SparseArray([1, 0], dtype=dtype), From e19ceb76a4c5903469fea93040c174633c84b9b9 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 12 May 2023 01:45:29 +0200 Subject: 
[PATCH 7/7] DOC: Fix version switcher (#53188) --- {doc/source => web/pandas}/versions.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {doc/source => web/pandas}/versions.json (100%) diff --git a/doc/source/versions.json b/web/pandas/versions.json similarity index 100% rename from doc/source/versions.json rename to web/pandas/versions.json