Skip to content

Commit e7ad884

Browse files
TomAugspurger authored and jreback committed
Deprecate SparseDataFrame and SparseSeries (#26137)
1 parent 7629a18 commit e7ad884

40 files changed

+488
-175
lines changed

doc/source/user_guide/sparse.rst

Lines changed: 204 additions & 122 deletions
Large diffs are not rendered by default.

doc/source/whatsnew/v0.25.0.rst

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,14 +299,39 @@ Other API Changes
299299
Deprecations
300300
~~~~~~~~~~~~
301301

302+
Sparse Subclasses
303+
^^^^^^^^^^^^^^^^^
304+
305+
The ``SparseSeries`` and ``SparseDataFrame`` subclasses are deprecated. Their functionality is better provided
306+
by a ``Series`` or ``DataFrame`` with sparse values.
307+
308+
**Previous Way**
309+
310+
.. ipython:: python
311+
:okwarning:
312+
313+
df = pd.SparseDataFrame({"A": [0, 0, 1, 2]})
314+
df.dtypes
315+
316+
**New Way**
317+
318+
.. ipython:: python
319+
320+
df = pd.DataFrame({"A": pd.SparseArray([0, 0, 1, 2])})
321+
df.dtypes
322+
323+
The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`).
324+
325+
Other Deprecations
326+
^^^^^^^^^^^^^^^^^^
327+
302328
- The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`).
303329
- Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
304330
- The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or
305331
the :meth:`SparseArray.to_dense` method instead (:issue:`26421`).
306332
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
307333
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`).
308334

309-
310335
.. _whatsnew_0250.prior_deprecations:
311336

312337
Removal of prior version deprecations/changes

pandas/core/arrays/sparse.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2014,9 +2014,9 @@ def from_coo(cls, A, dense_index=False):
20142014
from pandas.core.sparse.scipy_sparse import _coo_to_sparse_series
20152015
from pandas import Series
20162016

2017-
result = _coo_to_sparse_series(A, dense_index=dense_index)
2018-
# SparseSeries -> Series[sparse]
2019-
result = Series(result.values, index=result.index, copy=False)
2017+
result = _coo_to_sparse_series(A, dense_index=dense_index,
2018+
sparse_series=False)
2019+
result = Series(result.array, index=result.index, copy=False)
20202020

20212021
return result
20222022

pandas/core/frame.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1930,13 +1930,13 @@ def to_sparse(self, fill_value=None, kind='block'):
19301930
>>> type(df)
19311931
<class 'pandas.core.frame.DataFrame'>
19321932
1933-
>>> sdf = df.to_sparse()
1934-
>>> sdf
1933+
>>> sdf = df.to_sparse() # doctest: +SKIP
1934+
>>> sdf # doctest: +SKIP
19351935
0 1
19361936
0 NaN NaN
19371937
1 1.0 NaN
19381938
2 NaN 1.0
1939-
>>> type(sdf)
1939+
>>> type(sdf) # doctest: +SKIP
19401940
<class 'pandas.core.sparse.frame.SparseDataFrame'>
19411941
"""
19421942
from pandas.core.sparse.api import SparseDataFrame

pandas/core/generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5589,7 +5589,7 @@ def ftypes(self):
55895589
3 float64:dense
55905590
dtype: object
55915591
5592-
>>> pd.SparseDataFrame(arr).ftypes
5592+
>>> pd.SparseDataFrame(arr).ftypes # doctest: +SKIP
55935593
0 float64:sparse
55945594
1 float64:sparse
55955595
2 float64:sparse

pandas/core/series.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1586,7 +1586,6 @@ def to_sparse(self, kind='block', fill_value=None):
15861586
SparseSeries
15871587
Sparse representation of the Series.
15881588
"""
1589-
# TODO: deprecate
15901589
from pandas.core.sparse.series import SparseSeries
15911590

15921591
values = SparseArray(self, kind=kind, fill_value=fill_value)

pandas/core/sparse/frame.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,24 @@
2828
from pandas.core.sparse.series import SparseSeries
2929

3030
_shared_doc_kwargs = dict(klass='SparseDataFrame')
31+
depr_msg = """\
32+
SparseDataFrame is deprecated and will be removed in a future version.
33+
Use a regular DataFrame whose columns are SparseArrays instead.
34+
35+
See http://pandas.pydata.org/pandas-docs/stable/\
36+
user_guide/sparse.html#migrating for more.
37+
"""
3138

3239

3340
class SparseDataFrame(DataFrame):
3441
"""
3542
DataFrame containing sparse floating point data in the form of SparseSeries
3643
objects
3744
45+
.. deprecated:: 0.25.0
46+
47+
Use a DataFrame with sparse values instead.
48+
3849
Parameters
3950
----------
4051
data : same types as can be passed to DataFrame or scipy.sparse.spmatrix
@@ -56,6 +67,7 @@ class SparseDataFrame(DataFrame):
5667
def __init__(self, data=None, index=None, columns=None, default_kind=None,
5768
default_fill_value=None, dtype=None, copy=False):
5869

70+
warnings.warn(depr_msg, FutureWarning, stacklevel=2)
5971
# pick up the defaults from the Sparse structures
6072
if isinstance(data, SparseDataFrame):
6173
if index is None:

pandas/core/sparse/scipy_sparse.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,14 +116,32 @@ def _sparse_series_to_coo(ss, row_levels=(0, ), column_levels=(1, ),
116116
return sparse_matrix, rows, columns
117117

118118

119-
def _coo_to_sparse_series(A, dense_index=False):
119+
def _coo_to_sparse_series(A, dense_index: bool = False,
120+
sparse_series: bool = True):
120121
"""
121122
Convert a scipy.sparse.coo_matrix to a SparseSeries.
122-
Use the defaults given in the SparseSeries constructor.
123+
124+
Parameters
125+
----------
126+
A : scipy.sparse.coo.coo_matrix
127+
dense_index : bool, default False
128+
sparse_series : bool, default True
129+
130+
Returns
131+
-------
132+
Series or SparseSeries
123133
"""
134+
from pandas import SparseDtype
135+
124136
s = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
125137
s = s.sort_index()
126-
s = s.to_sparse() # TODO: specify kind?
138+
if sparse_series:
139+
# TODO(SparseSeries): remove this and the sparse_series keyword.
140+
# This is just here to avoid the SparseSeries FutureWarning when
141+
# _coo_to_sparse_series is called via Series.sparse.from_coo
142+
s = s.to_sparse() # TODO: specify kind?
143+
else:
144+
s = s.astype(SparseDtype(s.dtype))
127145
if dense_index:
128146
# is there a better constructor method to use here?
129147
i = range(A.shape[0])

pandas/core/sparse/series.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,24 @@
3232
optional_labels='', optional_axis='')
3333

3434

35+
depr_msg = """\
36+
SparseSeries is deprecated and will be removed in a future version.
37+
Use a Series with sparse values instead.
38+
39+
>>> series = pd.Series(pd.SparseArray(...))
40+
41+
See http://pandas.pydata.org/pandas-docs/stable/\
42+
user_guide/sparse.html#migrating for more.
43+
"""
44+
45+
3546
class SparseSeries(Series):
3647
"""Data structure for labeled, sparse floating point data
3748
49+
.. deprecated:: 0.25.0
50+
51+
Use a Series with sparse values instead.
52+
3853
Parameters
3954
----------
4055
data : {array-like, Series, SparseSeries, dict}
@@ -60,6 +75,7 @@ class SparseSeries(Series):
6075
def __init__(self, data=None, index=None, sparse_index=None, kind='block',
6176
fill_value=None, name=None, dtype=None, copy=False,
6277
fastpath=False):
78+
warnings.warn(depr_msg, FutureWarning, stacklevel=2)
6379
# TODO: Most of this should be refactored and shared with Series
6480
# 1. BlockManager -> array
6581
# 2. Series.index, Series.name, index, name reconciliation

pandas/tests/arrays/sparse/test_accessor.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,21 @@ def test_density(self):
101101
res = df.sparse.density
102102
expected = 0.75
103103
assert res == expected
104+
105+
@pytest.mark.parametrize("dtype", ['int64', 'float64'])
106+
@pytest.mark.parametrize("dense_index", [True, False])
107+
@td.skip_if_no_scipy
108+
def test_series_from_coo(self, dtype, dense_index):
109+
import scipy.sparse
110+
111+
A = scipy.sparse.eye(3, format='coo', dtype=dtype)
112+
result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
113+
index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
114+
expected = pd.Series(pd.SparseArray(np.array([1, 1, 1], dtype=dtype)),
115+
index=index)
116+
if dense_index:
117+
expected = expected.reindex(
118+
pd.MultiIndex.from_product(index.levels)
119+
)
120+
121+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)