Skip to content

Commit 20c2278

Browse files
committed
fixup
1 parent 79870b4 commit 20c2278

File tree

8 files changed

+67
-156
lines changed

8 files changed

+67
-156
lines changed

pandas/core/indexing.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -526,10 +526,7 @@ def setter(item, v):
526526
# set the item, possibly having a dtype change
527527
s._consolidate_inplace()
528528
s = s.copy()
529-
# if is_sparse(s):
530-
# s.set_value(pi, v, takeable=is_list_like(pi))
531-
# else:
532-
s._data = s._data.setitem(indexer=pi, value=v) # TODO THIS USE THIS
529+
s._data = s._data.setitem(indexer=pi, value=v)
533530
s._maybe_update_cacher(clear=True)
534531

535532
# reset the sliced object if unique
@@ -634,13 +631,8 @@ def can_do_equal_len():
634631

635632
# actually do the set
636633
self.obj._consolidate_inplace()
637-
if is_sparse(self.obj):
638-
# SparseSeries has underlying SparseArray, which doesn't
639-
# support resizing
640-
self.obj[indexer] = value
641-
else:
642-
self.obj._data = self.obj._data.setitem(indexer=indexer,
643-
value=value)
634+
self.obj._data = self.obj._data.setitem(indexer=indexer,
635+
value=value)
644636
self.obj._maybe_update_cacher(clear=True)
645637

646638
def _align_series(self, indexer, ser, multiindex_indexer=False):

pandas/core/internals.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1768,6 +1768,8 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
17681768
# Cannot modify a SparseArray
17691769
if is_sparse(new_values):
17701770
new_values = new_values.to_dense()
1771+
if is_sparse(mask):
1772+
mask = mask.to_dense()
17711773

17721774
if isinstance(new, np.ndarray) and len(new) == len(mask):
17731775
new = new[mask]
@@ -2812,8 +2814,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
28122814
placement=self.mgr_locs)
28132815

28142816
def _can_hold_element(self, element):
2815-
return np.issubdtype(np.asanyarray(element).dtype,
2816-
self.sp_values.dtype)
2817+
return np.can_cast(np.asarray(element).dtype, self.sp_values.dtype)
28172818

28182819
def _try_coerce_result(self, result):
28192820
if np.ndim(result) > 0 and not is_sparse(result):

pandas/core/series.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -949,8 +949,7 @@ def set_value(self, label, value, takeable=False):
949949
950950
Returns
951951
-------
952-
series : Series
953-
self
952+
self : Series
954953
"""
955954
warnings.warn("set_value is deprecated and will be removed "
956955
"in a future release. Please use "

pandas/core/sparse/array.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import pandas.core.algorithms as algos
3737
import pandas.core.ops as ops
3838
import pandas.io.formats.printing as printing
39+
from pandas.errors import PerformanceWarning
3940
from pandas.util._decorators import Appender
4041
from pandas.core.indexes.base import _index_shared_docs
4142

@@ -398,27 +399,39 @@ def set_values(self, indexer, value):
398399
"""
399400
# If indexer is not a single integer position, it is easiest to set via a dense copy
400401
if not is_scalar(indexer):
402+
warnings.warn(
403+
'Setting SparseSeries/Array values is particularly '
404+
'inefficient when indexing with multiple keys because the '
405+
'whole series series is made dense interim.',
406+
PerformanceWarning, stacklevel=2)
407+
401408
values = self.to_dense()
402409
values[indexer] = value
403410
return SparseArray(values, kind=self.kind,
404411
fill_value=self.fill_value)
405412

413+
warnings.warn(
414+
'Setting SparseSeries/Array values is inefficient '
415+
'(a copy of data is made)', PerformanceWarning, stacklevel=2)
416+
406417
# If the position is already in the sparse index, just replace the value on a copy
407418
idx = self.sp_index.lookup(indexer)
408419
if idx != -1:
409420
obj = self.copy()
410421
obj.sp_values[idx] = value
411422
return obj
412423

424+
# Otherwise, construct a new array, and insert the new value in the
425+
# correct position
413426
indices = self.sp_index.to_int_index().indices
414427
pos = np.searchsorted(indices, indexer)
415428

416429
indices = np.insert(indices, pos, indexer)
417430
sp_values = np.insert(self.sp_values, pos, value)
431+
sp_index = _make_index(self.sp_index.length, indices, self.kind)
418432

419-
return SparseArray(
420-
sp_values, fill_value=self.fill_value,
421-
sparse_index=_make_index(self.sp_index.length, indices, self.kind))
433+
return SparseArray(sp_values, sparse_index=sp_index,
434+
fill_value=self.fill_value)
422435

423436
def to_dense(self, fill=None):
424437
"""

pandas/core/sparse/frame.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -465,29 +465,6 @@ def _get_value(self, index, col, takeable=False):
465465
return series._get_value(index, takeable=takeable)
466466
_get_value.__doc__ = get_value.__doc__
467467

468-
def set_value(self, index, col, value, takeable=False):
469-
"""
470-
Put single value at passed column and index
471-
472-
Parameters
473-
----------
474-
index : row label
475-
col : column label
476-
value : scalar value
477-
takeable : interpret the index/col as indexers, default False
478-
479-
Notes
480-
-----
481-
This method *always* returns a new object for backwards compatibility.
482-
483-
Returns
484-
-------
485-
frame : DataFrame
486-
"""
487-
self = self.copy(deep=False)
488-
return super(SparseDataFrame, self).set_value(index, col, value,
489-
takeable=takeable)
490-
491468
def _slice(self, slobj, axis=0, kind=None):
492469
if axis == 0:
493470
new_index = self.index[slobj]

pandas/core/sparse/series.py

Lines changed: 4 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,10 @@
1010

1111
from pandas.core.dtypes.missing import isna, notna
1212
from pandas.core.dtypes.common import is_scalar, is_sparse
13-
from pandas.core.common import _values_from_object, _maybe_match_name, \
14-
PerformanceWarning
13+
from pandas.core.common import _values_from_object, _maybe_match_name
1514

1615
from pandas.compat.numpy import function as nv
1716
from pandas.core.index import Index, _ensure_index, InvalidIndexError
18-
from pandas.core.indexing import check_bool_indexer
1917
from pandas.core.series import Series
2018
from pandas.core.frame import DataFrame
2119
from pandas.core.internals import SingleBlockManager
@@ -317,7 +315,7 @@ def __array_wrap__(self, result, context=None):
317315
else:
318316
fill_value = self.fill_value
319317

320-
# Results size unchanged, old sparse index is valid ???
318+
# If result size equals the number of sparse points, reuse the old sparse index (TODO: confirm this is always valid)
321319
if np.size(result) == self.sp_index.npoints:
322320
sp_index = self.sp_index
323321
else:
@@ -434,22 +432,7 @@ def _get_values(self, indexer):
434432
return self[indexer]
435433

436434
def _set_with_engine(self, key, value):
437-
<<<<<<< HEAD
438435
return self._set_value(key, value)
439-
||||||| parent of e95270f1e... ENH: Allow SparseDataFrame/SparseSeries values assignment
440-
return self.set_value(key, value)
441-
=======
442-
takeable = False
443-
444-
# Sparse doesn't support reshaping so the standard .where() does
445-
# not apply. We short-circuit bool indexers here by treating them as
446-
# regular list of indexes and setting each array/value separately
447-
if com.is_bool_indexer(key):
448-
key = check_bool_indexer(self.index, key).nonzero()[0]
449-
takeable = True
450-
451-
return self.set_value(key, value, takeable=takeable)
452-
>>>>>>> e95270f1e... ENH: Allow SparseDataFrame/SparseSeries values assignment
453436

454437
def abs(self):
455438
"""
@@ -539,7 +522,7 @@ def set_value(self, label, value, takeable=False):
539522
540523
Returns
541524
-------
542-
series : SparseSeries
525+
self : SparseSeries
543526
"""
544527
warnings.warn("set_value is deprecated and will be removed "
545528
"in a future release. Please use "
@@ -548,62 +531,7 @@ def set_value(self, label, value, takeable=False):
548531
return self._set_value(label, value, takeable=takeable)
549532

550533
def _set_value(self, label, value, takeable=False):
551-
try:
552-
loc = self.index.get_loc(label)
553-
except (KeyError, TypeError):
554-
loc = None
555-
556-
warnings.warn(
557-
'Setting SparseSeries values is inefficient '
558-
'(a copy of data is made)', PerformanceWarning, stacklevel=2)
559-
560-
# If label is not unique in index, or it is takeable,
561-
# amend the series by amending its dense copy
562-
if not isinstance(loc, int) or takeable:
563-
warnings.warn(
564-
'Setting SparseSeries values is particularly inefficient when '
565-
'indexing with a non-unique label because the whole series '
566-
'is made dense interim.', PerformanceWarning, stacklevel=2)
567-
values = self.to_dense()
568-
values.set_value(label, value, takeable=takeable)
569-
570-
index = values.index
571-
sp_index = None
572-
values = values.to_sparse(kind=self.kind,
573-
fill_value=self.fill_value)
574-
575-
# If label is unique key and not takeable, then it is more space-
576-
# efficient to not make the whole series dense, rather just its
577-
# defined part
578-
else:
579-
values = self._to_dense(sparse_only=True)
580-
old_index = values.index
581-
values.set_value(label, value, takeable=takeable)
582-
index = self.index
583-
584-
# label was already in sparse index, we can just reuse old index
585-
if label in old_index:
586-
sp_index = self.sp_index
587-
588-
# label might have been at least in .index
589-
else:
590-
# and if not, just add it, then construct both indexes anew
591-
if loc is None:
592-
index = self.index.append(Index((label,)))
593-
loc = len(index) - 1
594-
595-
indices = np.append(
596-
self.sp_index.to_int_index().indices,
597-
np.array(loc, dtype=np.int32))
598-
order = indices.argsort()
599-
values = values.values.take(order)
600-
indices = indices.take(order)
601-
sp_index = _make_index(len(index), indices, self.kind)
602-
603-
values = SparseArray(values, sparse_index=sp_index, kind=self.kind,
604-
fill_value=self.fill_value)
605-
self._data = SingleBlockManager(values, index)
606-
self._index = index
534+
self._data = self._data.copy().setitem(indexer=label, value=value)
607535
return self
608536
_set_value.__doc__ = set_value.__doc__
609537

@@ -643,23 +571,6 @@ def to_dense(self, sparse_only=False):
643571
warnings.warn(("The 'sparse_only' parameter has been deprecated "
644572
"and will be removed in a future version."),
645573
FutureWarning, stacklevel=2)
646-
return self._to_dense(sparse_only=sparse_only)
647-
648-
def _to_dense(self, sparse_only=False):
649-
"""
650-
Convert SparseSeries to a Series.
651-
652-
Parameters
653-
----------
654-
sparse_only: bool, default False
655-
If True, return just the non-sparse values, or the dense version
656-
of `self.values` if False.
657-
658-
Returns
659-
-------
660-
s : Series
661-
"""
662-
if sparse_only:
663574
int_index = self.sp_index.to_int_index()
664575
index = self.index.take(int_index.indices)
665576
return Series(self.sp_values, index=index, name=self.name)

pandas/tests/sparse/test_frame.py

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,31 +1536,49 @@ def test_quantile_multi(self):
15361536
tm.assert_sp_frame_equal(result, sparse_expected)
15371537

15381538

1539-
@pytest.mark.parametrize('kind', ['integer', 'block'])
1540-
@pytest.mark.parametrize('indexer', [None, 'loc', 'iloc', 'at', 'iat'])
1541-
@pytest.mark.parametrize('key', [0, [0, 1], [True, False], None])
1542-
def test_frame_assignment(kind, indexer, key):
1543-
try_multiple = 'at' not in (indexer or '')
1544-
is_multi_key = np.asarray(key).ndim > 0
1545-
if is_multi_key and not try_multiple:
1546-
return
1547-
if not indexer and not is_multi_key and key is not None: # skip non-multikey with setitem
1548-
return
1549-
if indexer and key is None: # skip df indexer with non-setitem
1550-
return
1551-
1539+
def _test_assignment(kind, indexer, key=None):
15521540
arr = np.array([[1, nan],
15531541
[nan, 1]])
1542+
df = DataFrame(arr, copy=True)
15541543
sdf = SparseDataFrame(arr, default_kind=kind).to_sparse(kind=kind)
15551544

1545+
def get_indexer(df):
1546+
return getattr(df, indexer) if indexer else df
1547+
15561548
if key is None:
15571549
key = pd.isnull(sdf).to_sparse()
15581550

1559-
arr = arr.copy()
1560-
arr[np.asarray(key)] = 2
1561-
res = SparseDataFrame(arr, default_kind=kind).to_sparse(kind=kind)
1551+
get_indexer(sdf)[key] = 2
15621552

1563-
sdf_setitem = getattr(sdf, indexer) if indexer else sdf
1564-
sdf_setitem[key] = 2
1553+
get_indexer(df)[key] = 2
1554+
res = df.to_sparse(kind=kind)
15651555

15661556
tm.assert_sp_frame_equal(sdf, res)
1557+
1558+
1559+
@pytest.fixture(params=['integer', 'block'])
1560+
def spindex_kind(request):
1561+
return request.param
1562+
1563+
1564+
@pytest.mark.parametrize('indexer', ['at', 'iat'])
1565+
@pytest.mark.parametrize('key', [0])
1566+
def test_frame_assignment_at(spindex_kind, indexer, key):
1567+
_test_assignment(spindex_kind, indexer, key)
1568+
1569+
1570+
@pytest.mark.parametrize('indexer', ['loc', 'iloc'])
1571+
@pytest.mark.parametrize('key', [0, [0, 1], [True, False]])
1572+
def test_frame_assignment_loc(spindex_kind, indexer, key):
1573+
_test_assignment(spindex_kind, indexer, key)
1574+
1575+
1576+
@pytest.mark.parametrize('key', [None, [0, 1], [True, False]])
1577+
def test_frame_assignment_setitem(spindex_kind, key):
1578+
_test_assignment(spindex_kind, None, key)
1579+
1580+
1581+
@pytest.mark.parametrize('indexer', ['loc', 'at'])
1582+
@pytest.mark.parametrize('key', [3])
1583+
def test_frame_assignment_extend_index(spindex_kind, indexer, key):
1584+
_test_assignment(spindex_kind, indexer, key)

pandas/tests/sparse/test_series.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1541,11 +1541,11 @@ def test_constructor_dict_datetime64_index(datetime_type):
15411541
@pytest.mark.parametrize('indexer', [None, 'loc', 'iloc', 'at', 'iat'])
15421542
@pytest.mark.parametrize('key', [0, [0, 1], 2, [2, 3],
15431543
[True, False, False, False],
1544-
[False, False, False, True],])
1544+
[False, False, False, True]])
15451545
def test_series_assignment(kind, indexer, key):
1546-
try_multiple = 'at' not in (indexer or '')
1547-
is_multi_key = np.asarray(key).ndim > 0
1548-
if is_multi_key and not try_multiple:
1546+
is_multikey = np.asarray(key).ndim > 0
1547+
skip_multikey = 'at' in (indexer or '')
1548+
if is_multikey and skip_multikey:
15491549
return
15501550

15511551
arr = np.array([0., 0., nan, nan])

0 commit comments

Comments
 (0)