Skip to content

BUG: Consistent division by zero behavior for Index/Series #27321

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Jul 11, 2019
Merged
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
6bd4fb1
TST: parametrize tests
jbrockmendel Jul 7, 2019
d9943be
for loop, preparing to parametrize
jbrockmendel Jul 7, 2019
eb8b6d7
parametrize/fixturize mix
jbrockmendel Jul 7, 2019
3edebac
fixture for op
jbrockmendel Jul 7, 2019
eb60fc3
blackify
jbrockmendel Jul 7, 2019
3cf8b1e
parametrize more
jbrockmendel Jul 7, 2019
c34a9cc
param scalar
jbrockmendel Jul 7, 2019
158b003
Merge branch 'master' of https://github.com/pandas-dev/pandas into sp…
jbrockmendel Jul 7, 2019
b59b81a
docstring, xfail
jbrockmendel Jul 7, 2019
f085d6b
remove defunct comment
jbrockmendel Jul 7, 2019
339ed84
Merge branch 'master' of https://github.com/pandas-dev/pandas into sp…
jbrockmendel Jul 8, 2019
f03bd93
implement all_arithmetic_functions fixture
jbrockmendel Jul 8, 2019
cd2f564
isort fixup
jbrockmendel Jul 9, 2019
e362a08
check early for non-scalar default_fill_value
jbrockmendel Jul 9, 2019
7c6e127
try to bring dispatch_fill_zeros, dispatch_mnissing in sync, remove u…
jbrockmendel Jul 8, 2019
97611c3
standardize calls
jbrockmendel Jul 8, 2019
c30b40c
use pandas convention for divmod
jbrockmendel Jul 9, 2019
1ed12b8
remove comment
jbrockmendel Jul 9, 2019
45ffbc2
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 10, 2019
591eaaa
patch sparse and IntegerArray tests
jbrockmendel Jul 10, 2019
2841be0
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 10, 2019
518e2e9
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 10, 2019
2580048
fixup for docs
jbrockmendel Jul 10, 2019
f440c59
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 10, 2019
188ac99
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 11, 2019
6b411c4
requested comments
jbrockmendel Jul 11, 2019
77c2383
whatsnew
jbrockmendel Jul 11, 2019
7b8880c
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 11, 2019
14010c8
dummy commit to force ci
jbrockmendel Jul 11, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
parametrize/fixturize mix
  • Loading branch information
jbrockmendel committed Jul 7, 2019
commit eb8b6d750be1d269294a7f4431e5b72756b817da
148 changes: 75 additions & 73 deletions pandas/tests/arrays/sparse/test_arithmetics.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ def kind(request):
return request.param


@pytest.fixture(params=[True, False])
def mix(request):
# whether to compute op(sparse, dense) instead of op(sparse, sparse)
return request.param


@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
class TestSparseArrayArithmetics:
Expand All @@ -24,41 +30,37 @@ class TestSparseArrayArithmetics:
def _assert(self, a, b):
tm.assert_numpy_array_equal(a, b)

def _check_numeric_ops(self, a, b, a_dense, b_dense):
def _check_numeric_ops(self, a, b, a_dense, b_dense, mix):
with np.errstate(invalid="ignore", divide="ignore"):
# Unfortunately, trying to wrap the computation of each expected
# value with np.errstate() is too tedious.

for mix in [True, False]:
# True --> sparse & dense
# False --> sparse & sparse

# sparse & sparse
for op in [operator.add, ops.radd,
operator.sub, ops.rsub,
operator.mul, ops.rmul,
operator.truediv, ops.rtruediv,
operator.floordiv, ops.rfloordiv,
operator.mod, ops.rmod,
operator.pow, ops.rpow]:

if op in [operator.floordiv, ops.rfloordiv]:
# FIXME: GH#13843
if (self._base == pd.Series and a.dtype.subtype == np.dtype("int64")):
continue

if mix:
result = op(a, b_dense).to_dense()
else:
result = op(a, b).to_dense()

if op in [operator.truediv, ops.rtruediv]:
# pandas uses future division
expected = op(a_dense * 1.0, b_dense)
else:
expected = op(a_dense, b_dense)

self._assert(result, expected)
# sparse & sparse
for op in [operator.add, ops.radd,
operator.sub, ops.rsub,
operator.mul, ops.rmul,
operator.truediv, ops.rtruediv,
operator.floordiv, ops.rfloordiv,
operator.mod, ops.rmod,
operator.pow, ops.rpow]:

if op in [operator.floordiv, ops.rfloordiv]:
# FIXME: GH#13843
if (self._base == pd.Series and a.dtype.subtype == np.dtype("int64")):
continue

if mix:
result = op(a, b_dense).to_dense()
else:
result = op(a, b).to_dense()

if op in [operator.truediv, ops.rtruediv]:
# pandas uses future division
expected = op(a_dense * 1.0, b_dense)
else:
expected = op(a_dense, b_dense)

self._assert(result, expected)

def _check_bool_result(self, res):
assert isinstance(res, self._klass)
Expand Down Expand Up @@ -123,23 +125,23 @@ def _check_logical_ops(self, a, b, a_dense, b_dense):
self._check_bool_result(a | b_dense)
self._assert((a | b_dense).to_dense(), a_dense | b_dense)

def test_float_scalar(self, kind):
def test_float_scalar(self, kind, mix):
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

a = self._klass(values, kind=kind)
self._check_numeric_ops(a, 1, values, 1)
self._check_numeric_ops(a, 0, values, 0)
self._check_numeric_ops(a, 3, values, 3)
self._check_numeric_ops(a, 1, values, 1, mix)
self._check_numeric_ops(a, 0, values, 0, mix)
self._check_numeric_ops(a, 3, values, 3, mix)

a = self._klass(values, kind=kind, fill_value=0)
self._check_numeric_ops(a, 1, values, 1)
self._check_numeric_ops(a, 0, values, 0)
self._check_numeric_ops(a, 3, values, 3)
self._check_numeric_ops(a, 1, values, 1, mix)
self._check_numeric_ops(a, 0, values, 0, mix)
self._check_numeric_ops(a, 3, values, 3, mix)

a = self._klass(values, kind=kind, fill_value=2)
self._check_numeric_ops(a, 1, values, 1)
self._check_numeric_ops(a, 0, values, 0)
self._check_numeric_ops(a, 3, values, 3)
self._check_numeric_ops(a, 1, values, 1, mix)
self._check_numeric_ops(a, 0, values, 0, mix)
self._check_numeric_ops(a, 3, values, 3, mix)

def test_float_scalar_comparison(self, kind):
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
Expand All @@ -159,21 +161,21 @@ def test_float_scalar_comparison(self, kind):
self._check_comparison_ops(a, 0, values, 0)
self._check_comparison_ops(a, 3, values, 3)

def test_float_same_index(self, kind):
def test_float_same_index(self, kind, mix):
# when sp_index are the same
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])

a = self._klass(values, kind=kind)
b = self._klass(rvalues, kind=kind)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])

a = self._klass(values, kind=kind, fill_value=0)
b = self._klass(rvalues, kind=kind, fill_value=0)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

def test_float_same_index_comparison(self, kind):
# when sp_index are the same
Expand All @@ -191,47 +193,47 @@ def test_float_same_index_comparison(self, kind):
b = self._klass(rvalues, kind=kind, fill_value=0)
self._check_comparison_ops(a, b, values, rvalues)

def test_float_array(self, kind):
def test_float_array(self, kind, mix):
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

a = self._klass(values, kind=kind)
b = self._klass(rvalues, kind=kind)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
self._check_numeric_ops(a, b, values, rvalues, mix)
self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix)

a = self._klass(values, kind=kind, fill_value=0)
b = self._klass(rvalues, kind=kind)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

a = self._klass(values, kind=kind, fill_value=0)
b = self._klass(rvalues, kind=kind, fill_value=0)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

a = self._klass(values, kind=kind, fill_value=1)
b = self._klass(rvalues, kind=kind, fill_value=2)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

def test_float_array_different_kind(self):
def test_float_array_different_kind(self, mix):
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

a = self._klass(values, kind="integer")
b = self._klass(rvalues, kind="block")
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
self._check_numeric_ops(a, b, values, rvalues, mix)
self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix)

a = self._klass(values, kind="integer", fill_value=0)
b = self._klass(rvalues, kind="block")
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

a = self._klass(values, kind="integer", fill_value=0)
b = self._klass(rvalues, kind="block", fill_value=0)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

a = self._klass(values, kind="integer", fill_value=1)
b = self._klass(rvalues, kind="block", fill_value=2)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

def test_float_array_comparison(self, kind):
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
Expand All @@ -254,7 +256,7 @@ def test_float_array_comparison(self, kind):
b = self._klass(rvalues, kind=kind, fill_value=2)
self._check_comparison_ops(a, b, values, rvalues)

def test_int_array(self, kind):
def test_int_array(self, kind, mix):
# have to specify dtype explicitly until fixing GH 667
dtype = np.int64

Expand All @@ -266,27 +268,27 @@ def test_int_array(self, kind):
b = self._klass(rvalues, dtype=dtype, kind=kind)
assert b.dtype == SparseDtype(dtype)

self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
self._check_numeric_ops(a, b, values, rvalues, mix)
self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix)

a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
assert a.dtype == SparseDtype(dtype)
b = self._klass(rvalues, dtype=dtype, kind=kind)
assert b.dtype == SparseDtype(dtype)

self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
assert a.dtype == SparseDtype(dtype)
b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind)
assert b.dtype == SparseDtype(dtype)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

a = self._klass(values, fill_value=1, dtype=dtype, kind=kind)
assert a.dtype == SparseDtype(dtype, fill_value=1)
b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind)
assert b.dtype == SparseDtype(dtype, fill_value=2)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

def test_int_array_comparison(self, kind):
dtype = "int64"
Expand Down Expand Up @@ -334,7 +336,7 @@ def test_bool_array_logical(self, kind, fill_value):
b = self._klass(rvalues, kind=kind, dtype=np.bool, fill_value=fill_value)
self._check_logical_ops(a, b, values, rvalues)

def test_mixed_array_float_int(self, kind):
def test_mixed_array_float_int(self, kind, mix):
rdtype = "int64"

values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
Expand All @@ -344,23 +346,23 @@ def test_mixed_array_float_int(self, kind):
b = self._klass(rvalues, kind=kind)
assert b.dtype == SparseDtype(rdtype)

self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
self._check_numeric_ops(a, b, values, rvalues, mix)
self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix)

a = self._klass(values, kind=kind, fill_value=0)
b = self._klass(rvalues, kind=kind)
assert b.dtype == SparseDtype(rdtype)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

a = self._klass(values, kind=kind, fill_value=0)
b = self._klass(rvalues, kind=kind, fill_value=0)
assert b.dtype == SparseDtype(rdtype)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

a = self._klass(values, kind=kind, fill_value=1)
b = self._klass(rvalues, kind=kind, fill_value=2)
assert b.dtype == SparseDtype(rdtype, fill_value=2)
self._check_numeric_ops(a, b, values, rvalues)
self._check_numeric_ops(a, b, values, rvalues, mix)

def test_mixed_array_comparison(self, kind):
rdtype = "int64"
Expand Down Expand Up @@ -400,21 +402,21 @@ class TestSparseSeriesArithmetic(TestSparseArrayArithmetics):
def _assert(self, a, b):
tm.assert_series_equal(a, b)

def test_alignment(self):
def test_alignment(self, mix):
da = pd.Series(np.arange(4))
db = pd.Series(np.arange(4), index=[1, 2, 3, 4])

sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=0)
sb = pd.SparseSeries(
np.arange(4), index=[1, 2, 3, 4], dtype=np.int64, fill_value=0
)
self._check_numeric_ops(sa, sb, da, db)
self._check_numeric_ops(sa, sb, da, db, mix)

sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=np.nan)
sb = pd.SparseSeries(
np.arange(4), index=[1, 2, 3, 4], dtype=np.int64, fill_value=np.nan
)
self._check_numeric_ops(sa, sb, da, db)
self._check_numeric_ops(sa, sb, da, db, mix)

da = pd.Series(np.arange(4))
db = pd.Series(np.arange(4), index=[10, 11, 12, 13])
Expand All @@ -423,13 +425,13 @@ def test_alignment(self):
sb = pd.SparseSeries(
np.arange(4), index=[10, 11, 12, 13], dtype=np.int64, fill_value=0
)
self._check_numeric_ops(sa, sb, da, db)
self._check_numeric_ops(sa, sb, da, db, mix)

sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=np.nan)
sb = pd.SparseSeries(
np.arange(4), index=[10, 11, 12, 13], dtype=np.int64, fill_value=np.nan
)
self._check_numeric_ops(sa, sb, da, db)
self._check_numeric_ops(sa, sb, da, db, mix)


@pytest.mark.parametrize("op", [operator.eq, operator.add])
Expand Down