From 0c7f1960114bac7ad2cb4c83c1bb6db5f5435bc3 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 14 Jun 2023 16:33:57 +0200 Subject: [PATCH] Add inplace support of divide (#1434) --- dpnp/dpnp_algo/dpnp_elementwise_common.py | 26 ++++++++++++++++++- tests/test_usm_type.py | 20 +++++++++----- .../cupy/linalg_tests/test_product.py | 4 --- .../cupy/math_tests/test_arithmetic.py | 1 - 4 files changed, 39 insertions(+), 12 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index 527994a27ad..5115f39a8e7 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -35,6 +35,10 @@ BinaryElementwiseFunc ) import dpctl.tensor._tensor_impl as ti +import dpctl.tensor as dpt +import dpctl + +import numpy __all__ = [ @@ -125,12 +129,27 @@ def _call_divide(src1, src2, dst, sycl_queue, depends=[]): return vmi._div(sycl_queue, src1, src2, dst, depends) return ti._divide(src1, src2, dst, sycl_queue, depends) + def _call_divide_inplace(lhs, rhs, sycl_queue, depends=[]): + """In place workaround until dpctl.tensor provides the functionality.""" + + # allocate temporary memory for out array + out = dpt.empty_like(lhs, dtype=numpy.result_type((lhs.dtype, rhs.dtype))) + + # call a general callback + div_ht_, div_ev_ = _call_divide(lhs, rhs, out, sycl_queue, depends) + + # store the result into left input array and return events + cp_ht_, cp_ev_ = ti._copy_usm_ndarray_into_usm_ndarray(src=out, dst=lhs, sycl_queue=sycl_queue, depends=[div_ev_]) + dpctl.SyclEvent.wait_for([div_ht_]) + return (cp_ht_, cp_ev_) + # dpctl.tensor only works with usm_ndarray or scalar x1_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x1) x2_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x2) out_usm = None if out is None else dpnp.get_usm_ndarray(out) - func = BinaryElementwiseFunc("divide", ti._divide_result_type, _call_divide, _divide_docstring_) + func = BinaryElementwiseFunc("divide", ti._divide_result_type, _call_divide, + _divide_docstring_, _call_divide_inplace) res_usm = func(x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order) return dpnp_array._create_from_usm_ndarray(res_usm) @@ -208,6 +227,11 @@ def dpnp_subtract(x1, x2, out=None, order='K'): """ + # TODO: discuss with dpctl if the check is needed to be moved there + if not dpnp.isscalar(x1) and not dpnp.isscalar(x2) and x1.dtype == x2.dtype == dpnp.bool: + raise TypeError("DPNP boolean subtract, the `-` operator, is not supported, " + "use the bitwise_xor, the `^` operator, or the logical_xor function instead.") + # dpctl.tensor only works with usm_ndarray or scalar x1_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x1) x2_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x2) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 9bd0ab16716..61145de42c7 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -19,9 +19,9 @@ def test_coerced_usm_types_sum(usm_type_x, usm_type_y): z = 1.3 + x + y + 2 - # TODO: unmute once dpctl support that - # z += x - # z += 7.4 + # inplace add + z += x + z += 7.4 assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y @@ -36,9 +36,9 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y): z = 3 * x * y * 1.5 - # TODO: unmute once dpctl support that - # z *= x - # z *= 4.8 + # inplace multiply + z *= x + z *= 4.8 assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y @@ -53,6 +53,10 @@ def test_coerced_usm_types_subtract(usm_type_x, usm_type_y): z = 20 - x - y - 7.4 + # inplace subtract + z -= x + z -= -3.4 + assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) @@ -66,6 +70,10 @@ def test_coerced_usm_types_divide(usm_type_x, usm_type_y): z = 2 / x / y / 1.5 + # inplace divide + z /= x + z /= -2.4 + assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) diff --git a/tests/third_party/cupy/linalg_tests/test_product.py b/tests/third_party/cupy/linalg_tests/test_product.py index 0f6a2f22fe8..d25cebbfa67 100644 --- a/tests/third_party/cupy/linalg_tests/test_product.py +++ b/tests/third_party/cupy/linalg_tests/test_product.py @@ -228,7 +228,6 @@ def test_transposed_multidim_vdot(self, xp, dtype): @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() - @pytest.mark.skip("mute until dpctl support in-place add") def test_inner(self, xp, dtype): a = testing.shaped_arange((5,), xp, dtype) b = testing.shaped_reverse_arange((5,), xp, dtype) @@ -237,7 +236,6 @@ def test_inner(self, xp, dtype): @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() - @pytest.mark.skip("mute until dpctl support in-place add") def test_reversed_inner(self, xp, dtype): a = testing.shaped_arange((5,), xp, dtype)[::-1] b = testing.shaped_reverse_arange((5,), xp, dtype)[::-1] @@ -246,7 +244,6 @@ def test_reversed_inner(self, xp, dtype): @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() - @pytest.mark.skip("mute until dpctl support in-place add") def test_multidim_inner(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) b = testing.shaped_arange((3, 2, 4), xp, dtype) @@ -254,7 +251,6 @@ def test_multidim_inner(self, xp, dtype): @testing.for_all_dtypes() @testing.numpy_cupy_allclose() - @pytest.mark.skip("mute until dpctl support in-place add") def test_transposed_higher_order_inner(self, xp, dtype): a = testing.shaped_arange((2, 4, 3), xp, dtype).transpose(2, 0, 1) b = testing.shaped_arange((4, 2, 3), xp, dtype).transpose(1, 2, 0) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index ade3c4c8f6e..c52b2d2df3a 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -280,7 +280,6 @@ def test_modf(self, xp, dtype): 'shape': [(3, 2), (), (3, 0, 2)] })) @testing.gpu -@pytest.mark.skip("dpctl doesn't raise an error") class TestBoolSubtract(unittest.TestCase): def test_bool_subtract(self):