Skip to content

Extend support of out parameter for dpnp.sqrt() #1377

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions dpnp/dpnp_algo/dpnp_algo.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -347,33 +347,42 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name,
""" get the FPTR data structure """
cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(fptr_name, param1_type, param1_type)

result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type)
return_type = kernel_data.return_type

cdef shape_type_c x1_shape = x1.shape
cdef shape_type_c x1_strides = utils.strides_to_vector(x1.strides, x1_shape)

cdef shape_type_c result_shape = x1_shape
cdef utils.dpnp_descriptor result

if out is None:
""" Create result array with type given by FPTR data """
"""" Check `out` parameter data """
if out is not None:
if out.shape != result_shape:
utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)

utils.get_common_usm_allocation(x1, out) #check USM allocation is common

if out is None or out.is_array_overlapped(x1) or not out.match_ctype(return_type):
""""
Create result array with type given by FPTR data.
If the 'out' array has a dtype other than the expected one or overlaps the memory of any input array,
we have to create a temporary array and copy the data from the temporary into the 'out' array
once the computation is completed.
Otherwise simultaneous access to the same memory may cause a race condition,
which will result in undefined behaviour.
"""
is_result_memory_allocated = True
x1_obj = x1.get_array()
result = utils.create_output_descriptor(result_shape,
kernel_data.return_type,
return_type,
None,
device=x1_obj.sycl_device,
usm_type=x1_obj.usm_type,
sycl_queue=x1_obj.sycl_queue)
else:
if out.dtype != result_type:
utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type)
if out.shape != result_shape:
utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)

is_result_memory_allocated = False
result = out

utils.get_common_usm_allocation(x1, result) # check USM allocation is common

result_sycl_queue = result.get_array().sycl_queue

cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
Expand All @@ -400,7 +409,10 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name,
with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
c_dpctl.DPCTLEvent_Delete(event_ref)

return result
if out is not None and is_result_memory_allocated:
return out.get_result_desc(result)

return result.get_result_desc()


cdef utils.dpnp_descriptor call_fptr_2in_1out(DPNPFuncName fptr_name,
Expand Down
50 changes: 32 additions & 18 deletions dpnp/dpnp_iface_trigonometric.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,34 @@
]


def _check_nd_call(origin_func, dpnp_func, x1, out=None, where=True, dtype=None, subok=True, **kwargs):
    """
    Choose which implementation to call based on the input and call it.

    `dpnp_func` is called only when no extra keyword arguments are passed,
    `where`, `dtype` and `subok` keep their default values, `x1` is not a
    scalar and a dpnp descriptor can be obtained for `x1`. In every other
    case the call is forwarded to `call_origin` with `origin_func`.

    Raises
    ------
    TypeError
        If the `dpnp_func` path is taken and `out` is neither ``None`` nor a
        :class:`dpnp.ndarray` / :class:`dpctl.tensor.usm_ndarray`.

    """

    # The checks short-circuit in the same order as the original ladder:
    # kwargs, where, dtype, subok and only then the scalar test.
    defaults_only = not kwargs and where is True and dtype is None and subok is True

    if defaults_only and not dpnp.isscalar(x1):
        x1_desc = dpnp.get_dpnp_descriptor(
            x1, copy_when_strides=False, copy_when_nondefault_queue=False
        )
        if x1_desc:
            out_desc = None
            if out is not None:
                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
                    raise TypeError("return array must be of supported array type")
                # A falsy descriptor is normalised to None before the call.
                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
            return dpnp_func(x1_desc, out=out_desc).get_pyobj()

    return call_origin(origin_func, x1, dtype=dtype, out=out, where=where, subok=subok, **kwargs)

def arccos(x1):
"""
Trigonometric inverse cosine, element-wise.
Expand Down Expand Up @@ -907,7 +935,7 @@ def sinh(x1):
return call_origin(numpy.sinh, x1, **kwargs)


def sqrt(x1, /, out = None, **kwargs):
def sqrt(x1, /, out = None, where=True, dtype=None, subok=True, **kwargs):
"""
Return the positive square-root of an array, element-wise.

Expand All @@ -918,6 +946,8 @@ def sqrt(x1, /, out = None, **kwargs):
Input array is supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
Parameter `out` is supported as :class:`dpnp.ndarray`, :class:`dpctl.tensor.usm_ndarray` or
with default value ``None``.
Parameters `where`, `dtype` and `subok` are supported with their default values.
Keyword arguments ``kwargs`` are currently unsupported.
Otherwise the function will be executed sequentially on CPU.
Keyword arguments ``kwargs`` are currently unsupported.
Input array data types are limited by supported DPNP :ref:`Data types`.
Expand All @@ -932,23 +962,7 @@ def sqrt(x1, /, out = None, **kwargs):

"""

x1_desc = (
dpnp.get_dpnp_descriptor(
x1, copy_when_strides=False, copy_when_nondefault_queue=False
)
if not kwargs
else None
)
if x1_desc:
if out is not None:
if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
raise TypeError("return array must be of supported array type")
out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
else:
out_desc = None
return dpnp_sqrt(x1_desc, out=out_desc).get_pyobj()

return call_origin(numpy.sqrt, x1, out=out, **kwargs)
return _check_nd_call(numpy.sqrt, dpnp_sqrt, x1, out=out, where=where, dtype=dtype, subok=subok, **kwargs)


def square(x1):
Expand Down
60 changes: 35 additions & 25 deletions tests/test_mathematical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .helper import (
get_all_dtypes,
get_float_complex_dtypes,
get_float_dtypes,
is_cpu_device,
is_win_platform
)
Expand Down Expand Up @@ -387,7 +388,7 @@ def test_ediff1d_int(self, array, data_type):
expected = numpy.ediff1d(np_a)
assert_array_equal(expected, result)


@pytest.mark.usefixtures("allow_fall_back_on_numpy")
def test_ediff1d_args(self):
np_a = numpy.array([1, 2, 4, 7, 0])
Expand Down Expand Up @@ -532,16 +533,19 @@ def test_ceil(self):

assert_array_equal(expected, result)

@pytest.mark.parametrize("dtype",
[numpy.float32, numpy.int64, numpy.int32],
ids=['numpy.float32', 'numpy.int64', 'numpy.int32'])
def test_invalid_dtype(self, dtype):
@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
@pytest.mark.parametrize("dtype_out", get_float_dtypes())
def test_out_dtype(self, dtype, dtype_out):

dp_array = dpnp.arange(10, dtype=dpnp.float64)
dp_out = dpnp.empty(10, dtype=dtype)
np_array = numpy.arange(10, dtype=dtype)
np_out = numpy.empty(10, dtype=dtype_out)
expected = numpy.ceil(np_array, np_out)

with pytest.raises(ValueError):
dpnp.ceil(dp_array, out=dp_out)
dp_array = dpnp.arange(10, dtype=dtype)
dp_out = dpnp.empty(10, dtype=dtype_out)
result = dpnp.ceil(dp_array, dp_out)

assert_allclose(expected, result, rtol=1e-06)

@pytest.mark.parametrize("shape",
[(0,), (15, ), (2, 2)],
Expand Down Expand Up @@ -572,16 +576,19 @@ def test_floor(self):

assert_array_equal(expected, result)

@pytest.mark.parametrize("dtype",
[numpy.float32, numpy.int64, numpy.int32],
ids=['numpy.float32', 'numpy.int64', 'numpy.int32'])
def test_invalid_dtype(self, dtype):
@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
@pytest.mark.parametrize("dtype_out", get_float_dtypes())
def test_out_dtype(self, dtype, dtype_out):

dp_array = dpnp.arange(10, dtype=dpnp.float64)
dp_out = dpnp.empty(10, dtype=dtype)
np_array = numpy.arange(10, dtype=dtype)
np_out = numpy.empty(10, dtype=dtype_out)
expected = numpy.floor(np_array, np_out)

with pytest.raises(ValueError):
dpnp.floor(dp_array, out=dp_out)
dp_array = dpnp.arange(10, dtype=dtype)
dp_out = dpnp.empty(10, dtype=dtype_out)
result = dpnp.floor(dp_array, dp_out)

assert_allclose(expected, result, rtol=1e-06)

@pytest.mark.parametrize("shape",
[(0,), (15, ), (2, 2)],
Expand Down Expand Up @@ -612,16 +619,19 @@ def test_trunc(self):

assert_array_equal(expected, result)

@pytest.mark.parametrize("dtype",
[numpy.float32, numpy.int64, numpy.int32],
ids=['numpy.float32', 'numpy.int64', 'numpy.int32'])
def test_invalid_dtype(self, dtype):
@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
@pytest.mark.parametrize("dtype_out", get_float_dtypes())
def test_out_dtype(self, dtype, dtype_out):

np_array = numpy.arange(10, dtype=dtype)
np_out = numpy.empty(10, dtype=dtype_out)
expected = numpy.trunc(np_array, np_out)

dp_array = dpnp.arange(10, dtype=dpnp.float64)
dp_out = dpnp.empty(10, dtype=dtype)
dp_array = dpnp.arange(10, dtype=dtype)
dp_out = dpnp.empty(10, dtype=dtype_out)
result = dpnp.trunc(dp_array, dp_out)

with pytest.raises(ValueError):
dpnp.trunc(dp_array, out=dp_out)
assert_allclose(expected, result, rtol=1e-06)

@pytest.mark.parametrize("shape",
[(0,), (15, ), (2, 2)],
Expand Down
55 changes: 54 additions & 1 deletion tests/test_strides.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import math
import pytest
from .helper import get_all_dtypes, is_cpu_device
from .helper import get_all_dtypes, get_float_dtypes

import dpnp

Expand Down Expand Up @@ -215,6 +215,59 @@ def test_strides_true_devide(dtype, shape):

assert_allclose(result, expected)

@pytest.mark.parametrize("func_name",
                         ["sqrt",])
@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
def test_strided_out_1args(func_name, dtype):
    """Compare dpnp with numpy for a unary function writing into a strided `out` view."""
    # Taking every third slab along the first axis yields a strided output view.
    expected_out = numpy.ones((5, 3, 2))[::3]
    view_shape = expected_out.shape
    np_input = numpy.arange(numpy.prod(view_shape), dtype=dtype).reshape(view_shape)

    actual_out = dpnp.ones((5, 3, 2))[::3]
    dp_input = dpnp.array(np_input)

    expected = _getattr(numpy, func_name)(np_input, out=expected_out)
    actual = _getattr(dpnp, func_name)(dp_input, out=actual_out)

    # Both the returned array and the `out` buffer have to agree with numpy.
    assert_allclose(actual.asnumpy(), expected)
    assert_allclose(actual_out.asnumpy(), expected_out)

@pytest.mark.parametrize("func_name",
                         ["sqrt",])
@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
def test_strided_in_out_1args(func_name, dtype):
    """Compare dpnp with numpy when both the input and `out` are strided views."""
    shape = (3, 4, 2)
    total = numpy.prod(shape)

    # Input: every second slab of the reshaped range, then transposed.
    np_input = numpy.arange(total, dtype=dtype).reshape(shape)[::2].T
    dp_input = dpnp.arange(total, dtype=dtype).reshape(shape)[::2].T

    # Output: every second slab of a float64 array of ones.
    expected_out = numpy.ones(shape, dtype=numpy.float64)[::2]
    actual_out = dpnp.ones(shape, dtype=dpnp.float64)[::2]

    expected = _getattr(numpy, func_name)(np_input, out=expected_out)
    actual = _getattr(dpnp, func_name)(dp_input, out=actual_out)

    # Both the returned array and the `out` buffer have to agree with numpy.
    assert_allclose(actual.asnumpy(), expected, rtol=1e-06)
    assert_allclose(actual_out.asnumpy(), expected_out, rtol=1e-06)


@pytest.mark.parametrize("func_name",
                         ["sqrt",])
@pytest.mark.parametrize("dtype", get_float_dtypes())
def test_strided_in_out_1args_overlap(func_name, dtype):
    """Compare dpnp with numpy when the input and `out` are overlapping slices of one array."""
    shape = (4, 3, 2)
    total = numpy.prod(shape)

    np_base = numpy.arange(total, dtype=dtype).reshape(shape)
    dp_base = dpnp.arange(total, dtype=dtype).reshape(shape)

    # The input covers the first three slabs and `out` the last three of the same base array.
    expected = _getattr(numpy, func_name)(np_base[:3], out=np_base[1:])
    actual = _getattr(dpnp, func_name)(dp_base[:3], out=dp_base[1:])

    # Check the returned array and the base array that was written through `out`.
    assert_allclose(actual.asnumpy(), expected, rtol=1e-06)
    assert_allclose(dp_base.asnumpy(), np_base, rtol=1e-06)


@pytest.mark.parametrize("func_name",
["add", "multiply", "power"])
Expand Down
Loading