IntelPython · vlad-perevezentsev · Apr 12, 2023 · Apr 12, 2023 · Apr 12, 2023 · Apr 12, 2023
@@ -347,33 +347,42 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name,
     """ get the FPTR data structure """
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(fptr_name, param1_type, param1_type)
 
-    result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type)
+    return_type = kernel_data.return_type
 
     cdef shape_type_c x1_shape = x1.shape
     cdef shape_type_c x1_strides = utils.strides_to_vector(x1.strides, x1_shape)
 
     cdef shape_type_c result_shape = x1_shape
     cdef utils.dpnp_descriptor result
 
-    if out is None:
-        """ Create result array with type given by FPTR data """
+    """ Check `out` parameter data """
+    if out is not None:
+        if out.shape != result_shape:
+            utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)
+
+        utils.get_common_usm_allocation(x1, out)    #check USM allocation is common
+
+    if out is None or out.is_array_overlapped(x1) or not out.match_ctype(return_type):
+        """
+        Create result array with type given by FPTR data.
+        If the 'out' array has a dtype other than expected or overlaps the memory of any input array,
+        we have to create a temporary array and copy the data from the temporary into the 'out' array
+        once the computation is completed.
+        Otherwise simultaneous access to the same memory may cause a race condition,
+        which will result in undefined behaviour.
+        """
+        is_result_memory_allocated = True
         x1_obj = x1.get_array()
         result = utils.create_output_descriptor(result_shape,
-                                                kernel_data.return_type,
+                                                return_type,
                                                 None,
                                                 device=x1_obj.sycl_device,
                                                 usm_type=x1_obj.usm_type,
                                                 sycl_queue=x1_obj.sycl_queue)
     else:
-        if out.dtype != result_type:
-            utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type)
-        if out.shape != result_shape:
-            utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)
-
+        is_result_memory_allocated = False
         result = out
 
-        utils.get_common_usm_allocation(x1, result)  # check USM allocation is common
-
     result_sycl_queue = result.get_array().sycl_queue
 
     cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
@@ -400,7 +409,10 @@ cdef utils.dpnp_descriptor call_fptr_1in_1out_strides(DPNPFuncName fptr_name,
     with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
-    return result
+    if out is not None and is_result_memory_allocated:
+        return out.get_result_desc(result)
+
+    return result.get_result_desc()
 
 
 cdef utils.dpnp_descriptor call_fptr_2in_1out(DPNPFuncName fptr_name,

@@ -82,6 +82,34 @@
 ]
 
 
+def _check_nd_call(origin_func, dpnp_func, x1, out=None, where=True, dtype=None, subok=True, **kwargs):
+    """
+    Dispatch a one-input call either to `dpnp_func` or to `origin_func`.
+
+    `dpnp_func` is used only when no extra `kwargs` are given, `where`, `dtype` and `subok`
+    keep their default values, `x1` is not a scalar and a DPNP descriptor is obtained for it.
+    In every other case the call is forwarded to `origin_func` through `call_origin`.
+    On the DPNP path a `TypeError` is raised if a given `out` is neither `dpnp.ndarray` nor `dpt.usm_ndarray`.
+    """
+
+    # the conditions are tested left to right, so `dpnp.isscalar` runs only if the rest are default
+    fallback = (
+        kwargs or where is not True or dtype is not None or subok is not True or dpnp.isscalar(x1)
+    )
+    x1_desc = None if fallback else dpnp.get_dpnp_descriptor(
+        x1, copy_when_strides=False, copy_when_nondefault_queue=False
+    )
+
+    if x1_desc:
+        out_desc = None
+        if out is not None:
+            if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                raise TypeError("return array must be of supported array type")
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
+        return dpnp_func(x1_desc, out=out_desc).get_pyobj()
+
+    return call_origin(origin_func, x1, dtype=dtype, out=out, where=where, subok=subok, **kwargs)
+
 def arccos(x1):
     """
     Trigonometric inverse cosine, element-wise.
@@ -907,7 +935,7 @@ def sinh(x1):
     return call_origin(numpy.sinh, x1, **kwargs)
 
 
-def sqrt(x1, /, out = None, **kwargs):
+def sqrt(x1, /, out = None, where=True, dtype=None, subok=True, **kwargs):
     """
     Return the positive square-root of an array, element-wise.
 
@@ -918,6 +946,8 @@ def sqrt(x1, /, out = None, **kwargs):
     Input array is supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
     Parameter `out` is supported as class:`dpnp.ndarray`, class:`dpctl.tensor.usm_ndarray` or
     with default value ``None``.
+    Parameters `where`, `dtype` and `subok` are supported only with their
+    default values.
     Otherwise the function will be executed sequentially on CPU.
     Keyword arguments ``kwargs`` are currently unsupported.
     Input array data types are limited by supported DPNP :ref:`Data types`.
@@ -932,23 +962,7 @@ def sqrt(x1, /, out = None, **kwargs):
 
     """
 
-    x1_desc = (
-        dpnp.get_dpnp_descriptor(
-            x1, copy_when_strides=False, copy_when_nondefault_queue=False
-        )
-        if not kwargs
-        else None
-    )
-    if x1_desc:
-        if out is not None:
-            if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
-                raise TypeError("return array must be of supported array type")
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
-        else:
-            out_desc = None
-        return dpnp_sqrt(x1_desc, out=out_desc).get_pyobj()
-
-    return call_origin(numpy.sqrt, x1, out=out, **kwargs)
+    return _check_nd_call(numpy.sqrt, dpnp_sqrt, x1, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def square(x1):

@@ -2,6 +2,7 @@
 from .helper import (
     get_all_dtypes,
     get_float_complex_dtypes,
+    get_float_dtypes,
     is_cpu_device,
     is_win_platform
 )
@@ -387,7 +388,7 @@ def test_ediff1d_int(self, array, data_type):
         expected = numpy.ediff1d(np_a)
         assert_array_equal(expected, result)
 
-    
+
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_ediff1d_args(self):
         np_a = numpy.array([1, 2, 4, 7, 0])
@@ -532,16 +533,19 @@ def test_ceil(self):
 
         assert_array_equal(expected, result)
 
-    @pytest.mark.parametrize("dtype",
-                             [numpy.float32, numpy.int64, numpy.int32],
-                             ids=['numpy.float32', 'numpy.int64', 'numpy.int32'])
-    def test_invalid_dtype(self, dtype):
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))  # input dtype
+    @pytest.mark.parametrize("dtype_out", get_float_dtypes())  # dtype used to allocate `out`
+    def test_out_dtype(self, dtype, dtype_out):
 
-        dp_array = dpnp.arange(10, dtype=dpnp.float64)
-        dp_out = dpnp.empty(10, dtype=dtype)
+        np_array = numpy.arange(10, dtype=dtype)
+        np_out = numpy.empty(10, dtype=dtype_out)
+        expected = numpy.ceil(np_array, np_out)  # NumPy reference, written into `np_out`
 
-        with pytest.raises(ValueError):
-            dpnp.ceil(dp_array, out=dp_out)
+        dp_array = dpnp.arange(10, dtype=dtype)
+        dp_out = dpnp.empty(10, dtype=dtype_out)
+        result = dpnp.ceil(dp_array, dp_out)  # `dp_out` is allocated with `dtype_out`, not with the input dtype
+        # the replaced test expected ValueError for a mismatched `out` dtype; the values must now match NumPy
+        assert_allclose(expected, result, rtol=1e-06)
 
     @pytest.mark.parametrize("shape",
                              [(0,), (15, ), (2, 2)],
@@ -572,16 +576,19 @@ def test_floor(self):
 
         assert_array_equal(expected, result)
 
-    @pytest.mark.parametrize("dtype",
-                             [numpy.float32, numpy.int64, numpy.int32],
-                             ids=['numpy.float32', 'numpy.int64', 'numpy.int32'])
-    def test_invalid_dtype(self, dtype):
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))  # input dtype
+    @pytest.mark.parametrize("dtype_out", get_float_dtypes())  # dtype used to allocate `out`
+    def test_out_dtype(self, dtype, dtype_out):
 
-        dp_array = dpnp.arange(10, dtype=dpnp.float64)
-        dp_out = dpnp.empty(10, dtype=dtype)
+        np_array = numpy.arange(10, dtype=dtype)
+        np_out = numpy.empty(10, dtype=dtype_out)
+        expected = numpy.floor(np_array, np_out)  # NumPy reference, written into `np_out`
 
-        with pytest.raises(ValueError):
-            dpnp.floor(dp_array, out=dp_out)
+        dp_array = dpnp.arange(10, dtype=dtype)
+        dp_out = dpnp.empty(10, dtype=dtype_out)
+        result = dpnp.floor(dp_array, dp_out)  # `dp_out` is allocated with `dtype_out`, not with the input dtype
+        # the replaced test expected ValueError for a mismatched `out` dtype; the values must now match NumPy
+        assert_allclose(expected, result, rtol=1e-06)
 
     @pytest.mark.parametrize("shape",
                              [(0,), (15, ), (2, 2)],
@@ -612,16 +619,19 @@ def test_trunc(self):
 
         assert_array_equal(expected, result)
 
-    @pytest.mark.parametrize("dtype",
-                             [numpy.float32, numpy.int64, numpy.int32],
-                             ids=['numpy.float32', 'numpy.int64', 'numpy.int32'])
-    def test_invalid_dtype(self, dtype):
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))  # input dtype
+    @pytest.mark.parametrize("dtype_out", get_float_dtypes())  # dtype used to allocate `out`
+    def test_out_dtype(self, dtype, dtype_out):
+        """Compare `dpnp.trunc` with `numpy.trunc` when `out` is allocated with `dtype_out`."""
+        np_array = numpy.arange(10, dtype=dtype)
+        np_out = numpy.empty(10, dtype=dtype_out)
+        expected = numpy.trunc(np_array, np_out)  # NumPy reference, written into `np_out`
 
-        dp_array = dpnp.arange(10, dtype=dpnp.float64)
-        dp_out = dpnp.empty(10, dtype=dtype)
+        dp_array = dpnp.arange(10, dtype=dtype)
+        dp_out = dpnp.empty(10, dtype=dtype_out)
+        result = dpnp.trunc(dp_array, dp_out)  # `dp_out` is allocated with `dtype_out`, not with the input dtype
 
-        with pytest.raises(ValueError):
-            dpnp.trunc(dp_array, out=dp_out)
+        assert_allclose(expected, result, rtol=1e-06)  # replaces the former ValueError expectation
 
     @pytest.mark.parametrize("shape",
                              [(0,), (15, ), (2, 2)],

@@ -1,6 +1,6 @@
 import math
 import pytest
-from .helper import get_all_dtypes, is_cpu_device
+from .helper import get_all_dtypes, get_float_dtypes
 
 import dpnp
 
@@ -215,6 +215,59 @@ def test_strides_true_devide(dtype, shape):
 
     assert_allclose(result, expected)
 
+@pytest.mark.parametrize("func_name", ["sqrt"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+def test_strided_out_1args(func_name, dtype):
+    """Compare a one-argument function with NumPy when `out` is a strided view."""
+    full_shape = (5, 3, 2)
+    expected_out = numpy.ones(full_shape)[::3]
+    np_in = numpy.arange(numpy.prod(expected_out.shape), dtype=dtype).reshape(expected_out.shape)
+
+    actual_out = dpnp.ones(full_shape)[::3]
+    dp_in = dpnp.array(np_in)
+
+    expected = _getattr(numpy, func_name)(np_in, out=expected_out)
+    actual = _getattr(dpnp, func_name)(dp_in, out=actual_out)
+    assert_allclose(actual.asnumpy(), expected)
+    assert_allclose(actual_out.asnumpy(), expected_out)
+
+@pytest.mark.parametrize("func_name", ["sqrt"])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+def test_strided_in_out_1args(func_name, dtype):
+    """Compare a one-argument function with NumPy when both input and `out` are strided."""
+    shape = (3, 4, 2)
+    size = numpy.prod(shape)
+
+    # `out` takes every second slice along axis 0; the input is sliced the same way and transposed
+    expected_out = numpy.ones(shape, dtype=numpy.float64)[::2]
+    np_in = numpy.arange(size, dtype=dtype).reshape(shape)[::2].T
+    actual_out = dpnp.ones(shape, dtype=dpnp.float64)[::2]
+    dp_in = dpnp.arange(size, dtype=dtype).reshape(shape)[::2].T
+
+    expected = _getattr(numpy, func_name)(np_in, out=expected_out)
+    actual = _getattr(dpnp, func_name)(dp_in, out=actual_out)
+
+    assert_allclose(actual.asnumpy(), expected, rtol=1e-06)
+    assert_allclose(actual_out.asnumpy(), expected_out, rtol=1e-06)
+
+
+@pytest.mark.parametrize("func_name", ["sqrt"])
+@pytest.mark.parametrize("dtype", get_float_dtypes())
+def test_strided_in_out_1args_overlap(func_name, dtype):
+    """Compare a one-argument function with NumPy when `out` overlaps the input memory."""
+    shape = (4, 3, 2)
+    size = numpy.prod(shape)
+
+    np_data = numpy.arange(size, dtype=dtype).reshape(shape)
+    dp_data = dpnp.arange(size, dtype=dtype).reshape(shape)
+
+    # input is slices 0..2 and `out` is slices 1..3 along axis 0 of the same array
+    expected = _getattr(numpy, func_name)(np_data[:3], out=np_data[1:])
+    actual = _getattr(dpnp, func_name)(dp_data[:3], out=dp_data[1:])
+
+    assert_allclose(actual.asnumpy(), expected, rtol=1e-06)
+    assert_allclose(dp_data.asnumpy(), np_data, rtol=1e-06)
+
 
 @pytest.mark.parametrize("func_name",
                          ["add", "multiply", "power"])