
Commit 3cba1ce

use_dpctl_conj_for_dpnp
1 parent 42e02d9 commit 3cba1ce

File tree: 11 files changed (+227 / -45 lines)

dpnp/backend/extensions/vm/conj.hpp

Lines changed: 78 additions & 0 deletions

@@ -0,0 +1,78 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <CL/sycl.hpp>
+
+#include "common.hpp"
+#include "types_matrix.hpp"
+
+namespace dpnp
+{
+namespace backend
+{
+namespace ext
+{
+namespace vm
+{
+template <typename T>
+sycl::event conj_contig_impl(sycl::queue exec_q,
+                             const std::int64_t n,
+                             const char *in_a,
+                             char *out_y,
+                             const std::vector<sycl::event> &depends)
+{
+    type_utils::validate_type_for_device<T>(exec_q);
+
+    const T *a = reinterpret_cast<const T *>(in_a);
+    T *y = reinterpret_cast<T *>(out_y);
+
+    return mkl_vm::conj(exec_q,
+                        n, // number of elements to be calculated
+                        a, // pointer `a` containing input vector of size n
+                        y, // pointer `y` to the output vector of size n
+                        depends);
+}
+
+template <typename fnT, typename T>
+struct ConjContigFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename types::ConjOutputType<T>::value_type, void>)
+        {
+            return nullptr;
+        }
+        else {
+            return conj_contig_impl<T>;
+        }
+    }
+};
+} // namespace vm
+} // namespace ext
+} // namespace backend
+} // namespace dpnp

dpnp/backend/extensions/vm/types_matrix.hpp

Lines changed: 17 additions & 0 deletions

@@ -68,6 +68,23 @@ struct DivOutputType
         dpctl_td_ns::DefaultResultEntry<void>>::result_type;
 };
 
+/**
+ * @brief A factory to define pairs of supported types for which
+ * MKL VM library provides support in oneapi::mkl::vm::conj<T> function.
+ *
+ * @tparam T Type of input vector `a` and of result vector `y`.
+ */
+template <typename T>
+struct ConjOutputType
+{
+    using value_type = typename std::disjunction<
+        dpctl_td_ns::
+            TypeMapResultEntry<T, std::complex<double>, std::complex<double>>,
+        dpctl_td_ns::
+            TypeMapResultEntry<T, std::complex<float>, std::complex<float>>,
+        dpctl_td_ns::DefaultResultEntry<void>>::result_type;
+};
+
 /**
  * @brief A factory to define pairs of supported types for which
  * MKL VM library provides support in oneapi::mkl::vm::cos<T> function.
dpnp/backend/extensions/vm/vm_py.cpp

Lines changed: 30 additions & 0 deletions

@@ -31,6 +31,7 @@
 #include <pybind11/stl.h>
 
 #include "common.hpp"
+#include "conj.hpp"
 #include "cos.hpp"
 #include "div.hpp"
 #include "ln.hpp"
@@ -48,6 +49,7 @@ using vm_ext::unary_impl_fn_ptr_t;
 static binary_impl_fn_ptr_t div_dispatch_vector[dpctl_td_ns::num_types];
 
 static unary_impl_fn_ptr_t cos_dispatch_vector[dpctl_td_ns::num_types];
+static unary_impl_fn_ptr_t conj_dispatch_vector[dpctl_td_ns::num_types];
 static unary_impl_fn_ptr_t ln_dispatch_vector[dpctl_td_ns::num_types];
 static unary_impl_fn_ptr_t sin_dispatch_vector[dpctl_td_ns::num_types];
 static unary_impl_fn_ptr_t sqr_dispatch_vector[dpctl_td_ns::num_types];
@@ -116,6 +118,34 @@ PYBIND11_MODULE(_vm_impl, m)
               py::arg("sycl_queue"), py::arg("src"), py::arg("dst"));
     }
 
+    // UnaryUfunc: ==== Conj(x) ====
+    {
+        vm_ext::init_ufunc_dispatch_vector<unary_impl_fn_ptr_t,
+                                           vm_ext::ConjContigFactory>(
+            conj_dispatch_vector);
+
+        auto conj_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst,
+                              const event_vecT &depends = {}) {
+            return vm_ext::unary_ufunc(exec_q, src, dst, depends,
+                                       conj_dispatch_vector);
+        };
+        m.def("_conj", conj_pyapi,
+              "Call `conj` function from OneMKL VM library to compute "
+              "conjugate of vector elements",
+              py::arg("sycl_queue"), py::arg("src"), py::arg("dst"),
+              py::arg("depends") = py::list());
+
+        auto conj_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src,
+                                           arrayT dst) {
+            return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst,
+                                                    conj_dispatch_vector);
+        };
+        m.def("_mkl_conj_to_call", conj_need_to_call_pyapi,
+              "Check input arguments to answer if `conj` function from "
+              "OneMKL VM library can be used",
+              py::arg("sycl_queue"), py::arg("src"), py::arg("dst"));
+    }
+
     // UnaryUfunc: ==== Ln(x) ====
     {
         vm_ext::init_ufunc_dispatch_vector<unary_impl_fn_ptr_t,

dpnp/backend/include/dpnp_iface_fptr.hpp

Lines changed: 7 additions & 9 deletions

@@ -116,15 +116,13 @@ enum class DPNPFuncName : size_t
     DPNP_FN_CEIL_EXT, /**< Used in numpy.ceil() impl, requires extra parameters
                        */
     DPNP_FN_CHOLESKY, /**< Used in numpy.linalg.cholesky() impl */
-    DPNP_FN_CHOLESKY_EXT, /**< Used in numpy.linalg.cholesky() impl, requires
-                             extra parameters */
-    DPNP_FN_CONJIGUATE, /**< Used in numpy.conjugate() impl */
-    DPNP_FN_CONJIGUATE_EXT, /**< Used in numpy.conjugate() impl, requires extra
-                               parameters */
-    DPNP_FN_CHOOSE, /**< Used in numpy.choose() impl */
-    DPNP_FN_CHOOSE_EXT, /**< Used in numpy.choose() impl, requires extra
-                           parameters */
-    DPNP_FN_COPY, /**< Used in numpy.copy() impl */
+    DPNP_FN_CHOLESKY_EXT, /**< Used in numpy.linalg.cholesky() impl, requires
+                             extra parameters */
+    DPNP_FN_CONJUGATE, /**< Used in numpy.conjugate() impl */
+    DPNP_FN_CHOOSE, /**< Used in numpy.choose() impl */
+    DPNP_FN_CHOOSE_EXT, /**< Used in numpy.choose() impl, requires extra
+                           parameters */
+    DPNP_FN_COPY, /**< Used in numpy.copy() impl */
     DPNP_FN_COPY_EXT, /**< Used in numpy.copy() impl, requires extra parameters
                        */
     DPNP_FN_COPYSIGN, /**< Used in numpy.copysign() impl */

dpnp/backend/kernels/dpnp_krnl_elemwise.cpp

Lines changed: 5 additions & 16 deletions

@@ -1029,28 +1029,17 @@ constexpr auto dispatch_fmod_op(T elem1, T elem2)
 
 static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
 {
-    fmap[DPNPFuncName::DPNP_FN_CONJIGUATE][eft_INT][eft_INT] = {
+    fmap[DPNPFuncName::DPNP_FN_CONJUGATE][eft_INT][eft_INT] = {
         eft_INT, (void *)dpnp_copy_c_default<int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_CONJIGUATE][eft_LNG][eft_LNG] = {
+    fmap[DPNPFuncName::DPNP_FN_CONJUGATE][eft_LNG][eft_LNG] = {
         eft_LNG, (void *)dpnp_copy_c_default<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_CONJIGUATE][eft_FLT][eft_FLT] = {
+    fmap[DPNPFuncName::DPNP_FN_CONJUGATE][eft_FLT][eft_FLT] = {
        eft_FLT, (void *)dpnp_copy_c_default<float>};
-    fmap[DPNPFuncName::DPNP_FN_CONJIGUATE][eft_DBL][eft_DBL] = {
+    fmap[DPNPFuncName::DPNP_FN_CONJUGATE][eft_DBL][eft_DBL] = {
         eft_DBL, (void *)dpnp_copy_c_default<double>};
-    fmap[DPNPFuncName::DPNP_FN_CONJIGUATE][eft_C128][eft_C128] = {
+    fmap[DPNPFuncName::DPNP_FN_CONJUGATE][eft_C128][eft_C128] = {
         eft_C128, (void *)dpnp_conjugate_c_default<std::complex<double>>};
 
-    fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_INT][eft_INT] = {
-        eft_INT, (void *)dpnp_copy_c_ext<int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_LNG][eft_LNG] = {
-        eft_LNG, (void *)dpnp_copy_c_ext<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_FLT][eft_FLT] = {
-        eft_FLT, (void *)dpnp_copy_c_ext<float>};
-    fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_DBL][eft_DBL] = {
-        eft_DBL, (void *)dpnp_copy_c_ext<double>};
-    fmap[DPNPFuncName::DPNP_FN_CONJIGUATE_EXT][eft_C128][eft_C128] = {
-        eft_C128, (void *)dpnp_conjugate_c_ext<std::complex<double>>};
-
     fmap[DPNPFuncName::DPNP_FN_COPY][eft_BLN][eft_BLN] = {
         eft_BLN, (void *)dpnp_copy_c_default<bool>};
     fmap[DPNPFuncName::DPNP_FN_COPY][eft_INT][eft_INT] = {

dpnp/dpnp_algo/dpnp_algo.pxd

Lines changed: 0 additions & 2 deletions

@@ -70,8 +70,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na
         DPNP_FN_CHOLESKY_EXT
         DPNP_FN_CHOOSE
         DPNP_FN_CHOOSE_EXT
-        DPNP_FN_CONJIGUATE
-        DPNP_FN_CONJIGUATE_EXT
         DPNP_FN_COPY
         DPNP_FN_COPY_EXT
         DPNP_FN_COPYSIGN

dpnp/dpnp_algo/dpnp_algo_mathematical.pxi

Lines changed: 0 additions & 5 deletions

@@ -40,7 +40,6 @@ __all__ += [
     "dpnp_arctan2",
     "dpnp_around",
     "dpnp_ceil",
-    "dpnp_conjugate",
     "dpnp_copysign",
     "dpnp_cross",
     "dpnp_cumprod",
@@ -163,10 +162,6 @@ cpdef utils.dpnp_descriptor dpnp_ceil(utils.dpnp_descriptor x1, utils.dpnp_descr
     return call_fptr_1in_1out_strides(DPNP_FN_CEIL_EXT, x1, dtype=None, out=out, where=True, func_name='ceil')
 
 
-cpdef utils.dpnp_descriptor dpnp_conjugate(utils.dpnp_descriptor x1):
-    return call_fptr_1in_1out_strides(DPNP_FN_CONJIGUATE_EXT, x1)
-
-
 cpdef utils.dpnp_descriptor dpnp_copysign(utils.dpnp_descriptor x1_obj,
                                           utils.dpnp_descriptor x2_obj,
                                           object dtype=None,

dpnp/dpnp_algo/dpnp_elementwise_common.py

Lines changed: 53 additions & 0 deletions

@@ -46,6 +46,7 @@
     "dpnp_bitwise_and",
     "dpnp_bitwise_or",
     "dpnp_bitwise_xor",
+    "dpnp_conj",
     "dpnp_cos",
     "dpnp_divide",
     "dpnp_equal",
@@ -367,6 +368,58 @@ def _call_cos(src, dst, sycl_queue, depends=None):
     return dpnp_array._create_from_usm_ndarray(res_usm)
 
 
+_conj_docstring = """
+conj(x, out=None, order='K')
+
+Computes conjugate for each element `x_i` for input array `x`.
+
+Args:
+    x (dpnp.ndarray):
+        Input array, expected to have numeric data type.
+    out ({None, dpnp.ndarray}, optional):
+        Output array to populate. Array must have the correct
+        shape and the expected data type.
+    order ("C","F","A","K", optional): memory layout of the new
+        output array, if parameter `out` is `None`.
+        Default: "K".
+Return:
+    dpnp.ndarray:
+        An array containing the element-wise conjugate.
+        The returned array has the same data type as `x`.
+"""
+
+
+def _call_conj(src, dst, sycl_queue, depends=None):
+    """A callback to register in UnaryElementwiseFunc class of dpctl.tensor"""
+
+    if depends is None:
+        depends = []
+
+    if vmi._mkl_conj_to_call(sycl_queue, src, dst):
+        # call pybind11 extension for conj() function from OneMKL VM
+        return vmi._conj(sycl_queue, src, dst, depends)
+    return ti._conj(src, dst, sycl_queue, depends)
+
+
+conj_func = UnaryElementwiseFunc(
+    "conj", ti._conj_result_type, _call_conj, _conj_docstring
+)
+
+
+def dpnp_conj(x, out=None, order="K"):
+    """
+    Invokes conj() function from pybind11 extension of OneMKL VM if possible.
+
+    Otherwise fully relies on dpctl.tensor implementation for conj() function.
+    """
+    # dpctl.tensor only works with usm_ndarray
+    x1_usm = dpnp.get_usm_ndarray(x)
+    out_usm = None if out is None else dpnp.get_usm_ndarray(out)
+
+    res_usm = conj_func(x1_usm, out=out_usm, order=order)
+    return dpnp_array._create_from_usm_ndarray(res_usm)
+
+
 _divide_docstring_ = """
 divide(x1, x2, out=None, order="K")
 

dpnp/dpnp_iface_mathematical.py

Lines changed: 35 additions & 7 deletions

@@ -49,7 +49,9 @@
 
 from .dpnp_algo import *
 from .dpnp_algo.dpnp_elementwise_common import (
+    check_nd_call_func,
     dpnp_add,
+    dpnp_conj,
     dpnp_divide,
     dpnp_floor_divide,
     dpnp_multiply,
@@ -387,7 +389,17 @@ def ceil(x1, out=None, **kwargs):
     return call_origin(numpy.ceil, x1, out=out, **kwargs)
 
 
-def conjugate(x1, **kwargs):
+def conjugate(
+    x,
+    /,
+    out=None,
+    *,
+    order="K",
+    where=True,
+    dtype=None,
+    subok=True,
+    **kwargs,
+):
     """
     Return the complex conjugate, element-wise.
 
@@ -396,6 +408,18 @@ def conjugate(x1, **kwargs):
 
     For full documentation refer to :obj:`numpy.conjugate`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        The conjugate of each element of `x`.
+
+    Limitations
+    -----------
+    Parameters `x` is only supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
+    Parameters `where`, `dtype` and `subok` are supported with their default values.
+    Otherwise the function will be executed sequentially on CPU.
+    Input array data types are limited by supported DPNP :ref:`Data types`.
+
     Examples
     --------
     >>> import dpnp as np
@@ -409,13 +433,17 @@ def conjugate(x1, **kwargs):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(
-        x1, copy_when_strides=False, copy_when_nondefault_queue=False
+    return check_nd_call_func(
+        numpy.conjugate,
+        dpnp_conj,
+        x,
+        out=out,
+        where=where,
+        order=order,
+        dtype=dtype,
+        subok=subok,
+        **kwargs,
     )
-    if x1_desc and not kwargs:
-        return dpnp_conjugate(x1_desc).get_pyobj()
-
-    return call_origin(numpy.conjugate, x1, **kwargs)
 
 
 conj = conjugate
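With the rewritten signature, `conjugate` (and its `conj` alias) now matches the other element-wise functions routed through `check_nd_call_func`: `x` is positional-only, `out` is optional, `order` is keyword-only, and passing non-default `where`, `dtype`, or `subok` (or extra kwargs) falls back to `numpy.conjugate`. A short sketch of the public entry points; the commented values are just the mathematical conjugates, not output captured from this change:

import dpnp as np
import dpctl.tensor as dpt

x = np.array([1 + 2j, 3 - 4j])
print(np.conjugate(x))  # [1.-2.j  3.+4.j]
print(np.conj(x))       # same result via the alias

# dpctl.tensor.usm_ndarray input is accepted as well, per the Limitations above
print(np.conj(dpt.asarray([0 + 1j])))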

tests/skipped_tests_gpu.tbl

Lines changed: 0 additions & 2 deletions

@@ -13,7 +13,6 @@ tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.astype(dpnp.asarray(x), dpnp.float32)]
 
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5]
@@ -22,7 +21,6 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]
 
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]
-tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-copy-data3]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumprod-data4]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumsum-data5]

0 commit comments
