dpnp.fmod() doesn't work properly with a scalar

antonwolfy · antonwolfy · commit db9ee1e3b43f · 2023-03-22T05:33:47.000-05:00
diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
@@ -138,9 +138,9 @@ MACRO_2ARG_3TYPES_OP(dpnp_divide_c,
                      MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
 
 MACRO_2ARG_3TYPES_OP(dpnp_fmod_c,
-                     sycl::fmod((double)input1_elem, (double)input2_elem),
-                     nullptr,
-                     std::false_type,
+                     dispatch_fmod_op(input1_elem, input2_elem),
+                     x1 % x2,
+                     MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
                      oneapi::mkl::vm::fmod,
                      MACRO_UNPACK_TYPES(float, double))
 
diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -848,6 +848,18 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
     return;
 }
 
+template <typename T> // Remainder of elem1 by elem2: operator% for int32/int64, sycl::fmod for any other T.
+constexpr auto dispatch_fmod_op(T elem1, T elem2) // both operands share the single type T
+{
+    if constexpr (is_any_v<T, std::int32_t, std::int64_t>) // branch is selected at compile time from T
+    {
+        return elem1 % elem2; // integer remainder; NOTE(review): no guard for elem2 == 0 here - confirm callers
+    }
+    else
+    {
+        return sycl::fmod(elem1, elem2); // every T other than int32/int64 is forwarded to sycl::fmod
+    }
+}
 
 #define MACRO_2ARG_3TYPES_OP(                                                                                          \
     __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__)                       \
@@ -995,8 +1007,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
                 const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
                     const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
-                    const shape_elem_type* input1_strides_data = &dev_strides_data[1];                                 \
-                    const shape_elem_type* input2_strides_data = &dev_strides_data[2];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
                                                                                                                        \
                     size_t input1_id = 0;                                                                              \
                     size_t input2_id = 0;                                                                              \
@@ -1261,6 +1273,19 @@ static constexpr DPNPFuncType get_divide_res_type()
     return widest_type;
 }
 
+template <DPNPFuncType FT1, DPNPFuncType FT2> // Result type registered for fmod on operand types FT1 and FT2.
+static constexpr DPNPFuncType get_fmod_res_type()
+{
+    constexpr auto widest_type = populate_func_types<FT1, FT2>(); // assumed to be the wider of FT1/FT2 - TODO confirm
+    // Test the widest type, not the narrower operand: bool paired with a wider type must keep that wider type.
+
+    if constexpr (widest_type == DPNPFuncType::DPNP_FT_BOOL) // given the assumption above: both operands are bool
+    {
+        return DPNPFuncType::DPNP_FT_INT;
+    }
+    return widest_type;
+}
+
 template <DPNPFuncType FT1, DPNPFuncType... FTs>
 static void func_map_elemwise_2arg_3type_core(func_map_t& fmap)
 {
@@ -1300,12 +1325,29 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap)
      ...);
 }
 
+template <DPNPFuncType FT1, DPNPFuncType... FTs> // Fills fmap[DPNP_FN_FMOD_EXT][FT1][FT] for every FT in FTs.
+static void func_map_elemwise_2arg_3type_core_no_complex(func_map_t& fmap)
+{
+    ((fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][FT1][FTs] =
+          {get_fmod_res_type<FT1, FTs>(), // result type stored next to the kernel pointer
+           (void*)dpnp_fmod_c_ext<func_type_map_t::find_type<get_fmod_res_type<FT1, FTs>()>, // output element type
+                                  func_type_map_t::find_type<FT1>, // first input element type
+                                  func_type_map_t::find_type<FTs>>}), // second input element type
+     ...); // comma fold: one table assignment per type in FTs
+}
+
 template <DPNPFuncType... FTs>
 static void func_map_elemwise_2arg_3type_helper(func_map_t& fmap)
 {
     ((func_map_elemwise_2arg_3type_core<FTs, FTs...>(fmap)), ...);
 }
 
+template <DPNPFuncType... FTs> // Runs the _core_no_complex filler once per type in FTs.
+static void func_map_elemwise_2arg_3type_helper_no_complex(func_map_t& fmap)
+{
+    ((func_map_elemwise_2arg_3type_core_no_complex<FTs, FTs...>(fmap)), ...); // each FTs is FT1 in turn, paired with the full pack
+}
+
 static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
 {
     fmap[DPNPFuncName::DPNP_FN_ADD][eft_INT][eft_INT] = {eft_INT,
@@ -1539,39 +1581,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_FMOD][eft_DBL][eft_DBL] = {eft_DBL,
                                                           (void*)dpnp_fmod_c_default<double, double, double>};
 
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_INT] = {eft_INT,
-                                                              (void*)dpnp_fmod_c_ext<int32_t, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_LNG] = {eft_LNG,
-                                                              (void*)dpnp_fmod_c_ext<int64_t, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_FLT] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_DBL] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_INT] = {eft_LNG,
-                                                              (void*)dpnp_fmod_c_ext<int64_t, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_LNG] = {eft_LNG,
-                                                              (void*)dpnp_fmod_c_ext<int64_t, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_FLT] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_DBL] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_INT] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_LNG] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_FLT] = {eft_FLT,
-                                                              (void*)dpnp_fmod_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_DBL] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_INT] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_LNG] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_FLT] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_DBL] = {eft_DBL,
-                                                              (void*)dpnp_fmod_c_ext<double, double, double>};
-
     fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_INT][eft_INT] = {eft_DBL,
                                                            (void*)dpnp_hypot_c_default<double, int32_t, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_INT][eft_LNG] = {eft_DBL,
@@ -1918,6 +1927,7 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
         eft_DBL, (void*)dpnp_subtract_c_default<double, double, double>};
 
     func_map_elemwise_2arg_3type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL, eft_C64, eft_C128>(fmap);
+    func_map_elemwise_2arg_3type_helper_no_complex<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL>(fmap);
 
     return;
 }
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
@@ -95,6 +95,41 @@
 ]
 
 
+def _check_nd_call(origin_func, dpnp_func, x1, x2, out=None, where=True, dtype=None, subok=True, **kwargs):
+    """Call `dpnp_func` when the arguments are supported, otherwise fall back to `call_origin(origin_func, ...)`."""
+
+    if kwargs:
+        pass  # extra keyword arguments: skip the dpnp path and reach call_origin at the end
+    elif where is not True:
+        pass  # non-default `where`: same fallback
+    elif dtype is not None:
+        pass  # non-default `dtype`: same fallback
+    elif subok is not True:
+        pass  # non-default `subok`: same fallback
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:  # a falsy descriptor also falls through to call_origin
+            if out is not None:
+                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                    raise TypeError("return array must be of supported array type")
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None  # falsy -> None
+            else:
+                out_desc = None
+
+            return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
+
+    return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs)  # `subok` is not forwarded
+
+
 def abs(*args, **kwargs):
     """
     Calculate the absolute value element-wise.
@@ -852,63 +887,46 @@ def fmin(*args, **kwargs):
     return dpnp.minimum(*args, **kwargs)
 
 
-def fmod(x1, x2, dtype=None, out=None, where=True, **kwargs):
+def fmod(x1,
+         x2,
+         /,
+         out=None,
+         *,
+         where=True,
+         dtype=None,
+         subok=True,
+         **kwargs):
     """
     Calculate the element-wise remainder of division.
 
     For full documentation refer to :obj:`numpy.fmod`.
 
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
-    Keyword arguments ``kwargs`` are currently unsupported.
-    Otherwise the functions will be executed sequentially on CPU.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but `x1` and `x2` cannot both be scalars at the same time.
+    Parameters `where`, `dtype` and `subok` are supported with their default values.
+    Keyword argument `kwargs` is currently unsupported.
+    Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
     See Also
     --------
-    :obj:`dpnp.reminder` : Remainder complementary to floor_divide.
+    :obj:`dpnp.remainder` : Remainder complementary to floor_divide.
     :obj:`dpnp.divide` : Standard division.
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> a = np.array([2, -3, 4, 5, -4.5])
-    >>> b = np.array([2, 2, 2, 2, 2])
-    >>> result = np.fmod(a, b)
+    >>> import dpnp as dp
+    >>> a = dp.array([2, -3, 4, 5, -4.5])
+    >>> b = dp.array([2, 2, 2, 2, 2])
+    >>> result = dp.fmod(a, b)
     >>> [x for x in result]
     [0.0, -1.0, 0.0, 1.0, -0.5]
 
     """
 
-    x1_is_scalar = dpnp.isscalar(x1)
-    x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
-
-    if x1_desc and x2_desc and not kwargs:
-        if not x1_desc and not x1_is_scalar:
-            pass
-        elif not x2_desc and not x2_is_scalar:
-            pass
-        elif x1_is_scalar and x2_is_scalar:
-            pass
-        elif x1_desc and x1_desc.ndim == 0:
-            pass
-        elif x2_desc and x2_desc.ndim == 0:
-            pass
-        elif dtype is not None:
-            pass
-        elif out is not None:
-            pass
-        elif not where:
-            pass
-        else:
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
-            return dpnp_fmod(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
-
-    return call_origin(numpy.fmod, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+    return _check_nd_call(numpy.fmod, dpnp_fmod, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def gradient(x1, *varargs, **kwargs):