Skip to content

dpnp.fmod() doesn't work properly with a scalar #1348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
- [ ] Have you added a test, reproducer or referred to issue with a reproducer?
- [ ] Have you tested your changes locally for CPU and GPU devices?
- [ ] Have you made sure that new changes do not introduce compiler warnings?
- [ ] Have you checked performance impact of proposed changes?
- [ ] If this PR is a work in progress, are you filing the PR as a draft?
6 changes: 3 additions & 3 deletions dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,9 @@ MACRO_2ARG_3TYPES_OP(dpnp_divide_c,
MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))

MACRO_2ARG_3TYPES_OP(dpnp_fmod_c,
sycl::fmod((double)input1_elem, (double)input2_elem),
nullptr,
std::false_type,
dispatch_fmod_op(input1_elem, input2_elem),
x1 % x2,
MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t),
oneapi::mkl::vm::fmod,
MACRO_UNPACK_TYPES(float, double))

Expand Down
4 changes: 2 additions & 2 deletions dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
{ \
const shape_elem_type* result_strides_data = &dev_strides_data[0]; \
const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \
const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \
const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \
const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim]; \
\
size_t input1_id = 0; \
size_t input2_id = 0; \
Expand Down
81 changes: 44 additions & 37 deletions dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
{ \
const shape_elem_type* result_strides_data = &dev_strides_data[0]; \
const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \
const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \
\
size_t input_id = 0; \
for (size_t i = 0; i < input1_ndim; ++i) \
Expand Down Expand Up @@ -635,7 +635,7 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
{ \
const shape_elem_type* result_strides_data = &dev_strides_data[0]; \
const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \
const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \
\
size_t input_id = 0; \
for (size_t i = 0; i < input1_ndim; ++i) \
Expand Down Expand Up @@ -848,6 +848,18 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
return;
}

/**
 * @brief Compute the remainder of @p elem1 divided by @p elem2.
 *
 * The implementation is chosen at compile time from the operand type:
 * std::int32_t and std::int64_t use the built-in `%` operator, every other
 * type is forwarded to sycl::fmod.
 *
 * @tparam T     Common type of both operands.
 * @param elem1  Dividend.
 * @param elem2  Divisor.
 * @return       `elem1 % elem2` for the two integer types listed above,
 *               `sycl::fmod(elem1, elem2)` otherwise.
 */
template <typename T>
constexpr auto dispatch_fmod_op(T elem1, T elem2)
{
    constexpr bool use_integer_remainder = is_any_v<T, std::int32_t, std::int64_t>;

    if constexpr (!use_integer_remainder)
    {
        return sycl::fmod(elem1, elem2);
    }
    else
    {
        return elem1 % elem2;
    }
}

#define MACRO_2ARG_3TYPES_OP( \
__name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \
Expand Down Expand Up @@ -995,8 +1007,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
{ \
const shape_elem_type* result_strides_data = &dev_strides_data[0]; \
const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \
const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \
const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \
const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim]; \
\
size_t input1_id = 0; \
size_t input2_id = 0; \
Expand Down Expand Up @@ -1261,6 +1273,16 @@ static constexpr DPNPFuncType get_divide_res_type()
return widest_type;
}

/**
 * @brief Select the result type tag for an fmod call on two input type tags.
 *
 * When both inputs are DPNP_FT_BOOL the result is DPNP_FT_INT; for any other
 * combination the choice is delegated to populate_func_types<FT1, FT2>().
 *
 * @tparam FT1  Type tag of the first input.
 * @tparam FT2  Type tag of the second input.
 * @return      Type tag of the result.
 */
template <DPNPFuncType FT1, DPNPFuncType FT2>
static constexpr DPNPFuncType get_fmod_res_type()
{
    constexpr bool both_inputs_bool =
        (FT1 == DPNPFuncType::DPNP_FT_BOOL) && (FT2 == DPNPFuncType::DPNP_FT_BOOL);

    return both_inputs_bool ? DPNPFuncType::DPNP_FT_INT : populate_func_types<FT1, FT2>();
}

template <DPNPFuncType FT1, DPNPFuncType... FTs>
static void func_map_elemwise_2arg_3type_core(func_map_t& fmap)
{
Expand Down Expand Up @@ -1300,12 +1322,29 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap)
...);
}

// Fills the DPNP_FN_FMOD_EXT row of the function map for a fixed first input type FT1.
// The fold expression below expands once per element of FTs, so every (FT1, FTs) pair
// gets its own entry.
template <DPNPFuncType FT1, DPNPFuncType... FTs>
static void func_map_elemwise_2arg_3type_core_no_complex(func_map_t& fmap)
{
// Each entry holds the result type tag from get_fmod_res_type<FT1, FTs>() and a pointer to
// dpnp_fmod_c_ext instantiated with the C++ types that func_type_map_t::find_type maps
// the result tag, FT1 and FTs to (in that order).
((fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][FT1][FTs] =
{get_fmod_res_type<FT1, FTs>(),
(void*)dpnp_fmod_c_ext<func_type_map_t::find_type<get_fmod_res_type<FT1, FTs>()>,
func_type_map_t::find_type<FT1>,
func_type_map_t::find_type<FTs>>}),
...);
}

// Calls func_map_elemwise_2arg_3type_core<FT, FTs...>(fmap) once for each FT in the pack,
// i.e. every listed type tag is used as the first template argument with the full pack
// passed as the remaining arguments.
template <DPNPFuncType... FTs>
static void func_map_elemwise_2arg_3type_helper(func_map_t& fmap)
{
((func_map_elemwise_2arg_3type_core<FTs, FTs...>(fmap)), ...);
}

// Calls func_map_elemwise_2arg_3type_core_no_complex<FT, FTs...>(fmap) once for each FT in
// the pack, i.e. every listed type tag is used as the first template argument with the full
// pack passed as the remaining arguments.
template <DPNPFuncType... FTs>
static void func_map_elemwise_2arg_3type_helper_no_complex(func_map_t& fmap)
{
((func_map_elemwise_2arg_3type_core_no_complex<FTs, FTs...>(fmap)), ...);
}

static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
{
fmap[DPNPFuncName::DPNP_FN_ADD][eft_INT][eft_INT] = {eft_INT,
Expand Down Expand Up @@ -1539,39 +1578,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
fmap[DPNPFuncName::DPNP_FN_FMOD][eft_DBL][eft_DBL] = {eft_DBL,
(void*)dpnp_fmod_c_default<double, double, double>};

fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_INT] = {eft_INT,
(void*)dpnp_fmod_c_ext<int32_t, int32_t, int32_t>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_LNG] = {eft_LNG,
(void*)dpnp_fmod_c_ext<int64_t, int32_t, int64_t>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_FLT] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, int32_t, float>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_DBL] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, int32_t, double>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_INT] = {eft_LNG,
(void*)dpnp_fmod_c_ext<int64_t, int64_t, int32_t>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_LNG] = {eft_LNG,
(void*)dpnp_fmod_c_ext<int64_t, int64_t, int64_t>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_FLT] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, int64_t, float>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_DBL] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, int64_t, double>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_INT] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, float, int32_t>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_LNG] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, float, int64_t>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_FLT] = {eft_FLT,
(void*)dpnp_fmod_c_ext<float, float, float>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_DBL] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, float, double>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_INT] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, double, int32_t>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_LNG] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, double, int64_t>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_FLT] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, double, float>};
fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_DBL] = {eft_DBL,
(void*)dpnp_fmod_c_ext<double, double, double>};

fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_INT][eft_INT] = {eft_DBL,
(void*)dpnp_hypot_c_default<double, int32_t, int32_t>};
fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_INT][eft_LNG] = {eft_DBL,
Expand Down Expand Up @@ -1918,6 +1924,7 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
eft_DBL, (void*)dpnp_subtract_c_default<double, double, double>};

func_map_elemwise_2arg_3type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL, eft_C64, eft_C128>(fmap);
func_map_elemwise_2arg_3type_helper_no_complex<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL>(fmap);

return;
}
Expand Down
6 changes: 3 additions & 3 deletions dpnp/backend/kernels/dpnp_krnl_logic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
{ \
const shape_elem_type *result_strides_data = &dev_strides_data[0]; \
const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \
const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \
\
size_t input1_id = 0; \
\
Expand Down Expand Up @@ -635,8 +635,8 @@ static void func_map_logic_1arg_1type_helper(func_map_t& fmap)
const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
{ \
const shape_elem_type *result_strides_data = &dev_strides_data[0]; \
const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \
const shape_elem_type *input2_strides_data = &dev_strides_data[2]; \
const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \
const shape_elem_type *input2_strides_data = &dev_strides_data[2 * result_ndim]; \
\
size_t input1_id = 0; \
size_t input2_id = 0; \
Expand Down
6 changes: 3 additions & 3 deletions dpnp/backend/kernels/dpnp_krnl_searching.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,9 +294,9 @@ DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref,
const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
{
const shape_elem_type* result_strides_data = &dev_strides_data[0];
const shape_elem_type* condition_strides_data = &dev_strides_data[1];
const shape_elem_type* input1_strides_data = &dev_strides_data[2];
const shape_elem_type* input2_strides_data = &dev_strides_data[3];
const shape_elem_type* condition_strides_data = &dev_strides_data[result_ndim];
const shape_elem_type* input1_strides_data = &dev_strides_data[2 * result_ndim];
const shape_elem_type* input2_strides_data = &dev_strides_data[3 * result_ndim];

size_t condition_id = 0;
size_t input1_id = 0;
Expand Down
33 changes: 22 additions & 11 deletions dpnp/dpnp_algo/dpnp_algo.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -505,25 +505,33 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
return_type = kernel_data.return_type_no_fp64
func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64

if out is None:
""" Create result array with type given by FPTR data """
# check 'out' parameter data
if out is not None:
if out.shape != result_shape:
utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)

utils.get_common_usm_allocation(x1_obj, out) # check USM allocation is common

if out is None or out.is_array_overlapped(x1_obj) or out.is_array_overlapped(x2_obj) or not out.match_ctype(return_type):
"""
Create result array with type given by FPTR data.
If the 'out' array has a dtype other than the expected one or overlaps memory of any input array,
we have to create a temporary array and copy data from the temporary into the 'out' array
once the computation is completed.
Otherwise simultaneous access to the same memory may cause a race condition,
which will result in undefined behaviour.
"""
is_result_memory_allocated = True
result = utils.create_output_descriptor(result_shape,
return_type,
None,
device=result_sycl_device,
usm_type=result_usm_type,
sycl_queue=result_sycl_queue)
else:
result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type)
if out.dtype != result_type:
utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type)
if out.shape != result_shape:
utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)

is_result_memory_allocated = False
result = out

utils.get_common_usm_allocation(x1_obj, result) # check USM allocation is common

cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)

result_obj = result.get_array()
Expand Down Expand Up @@ -554,4 +562,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
c_dpctl.DPCTLEvent_Delete(event_ref)

return result
if out is not None and is_result_memory_allocated:
return out.get_result_desc(result)

return result.get_result_desc()
47 changes: 40 additions & 7 deletions dpnp/dpnp_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,23 @@

import dpnp


def _get_unwrapped_index_key(key):
    """
    Return an indexing key in which DPNP arrays are unwrapped into USM ndarrays
    for further processing in DPCTL advanced indexing functions.

    A `dpnp_array` key is replaced by the result of its ``get_array()`` call.
    For a tuple key, every top-level `dpnp_array` element is replaced the same
    way and a new tuple is returned; a tuple without any `dpnp_array` element
    is returned as is. Any other key is passed through unchanged.

    """

    def _unwrap(item):
        # only DPNP arrays are converted, everything else is kept untouched
        return item.get_array() if isinstance(item, dpnp_array) else item

    if not isinstance(key, tuple):
        return _unwrap(key)

    if any(isinstance(item, dpnp_array) for item in key):
        # build a new tuple only when there is something to unwrap
        return tuple(_unwrap(item) for item in key)
    return key


class dpnp_array:
"""
Multi-dimensional array object.
Expand Down Expand Up @@ -176,8 +193,7 @@ def __ge__(self, other):
# '__getattribute__',

def __getitem__(self, key):
if isinstance(key, dpnp_array):
key = key.get_array()
key = _get_unwrapped_index_key(key)

item = self._array_obj.__getitem__(key)
if not isinstance(item, dpt.usm_ndarray):
Expand All @@ -194,7 +210,10 @@ def __gt__(self, other):
return dpnp.greater(self, other)

# '__hash__',
# '__iadd__',

def __iadd__(self, other):
"""In-place addition: calls `dpnp.add(self, other, out=self)` and returns `self`."""
dpnp.add(self, other, out=self)
return self

def __iand__(self, other):
dpnp.bitwise_and(self, other, out=self)
Expand All @@ -208,7 +227,10 @@ def __ilshift__(self, other):

# '__imatmul__',
# '__imod__',
# '__imul__',

def __imul__(self, other):
"""In-place multiplication: calls `dpnp.multiply(self, other, out=self)` and returns `self`."""
dpnp.multiply(self, other, out=self)
return self

def __index__(self):
"""Return the result of `__index__()` called on the wrapped `_array_obj`."""
return self._array_obj.__index__()
Expand Down Expand Up @@ -334,8 +356,8 @@ def __rxor__(self, other):
# '__setattr__',

def __setitem__(self, key, val):
if isinstance(key, dpnp_array):
key = key.get_array()
key = _get_unwrapped_index_key(key)

if isinstance(val, dpnp_array):
val = val.get_array()

Expand Down Expand Up @@ -760,6 +782,8 @@ def item(self, id=None):
@property
def itemsize(self):
"""
Size of one array element in bytes.

The value is read from the `itemsize` attribute of the wrapped `_array_obj`.

"""

return self._array_obj.itemsize
Expand All @@ -785,11 +809,20 @@ def min(self, axis=None, out=None, keepdims=numpy._NoValue, initial=numpy._NoVal

return dpnp.min(self, axis, out, keepdims, initial, where)

# 'nbytes',
@property
def nbytes(self):
"""
Total bytes consumed by the elements of the array.

The value is read from the `nbytes` attribute of the wrapped `_array_obj`.

"""

return self._array_obj.nbytes

@property
def ndim(self):
"""
Number of array dimensions.

The value is read from the `ndim` attribute of the wrapped `_array_obj`.

"""

return self._array_obj.ndim
Expand Down
Loading