Skip to content

Commit c7ae46e

Browse files
authored
Merge branch 'master' into fix_asarray_sequences
2 parents c7858af + c1c40e3 commit c7ae46e

14 files changed

+300
-82
lines changed

dpnp/backend/kernels/dpnp_krnl_bitwise.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
281281
const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
282282
{ \
283283
const shape_elem_type* result_strides_data = &dev_strides_data[0]; \
284-
const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \
285-
const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \
284+
const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \
285+
const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim]; \
286286
\
287287
size_t input1_id = 0; \
288288
size_t input2_id = 0; \

dpnp/backend/kernels/dpnp_krnl_elemwise.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@
111111
size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
112112
{ \
113113
const shape_elem_type* result_strides_data = &dev_strides_data[0]; \
114-
const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \
114+
const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \
115115
\
116116
size_t input_id = 0; \
117117
for (size_t i = 0; i < input1_ndim; ++i) \
@@ -635,7 +635,7 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
635635
size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
636636
{ \
637637
const shape_elem_type* result_strides_data = &dev_strides_data[0]; \
638-
const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \
638+
const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \
639639
\
640640
size_t input_id = 0; \
641641
for (size_t i = 0; i < input1_ndim; ++i) \
@@ -995,8 +995,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
995995
const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
996996
{ \
997997
const shape_elem_type* result_strides_data = &dev_strides_data[0]; \
998-
const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \
999-
const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \
998+
const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \
999+
const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim]; \
10001000
\
10011001
size_t input1_id = 0; \
10021002
size_t input2_id = 0; \

dpnp/backend/kernels/dpnp_krnl_logic.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
396396
const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
397397
{ \
398398
const shape_elem_type *result_strides_data = &dev_strides_data[0]; \
399-
const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \
399+
const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \
400400
\
401401
size_t input1_id = 0; \
402402
\
@@ -635,8 +635,8 @@ static void func_map_logic_1arg_1type_helper(func_map_t& fmap)
635635
const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
636636
{ \
637637
const shape_elem_type *result_strides_data = &dev_strides_data[0]; \
638-
const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \
639-
const shape_elem_type *input2_strides_data = &dev_strides_data[2]; \
638+
const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \
639+
const shape_elem_type *input2_strides_data = &dev_strides_data[2 * result_ndim]; \
640640
\
641641
size_t input1_id = 0; \
642642
size_t input2_id = 0; \

dpnp/backend/kernels/dpnp_krnl_searching.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -294,9 +294,9 @@ DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref,
294294
const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
295295
{
296296
const shape_elem_type* result_strides_data = &dev_strides_data[0];
297-
const shape_elem_type* condition_strides_data = &dev_strides_data[1];
298-
const shape_elem_type* input1_strides_data = &dev_strides_data[2];
299-
const shape_elem_type* input2_strides_data = &dev_strides_data[3];
297+
const shape_elem_type* condition_strides_data = &dev_strides_data[result_ndim];
298+
const shape_elem_type* input1_strides_data = &dev_strides_data[2 * result_ndim];
299+
const shape_elem_type* input2_strides_data = &dev_strides_data[3 * result_ndim];
300300

301301
size_t condition_id = 0;
302302
size_t input1_id = 0;

dpnp/dpnp_algo/dpnp_algo.pyx

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -505,25 +505,33 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
505505
return_type = kernel_data.return_type_no_fp64
506506
func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64
507507

508-
if out is None:
509-
""" Create result array with type given by FPTR data """
508+
# check 'out' parameter data
509+
if out is not None:
510+
if out.shape != result_shape:
511+
utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)
512+
513+
utils.get_common_usm_allocation(x1_obj, out) # check USM allocation is common
514+
515+
if out is None or out.is_array_overlapped(x1_obj) or out.is_array_overlapped(x2_obj) or not out.match_ctype(return_type):
516+
"""
517+
Create result array with type given by FPTR data.
518+
If the 'out' array has a dtype other than the expected one or overlaps memory of any input array,
519+
we have to create a temporary array and copy the data from it into the 'out' array,
520+
once the computation is completed.
521+
Otherwise, simultaneous access to the same memory may cause a race condition
522+
which results in undefined behaviour.
523+
"""
524+
is_result_memory_allocated = True
510525
result = utils.create_output_descriptor(result_shape,
511526
return_type,
512527
None,
513528
device=result_sycl_device,
514529
usm_type=result_usm_type,
515530
sycl_queue=result_sycl_queue)
516531
else:
517-
result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type)
518-
if out.dtype != result_type:
519-
utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type)
520-
if out.shape != result_shape:
521-
utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape)
522-
532+
is_result_memory_allocated = False
523533
result = out
524534

525-
utils.get_common_usm_allocation(x1_obj, result) # check USM allocation is common
526-
527535
cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)
528536

529537
result_obj = result.get_array()
@@ -554,4 +562,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name,
554562
with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
555563
c_dpctl.DPCTLEvent_Delete(event_ref)
556564

557-
return result
565+
if out is not None and is_result_memory_allocated:
566+
return out.get_result_desc(result)
567+
568+
return result.get_result_desc()

dpnp/dpnp_iface.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,10 @@ def get_dpnp_descriptor(ext_obj,
272272
if use_origin_backend():
273273
return False
274274

275+
# It's required to keep track of the input object if a non-strided copy is going to be created.
276+
# Thus an extra descriptor is allocated to refer to the original input.
277+
orig_desc = None
278+
275279
# If input object is a scalar, it means it was allocated on host memory.
276280
# We need to copy it to USM memory according to compute follows data paradigm.
277281
if isscalar(ext_obj):
@@ -291,6 +295,7 @@ def get_dpnp_descriptor(ext_obj,
291295
ext_obj_offset = 0
292296

293297
if ext_obj.strides != shape_offsets or ext_obj_offset != 0:
298+
orig_desc = dpnp_descriptor(ext_obj)
294299
ext_obj = array(ext_obj)
295300

296301
# while dpnp functions are based on DPNP_QUEUE
@@ -304,7 +309,7 @@ def get_dpnp_descriptor(ext_obj,
304309
if not queue_is_default:
305310
ext_obj = array(ext_obj, sycl_queue=default_queue)
306311

307-
dpnp_desc = dpnp_descriptor(ext_obj)
312+
dpnp_desc = dpnp_descriptor(ext_obj, orig_desc)
308313
if dpnp_desc.is_valid:
309314
return dpnp_desc
310315

dpnp/dpnp_iface_bitwise.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@
6262
def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=True, **kwargs):
6363
"""Choose function to call based on input and call chosen function."""
6464

65-
if where is not True:
65+
if kwargs:
66+
pass
67+
elif where is not True:
6668
pass
6769
elif dtype is not None:
6870
pass
@@ -85,7 +87,7 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=T
8587
if out is not None:
8688
if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
8789
raise TypeError("return array must be of supported array type")
88-
out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
90+
out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
8991
else:
9092
out_desc = None
9193

@@ -273,7 +275,7 @@ def invert(x,
273275
if out is not None:
274276
if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
275277
raise TypeError("return array must be of supported array type")
276-
out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
278+
out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
277279
else:
278280
out_desc = None
279281
return dpnp_invert(x1_desc, out_desc).get_pyobj()

dpnp/dpnp_iface_linearalgebra.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def dot(x1, x2, out=None, **kwargs):
114114
if out is not None:
115115
if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
116116
raise TypeError("return array must be of supported array type")
117-
out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
117+
out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
118118
else:
119119
out_desc = None
120120
return dpnp_dot(x1_desc, x2_desc, out=out_desc).get_pyobj()

dpnp/dpnp_iface_mathematical.py

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,41 @@
9595
]
9696

9797

98+
def _check_nd_call(origin_func, dpnp_func, x1, x2, out=None, where=True, dtype=None, subok=True, **kwargs):
99+
"""Choose function to call based on input and call chosen function."""
100+
101+
if kwargs:
102+
pass
103+
elif where is not True:
104+
pass
105+
elif dtype is not None:
106+
pass
107+
elif subok is not True:
108+
pass
109+
elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
110+
# at least either x1 or x2 has to be an array
111+
pass
112+
else:
113+
# get USM type and queue to copy scalar from the host memory into a USM allocation
114+
usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
115+
116+
x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
117+
alloc_usm_type=usm_type, alloc_queue=queue)
118+
x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
119+
alloc_usm_type=usm_type, alloc_queue=queue)
120+
if x1_desc and x2_desc:
121+
if out is not None:
122+
if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
123+
raise TypeError("return array must be of supported array type")
124+
out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None
125+
else:
126+
out_desc = None
127+
128+
return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
129+
130+
return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
131+
132+
98133
def abs(*args, **kwargs):
99134
"""
100135
Calculate the absolute value element-wise.
@@ -1397,34 +1432,7 @@ def power(x1,
13971432
13981433
"""
13991434

1400-
if where is not True:
1401-
pass
1402-
elif dtype is not None:
1403-
pass
1404-
elif subok is not True:
1405-
pass
1406-
elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
1407-
# at least either x1 or x2 has to be an array
1408-
pass
1409-
else:
1410-
# get USM type and queue to copy scalar from the host memory into a USM allocation
1411-
usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
1412-
1413-
x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
1414-
alloc_usm_type=usm_type, alloc_queue=queue)
1415-
x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
1416-
alloc_usm_type=usm_type, alloc_queue=queue)
1417-
if x1_desc and x2_desc:
1418-
if out is not None:
1419-
if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
1420-
raise TypeError("return array must be of supported array type")
1421-
out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
1422-
else:
1423-
out_desc = None
1424-
1425-
return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
1426-
1427-
return call_origin(numpy.power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
1435+
return _check_nd_call(numpy.power, dpnp_power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
14281436

14291437

14301438
def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True):

dpnp/dpnp_utils/dpnp_algo_utils.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,13 @@ cdef class dpnp_descriptor:
116116

117117
cdef public: # TODO remove "public" as python accessible attribute
118118
object origin_pyobj
119+
dpnp_descriptor origin_desc
119120
dict descriptor
120121
Py_ssize_t dpnp_descriptor_data_size
121122
cpp_bool dpnp_descriptor_is_scalar
122123

123124
cdef void * get_data(self)
125+
cdef cpp_bool match_ctype(self, DPNPFuncType ctype)
124126

125127

126128
cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except *

0 commit comments

Comments
 (0)