Skip to content

Boolean indexing: extract, place, nonzero #1097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dpctl/tensor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pybind11_add_module(${python_module_name} MODULE
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_for_reshape.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/integer_advanced_indexing.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/boolean_advanced_indexing.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/eye_ctor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/full_ctor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/triul_ctor.cpp
Expand Down
5 changes: 4 additions & 1 deletion dpctl/tensor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
)
from dpctl.tensor._device import Device
from dpctl.tensor._dlpack import from_dlpack
from dpctl.tensor._indexing_functions import put, take
from dpctl.tensor._indexing_functions import extract, nonzero, place, put, take
from dpctl.tensor._manipulation_functions import (
broadcast_arrays,
broadcast_to,
Expand Down Expand Up @@ -115,6 +115,9 @@
"squeeze",
"take",
"put",
"extract",
"place",
"nonzero",
"from_numpy",
"to_numpy",
"asnumpy",
Expand Down
147 changes: 100 additions & 47 deletions dpctl/tensor/_copy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,45 +389,75 @@ def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True):
return R


def _mock_extract(ary, ary_mask, p):
exec_q = dpctl.utils.get_execution_queue(
(
ary.sycl_queue,
ary_mask.sycl_queue,
def _extract_impl(ary, ary_mask, axis=0):
"""Extract elements of ary by applying mask starting from slot
dimension axis"""
if not isinstance(ary, dpt.usm_ndarray):
raise TypeError(
f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
)
if not isinstance(ary_mask, dpt.usm_ndarray):
raise TypeError(
f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary_mask)}"
)
exec_q = dpctl.utils.get_execution_queue(
(ary.sycl_queue, ary_mask.sycl_queue)
)
if exec_q is None:
raise dpctl.utils.ExecutionPlacementError(
"Can not automatically determine where to allocate the "
"result or performance execution. "
"Use `usm_ndarray.to_device` method to migrate data to "
"be associated with the same queue."
"arrays have different associated queues. "
"Use `Y.to_device(X.device)` to migrate."
)

res_usm_type = dpctl.utils.get_coerced_usm_type(
(
ary.usm_type,
ary_mask.usm_type,
ary_nd = ary.ndim
pp = normalize_axis_index(operator.index(axis), ary_nd)
mask_nd = ary_mask.ndim
if pp < 0 or pp + mask_nd > ary_nd:
raise ValueError(
"Parameter p is inconsistent with input array dimensions"
)
mask_nelems = ary_mask.size
cumsum = dpt.empty(mask_nelems, dtype=dpt.int64, device=ary_mask.device)
exec_q = cumsum.sycl_queue
mask_count = ti.mask_positions(ary_mask, cumsum, sycl_queue=exec_q)
dst_shape = ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :]
dst = dpt.empty(
dst_shape, dtype=ary.dtype, usm_type=ary.usm_type, device=ary.device
)
ary_np = dpt.asnumpy(ary)
mask_np = dpt.asnumpy(ary_mask)
res_np = ary_np[(slice(None),) * p + (mask_np,)]
res = dpt.empty(
res_np.shape, dtype=ary.dtype, usm_type=res_usm_type, sycl_queue=exec_q
hev, _ = ti._extract(
src=ary,
cumsum=cumsum,
axis_start=pp,
axis_end=pp + mask_nd,
dst=dst,
sycl_queue=exec_q,
)
res[...] = res_np
return res
hev.wait()
return dst


def _mock_nonzero(ary):
def _nonzero_impl(ary):
if not isinstance(ary, dpt.usm_ndarray):
raise TypeError
q = ary.sycl_queue
raise TypeError(
f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
)
exec_q = ary.sycl_queue
usm_type = ary.usm_type
ary_np = dpt.asnumpy(ary)
nz = ary_np.nonzero()
return tuple(dpt.asarray(i, usm_type=usm_type, sycl_queue=q) for i in nz)
mask_nelems = ary.size
cumsum = dpt.empty(
mask_nelems, dtype=dpt.int64, sycl_queue=exec_q, order="C"
)
mask_count = ti.mask_positions(ary, cumsum, sycl_queue=exec_q)
indexes = dpt.empty(
(ary.ndim, mask_count),
dtype=cumsum.dtype,
usm_type=usm_type,
sycl_queue=exec_q,
order="C",
)
hev, _ = ti._nonzero(cumsum, indexes, ary.shape, exec_q)
res = tuple(indexes[i, :] for i in range(ary.ndim))
hev.wait()
return res


def _take_multi_index(ary, inds, p):
Expand Down Expand Up @@ -473,34 +503,57 @@ def _take_multi_index(ary, inds, p):
return res


def _mock_place(ary, ary_mask, p, vals):
def _place_impl(ary, ary_mask, vals, axis=0):
"""Place elements of vals into ary at the positions selected by mask,
with the mask applied starting from slot dimension axis"""
if not isinstance(ary, dpt.usm_ndarray):
raise TypeError
raise TypeError(
f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
)
if not isinstance(ary_mask, dpt.usm_ndarray):
raise TypeError
raise TypeError(
f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary_mask)}"
)
if not isinstance(vals, dpt.usm_ndarray):
raise TypeError(
f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary_mask)}"
)
exec_q = dpctl.utils.get_execution_queue(
(ary.sycl_queue, ary_mask.sycl_queue)
(ary.sycl_queue, ary_mask.sycl_queue, vals.sycl_queue)
)
if exec_q is not None and isinstance(vals, dpt.usm_ndarray):
exec_q = dpctl.utils.get_execution_queue((exec_q, vals.sycl_queue))
if exec_q is None:
raise dpctl.utils.ExecutionPlacementError(
"Can not automatically determine where to allocate the "
"result or performance execution. "
"Use `usm_ndarray.to_device` method to migrate data to "
"be associated with the same queue."
"arrays have different associated queues. "
"Use `Y.to_device(X.device)` to migrate."
)

ary_np = dpt.asnumpy(ary)
mask_np = dpt.asnumpy(ary_mask)
if isinstance(vals, dpt.usm_ndarray) or hasattr(
vals, "__sycl_usm_array_interface__"
):
vals_np = dpt.asnumpy(vals)
ary_nd = ary.ndim
pp = normalize_axis_index(operator.index(axis), ary_nd)
mask_nd = ary_mask.ndim
if pp < 0 or pp + mask_nd > ary_nd:
raise ValueError(
"Parameter p is inconsistent with input array dimensions"
)
mask_nelems = ary_mask.size
cumsum = dpt.empty(mask_nelems, dtype=dpt.int64, device=ary_mask.device)
exec_q = cumsum.sycl_queue
mask_count = ti.mask_positions(ary_mask, cumsum, sycl_queue=exec_q)
expected_vals_shape = (
ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :]
)
if vals.dtype == ary.dtype:
rhs = vals
else:
vals_np = vals
ary_np[(slice(None),) * p + (mask_np,)] = vals_np
ary[...] = ary_np
rhs = dpt.astype(vals, ary.dtype)
rhs = dpt.broadcast_to(rhs, expected_vals_shape)
hev, _ = ti._place(
dst=ary,
cumsum=cumsum,
axis_start=pp,
axis_end=pp + mask_nd,
rhs=rhs,
sycl_queue=exec_q,
)
hev.wait()
return


Expand Down
139 changes: 136 additions & 3 deletions dpctl/tensor/_indexing_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@

import dpctl
import dpctl.tensor as dpt
from dpctl.tensor._tensor_impl import _put, _take
import dpctl.tensor._tensor_impl as ti

from ._copy_utils import _extract_impl, _nonzero_impl


def take(x, indices, /, *, axis=None, mode="clip"):
Expand Down Expand Up @@ -93,7 +95,7 @@ def take(x, indices, /, *, axis=None, mode="clip"):
res_shape, dtype=x.dtype, usm_type=res_usm_type, sycl_queue=exec_q
)

hev, _ = _take(x, indices, res, axis, mode, sycl_queue=exec_q)
hev, _ = ti._take(x, indices, res, axis, mode, sycl_queue=exec_q)
hev.wait()

return res
Expand Down Expand Up @@ -173,5 +175,136 @@ def put(x, indices, vals, /, *, axis=None, mode="clip"):

vals = dpt.broadcast_to(vals, val_shape)

hev, _ = _put(x, indices, vals, axis, mode, sycl_queue=exec_q)
hev, _ = ti._put(x, indices, vals, axis, mode, sycl_queue=exec_q)
hev.wait()


def extract(condition, arr):
    """extract(condition, arr)

    Returns the elements of an array that satisfy the condition.

    If `condition` is boolean, :func:`dpctl.tensor.extract` is
    equivalent to ``arr[condition]``.

    Note that :func:`dpctl.tensor.place` does the opposite of
    :func:`dpctl.tensor.extract`.

    Args:
        condition: usm_ndarray
            An array whose non-zero or True entries indicate the elements
            of `arr` to extract. Must have the same shape as `arr`.
        arr: usm_ndarray
            Input array of the same shape as `condition`.

    Returns:
        extract: usm_ndarray
            Rank 1 array of values from `arr` where `condition` is True.

    Raises:
        TypeError: if `condition` or `arr` is not a
            :class:`dpctl.tensor.usm_ndarray`.
        dpctl.utils.ExecutionPlacementError: if no common execution queue
            can be determined for `condition` and `arr`.
        ValueError: if `condition` and `arr` have different shapes.
    """
    if not isinstance(condition, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(condition)}"
        )
    if not isinstance(arr, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(arr)}"
        )
    # The queue is resolved here only to validate that both inputs are
    # compatible; the actual work is delegated to _extract_impl.
    exec_q = dpctl.utils.get_execution_queue(
        (
            condition.sycl_queue,
            arr.sycl_queue,
        )
    )
    if exec_q is None:
        raise dpctl.utils.ExecutionPlacementError(
            "Execution placement can not be unambiguously inferred "
            "from input arguments."
        )
    # The check compares full shapes, not just element counts.
    if condition.shape != arr.shape:
        raise ValueError("Arrays are not of the same size")
    return _extract_impl(arr, condition)


def place(arr, mask, vals):
    """place(arr, mask, vals)

    Change elements of an array based on conditional and input values.

    If `mask` is boolean, :func:`dpctl.tensor.place` is
    equivalent to ``arr[mask] = vals``.

    The array `arr` is modified in place and nothing is returned. When
    `mask` selects no elements the function returns without touching `arr`.

    Args:
        arr: usm_ndarray
            Array to put data into.
        mask: usm_ndarray
            Boolean mask array. Must have the same shape as `arr`.
        vals: usm_ndarray
            Values to put into `arr`. Only the first N elements are
            used, where N is the number of True values in `mask`. If
            `vals` is smaller than N, it will be repeated, and if
            elements of `arr` are to be masked, this sequence must be
            non-empty. Array `vals` must be one dimensional.

    Raises:
        TypeError: if `arr`, `mask` or `vals` is not a
            :class:`dpctl.tensor.usm_ndarray`.
        dpctl.utils.ExecutionPlacementError: if no common execution queue
            can be determined for the three arrays.
        ValueError: if `arr` and `mask` have different shapes, or `vals`
            is not one dimensional.
    """
    if not isinstance(arr, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(arr)}"
        )
    if not isinstance(mask, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(mask)}"
        )
    if not isinstance(vals, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(vals)}"
        )
    exec_q = dpctl.utils.get_execution_queue(
        (
            arr.sycl_queue,
            mask.sycl_queue,
            vals.sycl_queue,
        )
    )
    if exec_q is None:
        raise dpctl.utils.ExecutionPlacementError(
            "Execution placement can not be unambiguously inferred "
            "from input arguments."
        )
    if arr.shape != mask.shape or vals.ndim != 1:
        raise ValueError("Array sizes are not as required")
    # Scratch buffer with one 64-bit integer slot per mask element; it is
    # filled by mask_positions, whose return value is used as the number
    # of selected elements.
    cumsum = dpt.empty(mask.size, dtype="i8", sycl_queue=exec_q)
    nz_count = ti.mask_positions(mask, cumsum, sycl_queue=exec_q)
    if nz_count == 0:
        # Nothing is selected by the mask, so there is nothing to write.
        return
    # Convert the values to the destination data type only when needed.
    if vals.dtype == arr.dtype:
        rhs = vals
    else:
        rhs = dpt.astype(vals, arr.dtype)
    hev, _ = ti._place(
        dst=arr,
        cumsum=cumsum,
        axis_start=0,
        axis_end=mask.ndim,
        rhs=rhs,
        sycl_queue=exec_q,
    )
    # Block until the submitted work has finished before returning.
    hev.wait()


def nonzero(arr):
    """nonzero(arr)

    Return the indices of non-zero elements.

    Returns the tuple of usm_ndarrays, one for each dimension
    of `arr`, containing the indices of the non-zero elements
    in that dimension. The values of `arr` are always tested in
    row-major, C-style order.

    Args:
        arr: usm_ndarray
            Input array, which has non-zero array rank.

    Returns:
        tuple_of_usm_ndarrays: tuple
            Indices of non-zero array elements.

    Raises:
        TypeError: if `arr` is not a :class:`dpctl.tensor.usm_ndarray`.
        ValueError: if `arr` is a zero-dimensional array.
    """
    if not isinstance(arr, dpt.usm_ndarray):
        raise TypeError(
            "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(arr)}"
        )
    # Zero-dimensional input is rejected explicitly.
    if arr.ndim == 0:
        raise ValueError("Array of positive rank is expected")
    return _nonzero_impl(arr)
Loading