Skip to content

Commit c860b1f

Browse files
Use USMHost as underlying buffer for result
This should improve transfer timing per GPU optimization guidelines
1 parent dca9722 commit c860b1f

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

dpctl/tensor/_copy_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,10 @@ def _has_memory_overlap(x1, x2):
4646
def _copy_to_numpy(ary):
4747
if not isinstance(ary, dpt.usm_ndarray):
4848
raise TypeError
49-
h = ary.usm_data.copy_to_host().view(ary.dtype)
49+
nb = ary.usm_data.nbytes
50+
hh = dpm.MemoryUSMHost(nb, queue=ary.sycl_queue)
51+
hh.copy_from_device(ary.usm_data)
52+
h = np.ndarray(nb, dtype="u1", buffer=hh).view(ary.dtype)
5053
itsz = ary.itemsize
5154
strides_bytes = tuple(si * itsz for si in ary.strides)
5255
offset = ary.__sycl_usm_array_interface__.get("offset", 0) * itsz

0 commit comments

Comments
 (0)