Skip to content

Commit cc0aee9

Browse files
Add test to check asynchronicity of submit
1 parent af547b3 commit cc0aee9

File tree

1 file changed

+92
-1
lines changed

1 file changed

+92
-1
lines changed

dpctl/tests/test_sycl_kernel_submit.py

Lines changed: 92 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
import dpctl
2626
import dpctl.memory as dpctl_mem
2727
import dpctl.program as dpctl_prog
28+
import dpctl.tensor as dpt
2829

2930

3031
@pytest.mark.parametrize(
@@ -107,4 +108,94 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
107108
ref_c = a * np.array(d, dtype=dtype) + b
108109
host_dt, device_dt = timer.dt
109110
assert type(host_dt) is float and type(device_dt) is float
110-
assert np.allclose(c, ref_c), "Faled for {}, {}".formatg(r, lr)
111+
assert np.allclose(c, ref_c), "Failed for {}, {}".formatg(r, lr)
112+
113+
114+
def test_async_submit():
    """Check that ``SyclQueue.submit`` does not block until kernels finish.

    Three OpenCL kernels are built from source and submitted to the same
    queue.  ``kern1`` and ``kern2`` each fill one row of a device array;
    ``kern3`` writes the element-wise minimum of those two rows into the
    third row and is submitted with the events of the first two
    submissions passed as its final argument.  Right after the
    submissions the test asserts that none of the three events reports
    the ``complete`` status, then waits on the last event and compares
    the device array against a reference computed on the host.

    The test is skipped when an OpenCL queue cannot be created.
    """
    try:
        q = dpctl.SyclQueue("opencl")
    except dpctl.SyclQueueCreationError:
        pytest.skip("OpenCL queue could not be created")
    oclSrc = (
        "kernel void kern1(global unsigned int *res, unsigned int mod) {"
        "   size_t index = get_global_id(0);"
        "   int ri = (index % mod);"
        "   res[index] = (ri * ri) % mod;"
        "}"
        " "
        "kernel void kern2(global unsigned int *res, unsigned int mod) {"
        "   size_t index = get_global_id(0);"
        "   int ri = (index % mod);"
        "   int ri2 = (ri * ri) % mod;"
        "   res[index] = (ri2 * ri) % mod;"
        "}"
        " "
        "kernel void kern3("
        "     global unsigned int *res, global unsigned int *arg1, "
        "     global unsigned int *arg2)"
        "{"
        "   size_t index = get_global_id(0);"
        "   res[index] = "
        "      (arg1[index] < arg2[index]) ? arg1[index] : arg2[index];"
        "}"
    )
    prog = dpctl_prog.create_program_from_source(q, oclSrc)
    kern1Kernel = prog.get_sycl_kernel("kern1")
    kern2Kernel = prog.get_sycl_kernel("kern2")
    kern3Kernel = prog.get_sycl_kernel("kern3")

    # Every kernel handle is checked; the third check previously repeated
    # kern2Kernel and left kern3Kernel unverified.
    assert isinstance(kern1Kernel, dpctl_prog.SyclKernel)
    assert isinstance(kern2Kernel, dpctl_prog.SyclKernel)
    assert isinstance(kern3Kernel, dpctl_prog.SyclKernel)

    n = 1024 * 512
    X = dpt.empty((3, n), dtype="u4", usm_type="device", sycl_queue=q)
    first_row = dpctl_mem.as_usm_memory(X[0])
    second_row = dpctl_mem.as_usm_memory(X[1])
    third_row = dpctl_mem.as_usm_memory(X[2])

    e1 = q.submit(
        kern1Kernel,
        [
            first_row,
            ctypes.c_uint(17),
        ],
        [
            n,
        ],
    )
    e2 = q.submit(
        kern2Kernel,
        [
            second_row,
            ctypes.c_uint(27),
        ],
        [
            n,
        ],
    )
    # kern3 reads the rows written by kern1 and kern2, so their events are
    # handed to submit as the trailing argument (after a None placeholder
    # for the fourth positional argument).
    e3 = q.submit(
        kern3Kernel,
        [third_row, first_row, second_row],
        [
            n,
        ],
        None,
        [e1, e2],
    )
    # NOTE(review): this check is timing-dependent -- it presumes none of
    # the kernels has finished by the time the statuses are queried.
    status_complete = dpctl.event_status_type.complete
    assert status_complete not in (
        e1.execution_status,
        e2.execution_status,
        e3.execution_status,
    )

    e3.wait()
    Xnp = dpt.asnumpy(X)

    # Host reference, vectorized.  int64 is requested explicitly so that
    # idx**3 (below 2**57 for n == 2**19) cannot overflow on platforms
    # whose default NumPy integer is 32-bit.
    idx = np.arange(n, dtype=np.int64)
    Xref = np.empty((3, n), dtype="u4")
    Xref[0] = (idx * idx) % 17
    Xref[1] = (idx * idx * idx) % 27
    Xref[2] = np.minimum(Xref[0], Xref[1])

    assert np.array_equal(Xnp, Xref)

0 commit comments

Comments
 (0)