|
18 | 18 | """
|
19 | 19 |
|
20 | 20 | import ctypes
|
21 |
| -import unittest |
22 | 21 |
|
23 | 22 | import numpy as np
|
| 23 | +import pytest |
24 | 24 |
|
25 | 25 | import dpctl
|
26 | 26 | import dpctl.memory as dpctl_mem
|
|
29 | 29 | from ._helper import has_gpu
|
30 | 30 |
|
31 | 31 |
|
32 |
| -@unittest.skipUnless(has_gpu(), "No OpenCL GPU queues available") |
33 |
| -class Test1DKernelSubmit(unittest.TestCase): |
34 |
| - def test_create_program_from_source(self): |
35 |
| - oclSrc = " \ |
36 |
| - kernel void axpy(global int* a, global int* b, global int* c, int d) { \ |
37 |
| - size_t index = get_global_id(0); \ |
38 |
| - c[index] = d*a[index] + b[index]; \ |
39 |
| - }" |
40 |
| - q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling") |
41 |
| - prog = dpctl_prog.create_program_from_source(q, oclSrc) |
42 |
| - axpyKernel = prog.get_sycl_kernel("axpy") |
| 32 | +@pytest.mark.skipif(not has_gpu(), reason="No OpenCL GPU queues available") |
| 33 | +def test_create_program_from_source(): |
| 34 | + oclSrc = " \ |
| 35 | + kernel void axpy(global int* a, global int* b, global int* c, int d) { \ |
| 36 | + size_t index = get_global_id(0); \ |
| 37 | + c[index] = d*a[index] + b[index]; \ |
| 38 | + }" |
| 39 | + q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling") |
| 40 | + prog = dpctl_prog.create_program_from_source(q, oclSrc) |
| 41 | + axpyKernel = prog.get_sycl_kernel("axpy") |
43 | 42 |
|
44 |
| - n_elems = 1024 * 512 |
45 |
| - bufBytes = n_elems * np.dtype("i").itemsize |
46 |
| - abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) |
47 |
| - bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) |
48 |
| - cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) |
49 |
| - a = np.ndarray((n_elems,), buffer=abuf, dtype="i") |
50 |
| - b = np.ndarray((n_elems,), buffer=bbuf, dtype="i") |
51 |
| - c = np.ndarray((n_elems,), buffer=cbuf, dtype="i") |
52 |
| - a[:] = np.arange(n_elems) |
53 |
| - b[:] = np.arange(n_elems, 0, -1) |
54 |
| - c[:] = 0 |
55 |
| - d = 2 |
56 |
| - args = [] |
| 43 | + n_elems = 1024 * 512 |
| 44 | + bufBytes = n_elems * np.dtype("i").itemsize |
| 45 | + abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) |
| 46 | + bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) |
| 47 | + cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q) |
| 48 | + a = np.ndarray((n_elems,), buffer=abuf, dtype="i") |
| 49 | + b = np.ndarray((n_elems,), buffer=bbuf, dtype="i") |
| 50 | + c = np.ndarray((n_elems,), buffer=cbuf, dtype="i") |
| 51 | + a[:] = np.arange(n_elems) |
| 52 | + b[:] = np.arange(n_elems, 0, -1) |
| 53 | + c[:] = 0 |
| 54 | + d = 2 |
| 55 | + args = [] |
57 | 56 |
|
58 |
| - args.append(a.base) |
59 |
| - args.append(b.base) |
60 |
| - args.append(c.base) |
61 |
| - args.append(ctypes.c_int(d)) |
| 57 | + args.append(a.base) |
| 58 | + args.append(b.base) |
| 59 | + args.append(c.base) |
| 60 | + args.append(ctypes.c_int(d)) |
62 | 61 |
|
63 |
| - r = [ |
64 |
| - n_elems, |
65 |
| - ] |
| 62 | + r = [ |
| 63 | + n_elems, |
| 64 | + ] |
66 | 65 |
|
67 |
| - timer = dpctl.SyclTimer() |
68 |
| - with timer(q): |
69 |
| - q.submit(axpyKernel, args, r) |
70 |
| - ref_c = a * d + b |
71 |
| - host_dt, device_dt = timer.dt |
72 |
| - self.assertTrue(host_dt > device_dt) |
73 |
| - self.assertTrue(np.allclose(c, ref_c)) |
74 |
| - |
75 |
| - |
76 |
| -if __name__ == "__main__": |
77 |
| - unittest.main() |
| 66 | + timer = dpctl.SyclTimer() |
| 67 | + with timer(q): |
| 68 | + q.submit(axpyKernel, args, r) |
| 69 | + ref_c = a * d + b |
| 70 | + host_dt, device_dt = timer.dt |
| 71 | + assert host_dt > device_dt |
| 72 | + assert np.allclose(c, ref_c) |
0 commit comments