Skip to content

Moved sycl timer into dpctl.SyclTimer #555

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dpctl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
set_global_queue,
)

from ._sycl_timer import SyclTimer
from ._version import get_versions
from .enum_types import backend_type, device_type, event_status_type

Expand All @@ -88,7 +89,7 @@
]
__all__ += [
"SyclEvent",
"SyclEventRaw",
"SyclTimer",
]
__all__ += [
"get_platforms",
Expand Down
55 changes: 47 additions & 8 deletions examples/python/sycl_timer.py → dpctl/_sycl_timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,65 @@

import timeit

import dpctl
from . import SyclQueue


class SyclTimer:
def __init__(self, host_time=timeit.default_timer, time_scale=1):
self.timer = host_time
"""
SyclTimer(host_timer=timeit.default_timer, time_scale=1)
Python class to measure both the device execution time of commands submitted
to a :class:`dpctl.SyclQueue` and the wall-clock (host) time.

:Example:
.. code-block:: python

import dpctl

# Create a default SyclQueue
q = dpctl.SyclQueue(property='enable_profiling')

# create the timer
milliseconds_sc = 1e-3
timer = dpctl.SyclTimer(time_scale = milliseconds_sc)

# use the timer
with timer(queue=q):
code_block

# retrieve elapsed times in milliseconds
wall_dt, sycl_dt = timer.dt

Remark:
The timer synchronizes the queue at the entrance and the
exit of the context.

Args:
host_timer (callable): A callable such that host_timer() returns the
current host time in seconds.
time_scale (int, float): Ratio of the unit of time of interest and
one second.
"""

def __init__(self, host_timer=timeit.default_timer, time_scale=1):
self.timer = host_timer
self.time_scale = time_scale
self.queue = None

def __call__(self, queue=None):
if isinstance(queue, dpctl.SyclQueue):
if isinstance(queue, SyclQueue):
if queue.has_enable_profiling:
self.queue = queue
else:
raise ValueError(
"The queue does not contain the enable_profiling property"
"The given queue was not created with the "
"enable_profiling property"
)
else:
raise ValueError(
"The passed queue must be <class 'dpctl._sycl_queue.SyclQueue'>"
raise TypeError(
"The passed queue must have type dpctl.SyclQueue, "
"got {}".format(type(queue))
)
return self.__enter__()
return self

def __enter__(self):
self.event_start = self.queue.submit_barrier()
Expand All @@ -48,6 +86,7 @@ def __exit__(self, *args):
self.event_finish = self.queue.submit_barrier()
self.host_finish = self.timer()

@property
def dt(self):
self.event_start.wait()
self.event_finish.wait()
Expand Down
23 changes: 23 additions & 0 deletions dpctl/tests/test_sycl_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,26 @@ def test_profiling_info():
assert event.profiling_info_end
else:
pytest.skip("No OpenCL CPU queues available")


def test_sycl_timer():
try:
q = dpctl.SyclQueue(property="enable_profiling")
except dpctl.SyclQueueCreationError:
pytest.skip("Queue creation of default device failed")
timer = dpctl.SyclTimer()
m1 = dpctl_mem.MemoryUSMDevice(256 * 1024, queue=q)
m2 = dpctl_mem.MemoryUSMDevice(256 * 1024, queue=q)
with timer(q):
# device task
m1.copy_from_device(m2)
# host task
[x ** 2 for x in range(1024)]
host_dt, device_dt = timer.dt
assert host_dt > device_dt
q_no_profiling = dpctl.SyclQueue()
assert q_no_profiling.has_enable_profiling is False
with pytest.raises(ValueError):
timer(queue=q_no_profiling)
with pytest.raises(TypeError):
timer(queue=None)
28 changes: 18 additions & 10 deletions dpctl/tests/test_sycl_kernel_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,20 @@ def test_create_program_from_source(self):
size_t index = get_global_id(0); \
c[index] = d*a[index] + b[index]; \
}"
q = dpctl.SyclQueue("opencl:gpu")
q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling")
prog = dpctl_prog.create_program_from_source(q, oclSrc)
axpyKernel = prog.get_sycl_kernel("axpy")

bufBytes = 1024 * np.dtype("i").itemsize
n_elems = 1024 * 512
bufBytes = n_elems * np.dtype("i").itemsize
abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
a = np.ndarray((1024), buffer=abuf, dtype="i")
b = np.ndarray((1024), buffer=bbuf, dtype="i")
c = np.ndarray((1024), buffer=cbuf, dtype="i")
a[:] = np.arange(1024)
b[:] = np.arange(1024, 0, -1)
a = np.ndarray((n_elems,), buffer=abuf, dtype="i")
b = np.ndarray((n_elems,), buffer=bbuf, dtype="i")
c = np.ndarray((n_elems,), buffer=cbuf, dtype="i")
a[:] = np.arange(n_elems)
b[:] = np.arange(n_elems, 0, -1)
c[:] = 0
d = 2
args = []
Expand All @@ -59,10 +60,17 @@ def test_create_program_from_source(self):
args.append(c.base)
args.append(ctypes.c_int(d))

r = [1024]
r = [
n_elems,
]

q.submit(axpyKernel, args, r)
self.assertTrue(np.allclose(c, a * d + b))
timer = dpctl.SyclTimer()
with timer(q):
q.submit(axpyKernel, args, r)
ref_c = a * d + b
host_dt, device_dt = timer.dt
self.assertTrue(host_dt > device_dt)
self.assertTrue(np.allclose(c, ref_c))


if __name__ == "__main__":
Expand Down
13 changes: 7 additions & 6 deletions examples/python/dppy_kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@

import numba_dppy
import numpy as np
from sycl_timer import SyclTimer

import dpctl
from dpctl import SyclTimer


@numba_dppy.kernel
Expand All @@ -45,11 +45,12 @@ def dppy_gemm(a, b, c):
c = np.ones_like(a).reshape(X, X)

q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling")
timer = SyclTimer(time_scale=1)
with dpctl.device_context(q):
timers = SyclTimer(time_scale=1)
with timers(q):
with timer(q):
dppy_gemm[griddim, blockdim](a, b, c)
cc = np.dot(a, b)
host_time, device_time = timers.dt()
print("Wall time: ", host_time, "\n", "Device time: ", device_time)
print(np.allclose(c, cc))
host_time, device_time = timer.dt

print("Wall time: ", host_time, "\nDevice time: ", device_time)
print(np.allclose(c, cc))