Skip to content

Commit aa05627

Browse files
authored
[SYCL][CUDA] Remove pi Event Callback implementation (#1735)
Since introduction of host tasks in #1471, `piEventCallback` and related functionality is not required by the SYCL-RT. Removing the implementation of this behaviour from the CUDA backend simplifies the submission of operations to streams and overall increases performance. Signed-off-by: Ruyman Reyes <ruyman@codeplay.com>
1 parent 08f8656 commit aa05627

File tree

7 files changed

+41
-704
lines changed

7 files changed

+41
-704
lines changed

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 33 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -276,13 +276,13 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue)
276276
isStarted_{false}, evEnd_{nullptr}, evStart_{nullptr}, evQueued_{nullptr},
277277
queue_{queue}, context_{context} {
278278

279-
if (is_native_event()) {
280-
PI_CHECK_ERROR(cuEventCreate(&evEnd_, CU_EVENT_DEFAULT));
279+
assert(type != PI_COMMAND_TYPE_USER);
281280

282-
if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
283-
PI_CHECK_ERROR(cuEventCreate(&evQueued_, CU_EVENT_DEFAULT));
284-
PI_CHECK_ERROR(cuEventCreate(&evStart_, CU_EVENT_DEFAULT));
285-
}
281+
PI_CHECK_ERROR(cuEventCreate(&evEnd_, CU_EVENT_DEFAULT));
282+
283+
if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
284+
PI_CHECK_ERROR(cuEventCreate(&evQueued_, CU_EVENT_DEFAULT));
285+
PI_CHECK_ERROR(cuEventCreate(&evStart_, CU_EVENT_DEFAULT));
286286
}
287287

288288
if (queue_ != nullptr) {
@@ -303,7 +303,7 @@ pi_result _pi_event::start() {
303303
pi_result result;
304304

305305
try {
306-
if (is_native_event() && queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
306+
if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
307307
// NOTE: This relies on the default stream to be unused.
308308
result = PI_CHECK_ERROR(cuEventRecord(evQueued_, 0));
309309
result = PI_CHECK_ERROR(cuEventRecord(evStart_, queue_->get()));
@@ -313,8 +313,6 @@ pi_result _pi_event::start() {
313313
}
314314

315315
isStarted_ = true;
316-
// let observers know that the event is "submitted"
317-
trigger_callback(get_execution_status());
318316
return result;
319317
}
320318

@@ -351,37 +349,16 @@ pi_result _pi_event::record() {
351349

352350
pi_result result = PI_INVALID_OPERATION;
353351

354-
if (is_native_event()) {
355-
356-
if (!queue_) {
357-
return PI_INVALID_QUEUE;
358-
}
359-
360-
CUstream cuStream = queue_->get();
352+
if (!queue_) {
353+
return PI_INVALID_QUEUE;
354+
}
361355

362-
try {
363-
result = PI_CHECK_ERROR(cuEventRecord(evEnd_, cuStream));
356+
CUstream cuStream = queue_->get();
364357

365-
result = cuda_piEventRetain(this);
366-
try {
367-
result = PI_CHECK_ERROR(cuLaunchHostFunc(
368-
cuStream,
369-
[](void *userData) {
370-
pi_event event = reinterpret_cast<pi_event>(userData);
371-
event->set_event_complete();
372-
cuda_piEventRelease(event);
373-
},
374-
this));
375-
} catch (...) {
376-
// If host function fails to enqueue we must release the event here
377-
result = cuda_piEventRelease(this);
378-
throw;
379-
}
380-
} catch (pi_result error) {
381-
result = error;
382-
}
383-
} else {
384-
result = PI_SUCCESS;
358+
try {
359+
result = PI_CHECK_ERROR(cuEventRecord(evEnd_, cuStream));
360+
} catch (pi_result error) {
361+
result = error;
385362
}
386363

387364
if (result == PI_SUCCESS) {
@@ -392,65 +369,23 @@ pi_result _pi_event::record() {
392369
}
393370

394371
pi_result _pi_event::wait() {
395-
396372
pi_result retErr;
397-
if (is_native_event()) {
398-
try {
399-
retErr = PI_CHECK_ERROR(cuEventSynchronize(evEnd_));
400-
isCompleted_ = true;
401-
} catch (pi_result error) {
402-
retErr = error;
403-
}
404-
} else {
405-
406-
while (!is_completed()) {
407-
// wait for user event to complete
408-
}
409-
retErr = PI_SUCCESS;
373+
try {
374+
retErr = PI_CHECK_ERROR(cuEventSynchronize(evEnd_));
375+
isCompleted_ = true;
376+
} catch (pi_result error) {
377+
retErr = error;
410378
}
411379

412-
auto is_success = retErr == PI_SUCCESS;
413-
auto status = is_success ? get_execution_status() : pi_int32(retErr);
414-
415-
trigger_callback(status);
416-
417380
return retErr;
418381
}
419382

420383
// makes all future work submitted to queue wait for all work captured in event.
421384
pi_result enqueueEventWait(pi_queue queue, pi_event event) {
422-
if (event->is_native_event()) {
423-
424-
// for native events, the cuStreamWaitEvent call is used.
425-
// This makes all future work submitted to stream wait for all
426-
// work captured in event.
427-
428-
return PI_CHECK_ERROR(cuStreamWaitEvent(queue->get(), event->get(), 0));
429-
430-
} else {
431-
432-
// for user events, we enqueue a callback. When invoked, the
433-
// callback will block until the user event is marked as
434-
// completed.
435-
436-
static auto user_wait_func = [](void *user_data) {
437-
// The host function must not make any CUDA API calls.
438-
auto event = static_cast<pi_event>(user_data);
439-
440-
// busy wait for user event to complete
441-
event->wait();
442-
443-
// this function does not need the event to be kept alive
444-
// anymore
445-
cuda_piEventRelease(event);
446-
};
447-
448-
// retain event to ensure it is still alive when the
449-
// user_wait_func callback is invoked
450-
cuda_piEventRetain(event);
451-
452-
return PI_CHECK_ERROR(cuLaunchHostFunc(queue->get(), user_wait_func, event));
453-
}
385+
// for native events, the cuStreamWaitEvent call is used.
386+
// This makes all future work submitted to stream wait for all
387+
// work captured in event.
388+
return PI_CHECK_ERROR(cuStreamWaitEvent(queue->get(), event->get(), 0));
454389
}
455390

456391
_pi_program::_pi_program(pi_context ctxt)
@@ -2685,24 +2620,7 @@ pi_result cuda_piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index,
26852620
// Events
26862621
//
26872622
pi_result cuda_piEventCreate(pi_context context, pi_event *event) {
2688-
assert(context != nullptr);
2689-
assert(event != nullptr);
2690-
pi_result retErr = PI_SUCCESS;
2691-
pi_event retEvent = nullptr;
2692-
2693-
try {
2694-
retEvent = _pi_event::make_user(context);
2695-
if (retEvent == nullptr) {
2696-
retErr = PI_OUT_OF_HOST_MEMORY;
2697-
}
2698-
} catch (pi_result err) {
2699-
retErr = err;
2700-
} catch (...) {
2701-
retErr = PI_OUT_OF_RESOURCES;
2702-
}
2703-
2704-
*event = retEvent;
2705-
return retErr;
2623+
cl::sycl::detail::pi::die("PI Event Create not implemented in CUDA backend");
27062624
}
27072625

27082626
pi_result cuda_piEventGetInfo(pi_event event, pi_event_info param_name,
@@ -2766,37 +2684,13 @@ pi_result cuda_piEventSetCallback(pi_event event,
27662684
pi_int32 command_exec_callback_type,
27672685
pfn_notify notify, void *user_data) {
27682686

2769-
assert(event);
2770-
assert(notify);
2771-
assert(command_exec_callback_type == PI_EVENT_SUBMITTED ||
2772-
command_exec_callback_type == PI_EVENT_RUNNING ||
2773-
command_exec_callback_type == PI_EVENT_COMPLETE);
2774-
event_callback callback(pi_event_status(command_exec_callback_type), notify,
2775-
user_data);
2776-
2777-
event->set_event_callback(callback);
2778-
2687+
cl::sycl::detail::pi::die("Event Callback not implemented in CUDA backend");
27792688
return PI_SUCCESS;
27802689
}
27812690

27822691
pi_result cuda_piEventSetStatus(pi_event event, pi_int32 execution_status) {
27832692

2784-
assert(execution_status >= PI_EVENT_COMPLETE &&
2785-
execution_status <= PI_EVENT_QUEUED);
2786-
2787-
if (!event || event->is_native_event()) {
2788-
return PI_INVALID_EVENT;
2789-
}
2790-
2791-
if (execution_status == PI_EVENT_COMPLETE) {
2792-
return event->set_event_complete();
2793-
} else if (execution_status < 0) {
2794-
// TODO: A negative integer value causes all enqueued commands that wait
2795-
// on this user event to be terminated.
2796-
cl::sycl::detail::pi::die("cuda_piEventSetStatus support for negative execution_status not "
2797-
"implemented.");
2798-
}
2799-
2693+
cl::sycl::detail::pi::die("Event Set Status not implemented in CUDA backend");
28002694
return PI_INVALID_VALUE;
28012695
}
28022696

@@ -2824,19 +2718,13 @@ pi_result cuda_piEventRelease(pi_event event) {
28242718
if (event->decrement_reference_count() == 0) {
28252719
std::unique_ptr<_pi_event> event_ptr{event};
28262720
pi_result result = PI_INVALID_EVENT;
2827-
2828-
if (event->is_native_event()) {
2829-
try {
2830-
ScopedContext active(event->get_context());
2831-
auto cuEvent = event->get();
2832-
result = PI_CHECK_ERROR(cuEventDestroy(cuEvent));
2833-
} catch (...) {
2834-
result = PI_OUT_OF_RESOURCES;
2835-
}
2836-
} else {
2837-
result = PI_SUCCESS;
2721+
try {
2722+
ScopedContext active(event->get_context());
2723+
auto cuEvent = event->get();
2724+
result = PI_CHECK_ERROR(cuEventDestroy(cuEvent));
2725+
} catch (...) {
2726+
result = PI_OUT_OF_RESOURCES;
28382727
}
2839-
28402728
return result;
28412729
}
28422730

@@ -2891,9 +2779,6 @@ pi_result cuda_piEnqueueEventsWait(pi_queue command_queue,
28912779
/// \return PI_SUCCESS on success. PI_INVALID_EVENT if given a user event.
28922780
pi_result cuda_piextEventGetNativeHandle(pi_event event,
28932781
pi_native_handle *nativeHandle) {
2894-
if (event->is_user_event()) {
2895-
return PI_INVALID_EVENT;
2896-
}
28972782
*nativeHandle = reinterpret_cast<pi_native_handle>(event->get());
28982783
return PI_SUCCESS;
28992784
}

0 commit comments

Comments
 (0)