Skip to content

Refactor queue submission to resolve a hang #377

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 72 additions & 86 deletions src/acl_command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,93 +630,60 @@ int acl_update_queue(cl_command_queue command_queue) {
}
}

void acl_try_FastKernelRelaunch_ooo_queue_event_dependents(cl_event parent) {
// Try to submit a kernel even if it has unfinished dependences using fast
// kernel relaunch
// Returns true on success, false on failure
bool acl_fast_relaunch_kernel(cl_event event) {
if (!(event->command_queue->properties &
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE))
return false;

if (event->depend_on.size() != 1)
return false;

cl_event parent = *(event->depend_on.begin());

if (!(parent->command_queue->properties &
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE))
return;
if (parent->depend_on_me.empty())
return;
return false;

if (parent->cmd.type != CL_COMMAND_TASK &&
parent->cmd.type != CL_COMMAND_NDRANGE_KERNEL)
return;
return false;

if (parent->execution_status > CL_SUBMITTED ||
parent->last_device_op->status > CL_SUBMITTED)
return;

// Check if fast kernel relaunch is safe to use, and we can ignore
// the explicit dependency
for (auto dependent_it = parent->depend_on_me.begin();
dependent_it != parent->depend_on_me.end(); dependent_it++) {
cl_event dependent = *dependent_it;
// Currently we do not handle the case of FKR for mixed queue types
if (!(dependent->command_queue->properties &
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE))
continue;
// can only FKR if one unresolved dependency
if (dependent->depend_on.size() > 1)
continue;
// can happen if this function gets called twice on same parent
// once during submission and once during completion
if (dependent->is_on_device_op_queue)
continue;

if (!l_is_same_kernel_event(parent, dependent)) {
// dependent on a different kernel than parent,
// must wait for dependency to be resolved
// OR the dependent is not on the same device,
// not safe to preemptively push dependent to device_op_queue
continue;
}

// Special case: if subbuffers are present they may(!) cause a
// migration while another kernel is using that data.
if (acl_kernel_has_unmapped_subbuffers(
&(dependent->cmd.info.ndrange_kernel.memory_migration))) {
continue;
}

// Fast Kernel Relaunch: submitting is safe even though has dependency
// Prior to submitting remove dependency
int local_updates = acl_submit_command(dependent);
if (local_updates) {
dependent->depend_on.erase(parent);
dependent_it = parent->depend_on_me.erase(dependent_it);
dependent_it--; // decrement it otherwise we will skip an element
dependent->command_queue->num_commands_submitted++;
}
}
return false;

if (!l_is_same_kernel_event(parent, event)) {
// dependent on a different kernel than parent,
// must wait for dependency to be resolved
// OR the dependent is not on the same device,
// not safe to preemptively push dependent to device_op_queue
return false;
}

// Special case: if subbuffers are present they may(!) cause a
// migration while another kernel is using that data.
if (acl_kernel_has_unmapped_subbuffers(
&(event->cmd.info.ndrange_kernel.memory_migration)))
return false;

// Fast Kernel Relaunch: submitting is safe even though has dependency
// If submission succeeds, remove dependency
bool success = acl_submit_command(event);
if (!success)
return false;
event->depend_on.erase(parent);
parent->depend_on_me.remove(event);
return true;
}

int acl_update_ooo_queue(cl_command_queue command_queue) {
int num_updates = 0;

// Directly submit the event if it has no dependencies
// unless it is a user_event queue which never submits events
while (!command_queue->new_commands.empty()) {
int success = 1;
cl_event event = command_queue->new_commands.front();
if (command_queue->submits_commands &&
event->execution_status == CL_QUEUED) {
if (event->depend_on.empty()) {
command_queue->num_commands_submitted++;
success = acl_submit_command(event);
} else {
// This is allowed to fail, so no need to mark success as false
// dependent events that fail to be FKRd will still be picked up when
// their parent event finishes
acl_try_FastKernelRelaunch_ooo_queue_event_dependents(
*(event->depend_on.begin()));
}
}

if (success) {
// safe to pop as there is a master copy in command_queue->commands
command_queue->new_commands.pop_front();
}
}

// Remove dependencies on completed events, and launch any events
// that no longer have dependencies.
// First, remove dependencies on completed events,
// as this may unblock other events
// Completed events should be returned to the free pool
while (!command_queue->completed_commands.empty()) {
cl_event event = command_queue->completed_commands.front();
Expand All @@ -735,16 +702,6 @@ int acl_update_ooo_queue(cl_command_queue command_queue) {
dependent->command_queue->completed_commands.push_back(
dependent); // dependent might be on another queue
}
} else if (dependent->depend_on.empty()) {
// dependent has no dependencies safe to submit if in OOO queue
if ((dependent->command_queue->properties &
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) &&
dependent->cmd.type != CL_COMMAND_USER) {
int local_updates = acl_submit_command(dependent);
dependent->command_queue->num_commands_submitted +=
local_updates; // dependent might be on another queue
num_updates += local_updates;
}
}
}

Expand Down Expand Up @@ -772,10 +729,39 @@ int acl_update_ooo_queue(cl_command_queue command_queue) {
command_queue->commands.erase(event);
}
event->not_popped = false;
num_updates++;
command_queue->num_commands--;
acl_release(command_queue);
}

// Next try to submit any events with no dependencies
// or whose only dependences can be handled by fast kernel relaunch
// unless they are on a user_event queue which never submits events
for (auto event_iter = command_queue->new_commands.begin();
event_iter != command_queue->new_commands.end();) {
cl_event event = *event_iter;
int success = 0;
if (!command_queue->submits_commands)
success = 1;
else {
if (event->depend_on.empty()) {
success = acl_submit_command(event);
} else {
success = acl_fast_relaunch_kernel(event);
}
}

// Increment before removal so we don't invalidate the iterator
event_iter++;
if (success) {
// num_commands_submitted isn't used for ooo queues today
// but keep it up-to-date in case someone wants to use it in the future
command_queue->num_commands_submitted++;
command_queue->new_commands.remove(event);
num_updates++;
}
}

return num_updates;
}

Expand Down
4 changes: 0 additions & 4 deletions src/acl_device_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1474,10 +1474,6 @@ void acl_post_status_to_owning_event(acl_device_op_t *op, int new_status) {
command_queue->num_commands_submitted--;
} else {
event->timestamp[new_status] = op->timestamp[new_status];

if (command_queue->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
acl_try_FastKernelRelaunch_ooo_queue_event_dependents(event);
}
}
}

Expand Down
10 changes: 0 additions & 10 deletions src/acl_event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1078,16 +1078,6 @@ int acl_notify_dependent_events(cl_event event) {
if (event->cmd.type == CL_COMMAND_USER && event->execution_status < 0) {
acl_set_execution_status(dependent, event->execution_status);
}

// Submit the event if it has no dependencies and is part of an
// Out-of-order queue
if (dependent->command_queue->properties &
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE &&
dependent->depend_on.empty() &&
dependent->cmd.type != CL_COMMAND_USER) {
dependent->command_queue->num_commands_submitted++;
acl_submit_command(dependent);
}
}

int num_updates = static_cast<int>(event->depend_on_me.size());
Expand Down