Skip to content

Fix USM mem blocking free corruption #360

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged — 1 commit merged into the base branch on May 22, 2024.
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions include/acl_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,10 @@ typedef struct _cl_event {

int is_on_device_op_queue; // bool whether this event is submitted to DevQ
bool support_profiling; // bool whether this event can be profiled

// Flag to indicate the event is being removed during command/inorder_command
// traversal and the removal should be deferred
bool defer_removal = false;
} _cl_event;

ACL_DECLARE_CL_OBJECT_ALLOC_FUNCTIONS(cl_event);
Expand All @@ -764,6 +768,9 @@ typedef struct _cl_command_queue {
int num_commands; // Number of commands in the queue.
int num_commands_submitted; // Number of events submitted to device_op_queue

// Flag to indicate whether commands/inorder_commands is being traversed
bool waiting_for_events = false;

// Used only for in-order command queues. Owner of all events managed by this
// queue
std::deque<cl_event> inorder_commands;
Expand Down
18 changes: 15 additions & 3 deletions src/acl_command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,13 @@ int acl_update_ooo_queue(cl_command_queue command_queue) {
command_queue->last_barrier = NULL;
}
acl_maybe_delete_event(event);
command_queue->commands.erase(event);
if (command_queue->waiting_for_events) {
// We are in the middle of traversing command_queue->commands, defer
// the removal till later to avoid corruption
event->defer_removal = true;
} else {
command_queue->commands.erase(event);
}
event->not_popped = false;
command_queue->num_commands--;
acl_release(command_queue);
Expand Down Expand Up @@ -820,8 +826,14 @@ int acl_update_inorder_queue(cl_command_queue command_queue) {
// popping it the second time.

// Deleted event. Remove it from our queue.
command_queue->inorder_commands.erase(
command_queue->inorder_commands.begin() + queue_idx);
if (command_queue->waiting_for_events) {
// We are in the middle of traversing command_queue->inorder_commands,
// defer the removal till later to avoid corruption
event->defer_removal = true;
} else {
command_queue->inorder_commands.erase(
command_queue->inorder_commands.begin() + queue_idx);
}
command_queue->num_commands--;
num_updates++;
event->not_popped = false;
Expand Down
90 changes: 57 additions & 33 deletions src/acl_usm.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2020-2021 Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause

(GitHub Actions annotation on line 1 of src/acl_usm.cpp — build workflow warning: Coverage 76.8% (-1.1%))

// System headers.
Expand Down Expand Up @@ -1224,48 +1224,72 @@

for (int i = 0; i < num_command_queues; i++) {
cl_command_queue current_cq = context->command_queue[i];
// check events in commands
for (const auto &event : current_cq->commands) {
if (event->execution_status != CL_COMPLETE) {
// check if ptr is used by kernels when we submit to queue
if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) {
clWaitForEvents(1, &event);
}
// check if ptr is used in queues
if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) ||
(event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) {
src_usm_alloc = acl_get_usm_alloc_from_ptr(
context, event->cmd.info.usm_xfer.src_ptr);
dst_usm_alloc = acl_get_usm_alloc_from_ptr(
context, event->cmd.info.usm_xfer.dst_ptr);
if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) ||
(dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) {
// Set a flag to indicate the command set of this command queue is being
// traversed, and any event deletion should be deferred
current_cq->waiting_for_events = true;

if (current_cq->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
// Current queue is ooo queue, check events in commands
for (auto it = current_cq->commands.begin();
it != current_cq->commands.end();) {
auto event = *it;
if (event->execution_status != CL_COMPLETE) {
// check if ptr is used by kernels when we submit to queue
if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) {
clWaitForEvents(1, &event);
}
// check if ptr is used in queues
if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) ||
(event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) {
src_usm_alloc = acl_get_usm_alloc_from_ptr(
context, event->cmd.info.usm_xfer.src_ptr);
dst_usm_alloc = acl_get_usm_alloc_from_ptr(
context, event->cmd.info.usm_xfer.dst_ptr);
if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) ||
(dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) {
clWaitForEvents(1, &event);
}
}
}
}
}
// check events in inorder commands
for (const auto &event : current_cq->inorder_commands) {
if (event->execution_status != CL_COMPLETE) {
// check if ptr is used by kernels when we submit to queue
if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) {
clWaitForEvents(1, &event);
if (event->defer_removal) {
it = current_cq->commands.erase(it);
event->defer_removal = false; // Reset as this event might get reused
} else {
++it;
}
// check if ptr is used in queues
if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) ||
(event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) {
src_usm_alloc = acl_get_usm_alloc_from_ptr(
context, event->cmd.info.usm_xfer.src_ptr);
dst_usm_alloc = acl_get_usm_alloc_from_ptr(
context, event->cmd.info.usm_xfer.dst_ptr);
if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) ||
(dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) {
}
} else {
// Current queue is inorder queue, check events in inorder commands
for (auto it = current_cq->inorder_commands.begin();
it != current_cq->inorder_commands.end();) {
auto event = *it;
if (event->execution_status != CL_COMPLETE) {
// check if ptr is used by kernels when we submit to queue
if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) {
clWaitForEvents(1, &event);
}
// check if ptr is used in queues
if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) ||
(event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) {
src_usm_alloc = acl_get_usm_alloc_from_ptr(
context, event->cmd.info.usm_xfer.src_ptr);
dst_usm_alloc = acl_get_usm_alloc_from_ptr(
context, event->cmd.info.usm_xfer.dst_ptr);
if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) ||
(dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) {
clWaitForEvents(1, &event);
}
}
}
if (event->defer_removal) {
it = current_cq->inorder_commands.erase(it);
event->defer_removal = false; // Reset as this event might get reused
} else {
++it;
}
}
}
current_cq->waiting_for_events = false;
}
}

Expand Down
Loading