Skip to content

Commit 104e439

Browse files
[SYCL] Fix memory dependency leaks caused by failed kernel enqueue (#5120)
If a kernel enqueue fails the runtime will immediately try and clean it up. However, if it has any dependencies or users the cleanup will be skipped. This can cause the dependencies to stay alive and leak. These changes forces a full sub-graph cleanup of the command if enqueuing failed. Additionally, sub-graph cleanup is changed to account for failed kernel enqueues and will remove the failed command from its leaves.
1 parent d38b599 commit 104e439

File tree

6 files changed

+291
-48
lines changed

6 files changed

+291
-48
lines changed

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -234,13 +234,7 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord(
234234

235235
void Scheduler::GraphBuilder::updateLeaves(const std::set<Command *> &Cmds,
236236
MemObjRecord *Record,
237-
access::mode AccessMode,
238237
std::vector<Command *> &ToCleanUp) {
239-
240-
const bool ReadOnlyReq = AccessMode == access::mode::read;
241-
if (ReadOnlyReq)
242-
return;
243-
244238
for (Command *Cmd : Cmds) {
245239
bool WasLeaf = Cmd->MLeafCounter > 0;
246240
Cmd->MLeafCounter -= Record->MReadLeaves.remove(Cmd);
@@ -252,6 +246,18 @@ void Scheduler::GraphBuilder::updateLeaves(const std::set<Command *> &Cmds,
252246
}
253247
}
254248

249+
void Scheduler::GraphBuilder::updateLeaves(const std::set<Command *> &Cmds,
250+
MemObjRecord *Record,
251+
access::mode AccessMode,
252+
std::vector<Command *> &ToCleanUp) {
253+
254+
const bool ReadOnlyReq = AccessMode == access::mode::read;
255+
if (ReadOnlyReq)
256+
return;
257+
258+
updateLeaves(Cmds, Record, ToCleanUp);
259+
}
260+
255261
void Scheduler::GraphBuilder::addNodeToLeaves(
256262
MemObjRecord *Record, Command *Cmd, access::mode AccessMode,
257263
std::vector<Command *> &ToEnqueue) {
@@ -1253,6 +1259,60 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands(
12531259
handleVisitedNodes(MVisitedCmds);
12541260
}
12551261

1262+
void Scheduler::GraphBuilder::cleanupFailedCommand(
1263+
Command *FailedCmd,
1264+
std::vector<std::shared_ptr<cl::sycl::detail::stream_impl>>
1265+
&StreamsToDeallocate,
1266+
std::vector<Command *> &ToCleanUp) {
1267+
1268+
// If the failed command has no users and no dependencies, there is no reason
1269+
// to replace it with an empty command.
1270+
if (FailedCmd->MDeps.size() == 0 && FailedCmd->MUsers.size() == 0)
1271+
return;
1272+
1273+
// Create empty command that is "ready" for enqueuing.
1274+
EmptyCommand *EmptyCmd = new EmptyCommand(FailedCmd->getQueue());
1275+
if (!EmptyCmd)
1276+
throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY);
1277+
EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady;
1278+
1279+
// Collect stream objects for the failed command.
1280+
if (FailedCmd->getType() == Command::CommandType::RUN_CG) {
1281+
auto ExecCmd = static_cast<ExecCGCommand *>(FailedCmd);
1282+
std::vector<std::shared_ptr<stream_impl>> Streams = ExecCmd->getStreams();
1283+
ExecCmd->clearStreams();
1284+
StreamsToDeallocate.insert(StreamsToDeallocate.end(), Streams.begin(),
1285+
Streams.end());
1286+
}
1287+
1288+
for (DepDesc &Dep : FailedCmd->MDeps) {
1289+
// Replace failed command in dependency records.
1290+
const Requirement *Req = Dep.MDepRequirement;
1291+
MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);
1292+
updateLeaves({FailedCmd}, Record, ToCleanUp);
1293+
std::vector<Command *> ToEnqueue;
1294+
addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue);
1295+
assert(ToEnqueue.empty());
1296+
1297+
// Replace failed command as a user.
1298+
if (Dep.MDepCommand->MUsers.erase(FailedCmd)) {
1299+
Dep.MDepCommand->MUsers.insert(EmptyCmd);
1300+
EmptyCmd->MDeps.push_back(Dep);
1301+
}
1302+
}
1303+
FailedCmd->MDeps.clear();
1304+
1305+
for (Command *UserCmd : FailedCmd->MUsers)
1306+
for (DepDesc &Dep : UserCmd->MDeps)
1307+
if (Dep.MDepCommand == FailedCmd)
1308+
Dep.MDepCommand = EmptyCmd;
1309+
std::swap(FailedCmd->MUsers, EmptyCmd->MUsers);
1310+
1311+
FailedCmd->getEvent()->setCommand(EmptyCmd);
1312+
assert(FailedCmd->MLeafCounter == 0);
1313+
delete FailedCmd;
1314+
}
1315+
12561316
void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) {
12571317
const auto It = std::find_if(
12581318
MMemObjs.begin(), MMemObjs.end(),

sycl/source/detail/scheduler/leaves_collection.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ static inline bool doOverlap(const Requirement *LHS, const Requirement *RHS) {
3232

3333
static inline bool isHostAccessorCmd(Command *Cmd) {
3434
return Cmd->getType() == Command::EMPTY_TASK &&
35+
Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueBlocked &&
3536
Cmd->MBlockReason == Command::BlockReason::HostAccessor;
3637
}
3738

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 35 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,16 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
7070
}
7171
}
7272

73+
static void deallocateStreams(
74+
std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate) {
75+
// Deallocate buffers for stream objects of the finished commands. Iterate in
76+
// reverse order because it is the order of commands execution.
77+
for (auto StreamImplPtr = StreamsToDeallocate.rbegin();
78+
StreamImplPtr != StreamsToDeallocate.rend(); ++StreamImplPtr)
79+
detail::Scheduler::getInstance().deallocateStreamBuffers(
80+
StreamImplPtr->get());
81+
}
82+
7383
EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
7484
QueueImplPtr Queue) {
7585
EventImplPtr NewEvent = nullptr;
@@ -111,58 +121,51 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
111121
}
112122

113123
std::vector<Command *> ToCleanUp;
114-
{
124+
try {
115125
ReadLockT Lock(MGraphLock);
116126

117127
Command *NewCmd = static_cast<Command *>(NewEvent->getCommand());
118128

119129
EnqueueResultT Res;
120130
bool Enqueued;
121131

122-
auto CleanUp = [&]() {
123-
if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) {
124-
if (Type == CG::RunOnHostIntel)
125-
static_cast<ExecCGCommand *>(NewCmd)->releaseCG();
126-
127-
NewEvent->setCommand(nullptr);
128-
delete NewCmd;
129-
}
130-
};
131-
132132
for (Command *Cmd : AuxiliaryCmds) {
133133
Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp);
134-
try {
135-
if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
136-
throw runtime_error("Auxiliary enqueue process failed.",
137-
PI_INVALID_OPERATION);
138-
} catch (...) {
139-
// enqueueCommand() func and if statement above may throw an exception,
140-
// so destroy required resources to avoid memory leak
141-
CleanUp();
142-
std::rethrow_exception(std::current_exception());
143-
}
134+
if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
135+
throw runtime_error("Auxiliary enqueue process failed.",
136+
PI_INVALID_OPERATION);
144137
}
145138

146139
if (NewCmd) {
147140
// TODO: Check if lazy mode.
148141
EnqueueResultT Res;
149-
try {
150-
bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res, ToCleanUp);
151-
if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
152-
throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
153-
} catch (...) {
154-
// enqueueCommand() func and if statement above may throw an exception,
155-
// so destroy required resources to avoid memory leak
156-
CleanUp();
157-
std::rethrow_exception(std::current_exception());
158-
}
142+
bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res, ToCleanUp);
143+
if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
144+
throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
159145

160146
// If there are no memory dependencies decouple and free the command.
161147
// Though, dismiss ownership of native kernel command group as it's
162148
// resources may be in use by backend and synchronization point here is
163149
// at native kernel execution finish.
164-
CleanUp();
150+
if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) {
151+
if (Type == CG::RunOnHostIntel)
152+
static_cast<ExecCGCommand *>(NewCmd)->releaseCG();
153+
154+
NewEvent->setCommand(nullptr);
155+
delete NewCmd;
156+
}
165157
}
158+
} catch (...) {
159+
std::vector<StreamImplPtr> StreamsToDeallocate;
160+
Command *NewCmd = static_cast<Command *>(NewEvent->getCommand());
161+
if (NewCmd) {
162+
WriteLockT Lock(MGraphLock, std::defer_lock);
163+
MGraphBuilder.cleanupFailedCommand(NewCmd, StreamsToDeallocate,
164+
ToCleanUp);
165+
}
166+
deallocateStreams(StreamsToDeallocate);
167+
cleanupCommands(ToCleanUp);
168+
std::rethrow_exception(std::current_exception());
166169
}
167170
cleanupCommands(ToCleanUp);
168171

@@ -223,16 +226,6 @@ void Scheduler::waitForEvent(EventImplPtr Event) {
223226
cleanupCommands(ToCleanUp);
224227
}
225228

226-
static void deallocateStreams(
227-
std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate) {
228-
// Deallocate buffers for stream objects of the finished commands. Iterate in
229-
// reverse order because it is the order of commands execution.
230-
for (auto StreamImplPtr = StreamsToDeallocate.rbegin();
231-
StreamImplPtr != StreamsToDeallocate.rend(); ++StreamImplPtr)
232-
detail::Scheduler::getInstance().deallocateStreamBuffers(
233-
StreamImplPtr->get());
234-
}
235-
236229
void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
237230
// We are going to traverse a graph of finished commands. Gather stream
238231
// objects from these commands if any and deallocate buffers for these stream

sycl/source/detail/scheduler/scheduler.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,13 @@ class Scheduler {
516516
Command *FinishedCmd,
517517
std::vector<std::shared_ptr<cl::sycl::detail::stream_impl>> &);
518518

519+
/// Replaces a failed command in the subgraph with an empty command and
520+
/// deletes the failed command.
521+
void cleanupFailedCommand(
522+
Command *FailedCmd,
523+
std::vector<std::shared_ptr<cl::sycl::detail::stream_impl>> &,
524+
std::vector<Command *> &ToCleanUp);
525+
519526
/// Reschedules the command passed using Queue provided.
520527
///
521528
/// This can lead to rescheduling of all dependent commands. This can be
@@ -551,6 +558,8 @@ class Scheduler {
551558
std::vector<Command *> &ToEnqueue);
552559

553560
/// Removes commands from leaves.
561+
void updateLeaves(const std::set<Command *> &Cmds, MemObjRecord *Record,
562+
std::vector<Command *> &ToCleanUp);
554563
void updateLeaves(const std::set<Command *> &Cmds, MemObjRecord *Record,
555564
access::mode AccessMode,
556565
std::vector<Command *> &ToCleanUp);

0 commit comments

Comments
 (0)