Skip to content

[NFC][SYCL] Pass queue_impl by raw ptr/ref (mostly scheduler) #19120

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions sycl/source/detail/graph/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ exec_graph_impl::enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer,

sycl::detail::EventImplPtr Event =
sycl::detail::Scheduler::getInstance().addCG(
Node->getCGCopy(), MQueueImpl,
Node->getCGCopy(), *MQueueImpl,
/*EventNeeded=*/true, CommandBuffer, Deps);

if (MIsUpdatable) {
Expand Down Expand Up @@ -1048,7 +1048,7 @@ EventImplPtr exec_graph_impl::enqueueHostTaskPartition(
NodeCommandGroup->getType()));

EventImplPtr SchedulerEvent = sycl::detail::Scheduler::getInstance().addCG(
std::move(CommandGroup), Queue.shared_from_this(), EventNeeded);
std::move(CommandGroup), Queue, EventNeeded);

if (EventNeeded) {
return SchedulerEvent;
Expand Down Expand Up @@ -1076,7 +1076,7 @@ EventImplPtr exec_graph_impl::enqueuePartitionWithScheduler(
CommandBuffer, nullptr, std::move(CGData));

EventImplPtr SchedulerEvent = sycl::detail::Scheduler::getInstance().addCG(
std::move(CommandGroup), Queue.shared_from_this(), EventNeeded);
std::move(CommandGroup), Queue, EventNeeded);

if (EventNeeded) {
SchedulerEvent->setEventFromSubmittedExecCommandBuffer(true);
Expand Down Expand Up @@ -1551,7 +1551,7 @@ void exec_graph_impl::update(
// other scheduler commands
auto UpdateEvent =
sycl::detail::Scheduler::getInstance().addCommandGraphUpdate(
this, Nodes, MQueueImpl, std::move(UpdateRequirements),
this, Nodes, MQueueImpl.get(), std::move(UpdateRequirements),
MSchedulerDependencies);

MSchedulerDependencies.push_back(UpdateEvent);
Expand Down
10 changes: 5 additions & 5 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,10 @@ queue_impl::get_backend_info<info::device::backend_version>() const {
}
#endif

static event prepareSYCLEventAssociatedWithQueue(
const std::shared_ptr<detail::queue_impl> &QueueImpl) {
auto EventImpl = detail::event_impl::create_device_event(*QueueImpl);
EventImpl->setContextImpl(QueueImpl->getContextImpl());
static event
prepareSYCLEventAssociatedWithQueue(detail::queue_impl &QueueImpl) {
auto EventImpl = detail::event_impl::create_device_event(QueueImpl);
EventImpl->setContextImpl(QueueImpl.getContextImpl());
EventImpl->setStateIncomplete();
return detail::createSyclObjFromImpl<event>(EventImpl);
}
Expand Down Expand Up @@ -464,7 +464,7 @@ event queue_impl::submitMemOpHelper(const std::vector<event> &DepEvents,
event_impl::create_discarded_event());
}

event ResEvent = prepareSYCLEventAssociatedWithQueue(shared_from_this());
event ResEvent = prepareSYCLEventAssociatedWithQueue(*this);
const auto &EventImpl = detail::getSyclObjImpl(ResEvent);
{
NestedCallsTracker tracker;
Expand Down
5 changes: 1 addition & 4 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -649,9 +649,6 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
static ContextImplPtr getContext(queue_impl *Queue) {
return Queue ? Queue->getContextImplPtr() : nullptr;
}
static ContextImplPtr getContext(const QueueImplPtr &Queue) {
return getContext(Queue.get());
}

// Must be called under MMutex protection
void doUnenqueuedCommandCleanup(
Expand Down Expand Up @@ -688,7 +685,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
protected:
template <typename HandlerType = handler>
EventImplPtr insertHelperBarrier(const HandlerType &Handler) {
auto &Queue = Handler.impl->get_queue();
queue_impl &Queue = Handler.impl->get_queue();
auto ResEvent = detail::event_impl::create_device_event(Queue);
ur_event_handle_t UREvent = nullptr;
getAdapter()->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
Expand Down
60 changes: 29 additions & 31 deletions sycl/source/detail/scheduler/graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,6 @@ static bool isOnSameContext(const ContextImplPtr Context, queue_impl *Queue) {
// contexts comparison.
return Context == queue_impl::getContext(Queue);
}
static bool isOnSameContext(const ContextImplPtr Context,
const QueueImplPtr &Queue) {
return isOnSameContext(Context, Queue.get());
}

/// Checks if the required access mode is allowed under the current one.
static bool isAccessModeAllowed(access::mode Required, access::mode Current) {
Expand Down Expand Up @@ -183,7 +179,7 @@ MemObjRecord *Scheduler::GraphBuilder::getMemObjRecord(SYCLMemObjI *MemObject) {
}

MemObjRecord *
Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
Scheduler::GraphBuilder::getOrInsertMemObjRecord(queue_impl *Queue,
const Requirement *Req) {
SYCLMemObjI *MemObject = Req->MSYCLMemObj;
MemObjRecord *Record = getMemObjRecord(MemObject);
Expand Down Expand Up @@ -231,8 +227,8 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
MemObject->MRecord.reset(
new MemObjRecord{InteropCtxPtr, LeafLimit, AllocateDependency});
std::vector<Command *> ToEnqueue;
getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr,
ToEnqueue);
getOrCreateAllocaForReq(MemObject->MRecord.get(), Req,
InteropQueuePtr.get(), ToEnqueue);
assert(ToEnqueue.empty() && "Creation of the first alloca for a record "
"shouldn't lead to any enqueuing (no linked "
"alloca or exceeding the leaf limit).");
Expand Down Expand Up @@ -274,14 +270,13 @@ void Scheduler::GraphBuilder::addNodeToLeaves(
}

UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
MemObjRecord *Record, Requirement *Req, queue_impl *Queue,
std::vector<Command *> &ToEnqueue) {
auto Context = queue_impl::getContext(Queue);
AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, Context);
assert(AllocaCmd && "There must be alloca for requirement!");
UpdateHostRequirementCommand *UpdateCommand =
new UpdateHostRequirementCommand(Queue.get(), *Req, AllocaCmd,
&Req->MData);
new UpdateHostRequirementCommand(Queue, *Req, AllocaCmd, &Req->MData);
// Need copy of requirement because after host accessor destructor call
// dependencies become invalid if requirement is stored by pointer.
const Requirement *StoredReq = UpdateCommand->getRequirement();
Expand Down Expand Up @@ -330,9 +325,10 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc,
return MapCmd;
}

Command *Scheduler::GraphBuilder::insertMemoryMove(
MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
std::vector<Command *> &ToEnqueue) {
Command *
Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record,
Requirement *Req, queue_impl *Queue,
std::vector<Command *> &ToEnqueue) {
AllocaCommandBase *AllocaCmdDst =
getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue);
if (!AllocaCmdDst)
Expand Down Expand Up @@ -519,7 +515,7 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req,
auto SYCLMemObj = static_cast<detail::SYCLMemObjT *>(Req->MSYCLMemObj);
SYCLMemObj->handleWriteAccessorCreation();
}
// Host accessor is not attached to any queue so no QueueImplPtr object to be
// Host accessor is not attached to any queue so no queue object to be
// sent to getOrInsertMemObjRecord.
MemObjRecord *Record = getOrInsertMemObjRecord(nullptr, Req);
if (MPrintOptionsArray[BeforeAddHostAcc])
Expand Down Expand Up @@ -691,7 +687,7 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) {
// Note, creation of new allocation command can lead to the current context
// (Record->MCurContext) change.
AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue,
MemObjRecord *Record, const Requirement *Req, queue_impl *Queue,
std::vector<Command *> &ToEnqueue) {
auto Context = queue_impl::getContext(Queue);
AllocaCommandBase *AllocaCmd =
Expand All @@ -710,8 +706,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(

auto *ParentAlloca =
getOrCreateAllocaForReq(Record, &ParentRequirement, Queue, ToEnqueue);
AllocaCmd = new AllocaSubBufCommand(Queue.get(), *Req, ParentAlloca,
ToEnqueue, ToCleanUp);
AllocaCmd = new AllocaSubBufCommand(Queue, *Req, ParentAlloca, ToEnqueue,
ToCleanUp);
} else {

const Requirement FullReq(/*Offset*/ {0, 0, 0}, Req->MMemoryRange,
Expand Down Expand Up @@ -787,8 +783,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
}
}

AllocaCmd = new AllocaCommand(Queue.get(), FullReq, InitFromUserData,
LinkedAllocaCmd);
AllocaCmd =
new AllocaCommand(Queue, FullReq, InitFromUserData, LinkedAllocaCmd);

// Update linked command
if (LinkedAllocaCmd) {
Expand Down Expand Up @@ -926,16 +922,16 @@ static void combineAccessModesOfReqs(std::vector<Requirement *> &Reqs) {
}

Command *Scheduler::GraphBuilder::addCG(
std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &Queue,
std::unique_ptr<detail::CG> CommandGroup, queue_impl *Queue,
std::vector<Command *> &ToEnqueue, bool EventNeeded,
ur_exp_command_buffer_handle_t CommandBuffer,
const std::vector<ur_exp_command_buffer_sync_point_t> &Dependencies) {
std::vector<Requirement *> &Reqs = CommandGroup->getRequirements();
std::vector<detail::EventImplPtr> &Events = CommandGroup->getEvents();

auto NewCmd = std::make_unique<ExecCGCommand>(
std::move(CommandGroup), Queue.get(), EventNeeded, CommandBuffer,
std::move(Dependencies));
auto NewCmd = std::make_unique<ExecCGCommand>(std::move(CommandGroup), Queue,
EventNeeded, CommandBuffer,
std::move(Dependencies));

if (!NewCmd)
throw exception(make_error_code(errc::memory_allocation),
Expand All @@ -958,9 +954,9 @@ Command *Scheduler::GraphBuilder::addCG(
bool isSameCtx = false;

{
const QueueImplPtr &QueueForAlloca =
queue_impl *QueueForAlloca =
isInteropTask
? static_cast<detail::CGHostTask &>(NewCmd->getCG()).MQueue
? static_cast<detail::CGHostTask &>(NewCmd->getCG()).MQueue.get()
: Queue;

Record = getOrInsertMemObjRecord(QueueForAlloca, Req);
Expand Down Expand Up @@ -990,15 +986,15 @@ Command *Scheduler::GraphBuilder::addCG(
// Cannot directly copy memory from OpenCL device to OpenCL device -
// create two copies: device->host and host->device.
bool NeedMemMoveToHost = false;
auto MemMoveTargetQueue = Queue;
queue_impl *MemMoveTargetQueue = Queue;

if (isInteropTask) {
const detail::CGHostTask &HT =
static_cast<detail::CGHostTask &>(NewCmd->getCG());

if (!isOnSameContext(Record->MCurContext, HT.MQueue)) {
if (!isOnSameContext(Record->MCurContext, HT.MQueue.get())) {
NeedMemMoveToHost = true;
MemMoveTargetQueue = HT.MQueue;
MemMoveTargetQueue = HT.MQueue.get();
}
} else if (Queue && Record->MCurContext)
NeedMemMoveToHost = true;
Expand Down Expand Up @@ -1230,7 +1226,9 @@ Command *Scheduler::GraphBuilder::connectDepEvent(
try {
std::shared_ptr<detail::HostTask> HT(new detail::HostTask);
std::unique_ptr<detail::CG> ConnectCG(new detail::CGHostTask(
std::move(HT), /* Queue = */ Cmd->getQueue(), /* Context = */ {},
std::move(HT),
/* Queue = */ Cmd->getQueue(),
/* Context = */ {},
/* Args = */ {},
detail::CG::StorageInitHelper(
/* ArgsStorage = */ {}, /* AccStorage = */ {},
Expand Down Expand Up @@ -1281,11 +1279,11 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate(
ext::oneapi::experimental::detail::exec_graph_impl *Graph,
std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
Nodes,
const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
queue_impl *Queue, std::vector<Requirement *> Requirements,
std::vector<detail::EventImplPtr> &Events,
std::vector<Command *> &ToEnqueue) {
auto NewCmd =
std::make_unique<UpdateCommandBufferCommand>(Queue.get(), Graph, Nodes);
std::make_unique<UpdateCommandBufferCommand>(Queue, Graph, Nodes);
// If there are multiple requirements for the same memory object, its
// AllocaCommand creation will be dependent on the access mode of the first
// requirement. Combine these access modes to take all of them into account.
Expand Down
6 changes: 3 additions & 3 deletions sycl/source/detail/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
}

EventImplPtr Scheduler::addCG(
std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &Queue,
std::unique_ptr<detail::CG> CommandGroup, queue_impl &Queue,
bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer,
const std::vector<ur_exp_command_buffer_sync_point_t> &Dependencies) {
EventImplPtr NewEvent = nullptr;
Expand All @@ -128,7 +128,7 @@ EventImplPtr Scheduler::addCG(
break;
}
default:
NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue),
NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), &Queue,
AuxiliaryCmds, EventNeeded, CommandBuffer,
std::move(Dependencies));
}
Expand Down Expand Up @@ -646,7 +646,7 @@ EventImplPtr Scheduler::addCommandGraphUpdate(
ext::oneapi::experimental::detail::exec_graph_impl *Graph,
std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
Nodes,
const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
queue_impl *Queue, std::vector<Requirement *> Requirements,
std::vector<detail::EventImplPtr> &Events) {
std::vector<Command *> AuxiliaryCmds;
EventImplPtr NewCmdEvent = nullptr;
Expand Down
22 changes: 10 additions & 12 deletions sycl/source/detail/scheduler/scheduler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,6 @@ class DispatchHostTask;

using ContextImplPtr = std::shared_ptr<detail::context_impl>;
using EventImplPtr = std::shared_ptr<detail::event_impl>;
using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
using StreamImplPtr = std::shared_ptr<detail::stream_impl>;

using CommandPtr = std::unique_ptr<Command>;
Expand Down Expand Up @@ -379,7 +378,7 @@ class Scheduler {
/// \return an event object to wait on for command group completion. It can
/// be a discarded event.
EventImplPtr addCG(
std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &Queue,
std::unique_ptr<detail::CG> CommandGroup, queue_impl &Queue,
bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer = nullptr,
const std::vector<ur_exp_command_buffer_sync_point_t> &Dependencies = {});

Expand Down Expand Up @@ -477,7 +476,7 @@ class Scheduler {
ext::oneapi::experimental::detail::exec_graph_impl *Graph,
std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
Nodes,
const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
queue_impl *Queue, std::vector<Requirement *> Requirements,
std::vector<detail::EventImplPtr> &Events);

static bool CheckEventReadiness(context_impl &Context,
Expand Down Expand Up @@ -560,9 +559,8 @@ class Scheduler {
/// \return a command that represents command group execution. Commands that
/// additionally need to be enqueued to the graph processor are collected in
/// \p ToEnqueue.
Command *addCG(std::unique_ptr<detail::CG> CommandGroup,
const QueueImplPtr &Queue, std::vector<Command *> &ToEnqueue,
bool EventNeeded,
Command *addCG(std::unique_ptr<detail::CG> CommandGroup, queue_impl *Queue,
std::vector<Command *> &ToEnqueue, bool EventNeeded,
ur_exp_command_buffer_handle_t CommandBuffer = nullptr,
const std::vector<ur_exp_command_buffer_sync_point_t>
&Dependencies = {});
Expand Down Expand Up @@ -600,15 +598,15 @@ class Scheduler {
/// used when the user provides a "secondary" queue to the submit method
/// which may be used when the command fails to enqueue/execute in the
/// primary queue.
void rescheduleCommand(Command *Cmd, const QueueImplPtr &Queue);
void rescheduleCommand(Command *Cmd, queue_impl *Queue);

/// \return a pointer to the corresponding memory object record for the
/// SYCL memory object provided, or nullptr if it does not exist.
MemObjRecord *getMemObjRecord(SYCLMemObjI *MemObject);

/// \return a pointer to the MemObjRecord for the memory object referenced by
/// \p Req. If the record is not found, a new one is created for it.
MemObjRecord *getOrInsertMemObjRecord(const QueueImplPtr &Queue,
MemObjRecord *getOrInsertMemObjRecord(queue_impl *Queue,
const Requirement *Req);

/// Decrements leaf counters for all leaves of the record.
Expand Down Expand Up @@ -656,7 +654,7 @@ class Scheduler {
std::vector<
std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
Nodes,
const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
queue_impl *Queue, std::vector<Requirement *> Requirements,
std::vector<detail::EventImplPtr> &Events,
std::vector<Command *> &ToEnqueue);

Expand All @@ -673,7 +671,7 @@ class Scheduler {
/// \param Req is a Requirement describing destination.
/// \param Queue is a queue that is bound to target context.
Command *insertMemoryMove(MemObjRecord *Record, Requirement *Req,
const QueueImplPtr &Queue,
queue_impl *Queue,
std::vector<Command *> &ToEnqueue);

// Inserts commands required to remap the memory object to its current host
Expand All @@ -684,7 +682,7 @@ class Scheduler {

UpdateHostRequirementCommand *
insertUpdateHostReqCmd(MemObjRecord *Record, Requirement *Req,
const QueueImplPtr &Queue,
queue_impl *Queue,
std::vector<Command *> &ToEnqueue);

/// Finds dependencies for the requirement.
Expand Down Expand Up @@ -717,7 +715,7 @@ class Scheduler {
/// If none found, creates new one.
AllocaCommandBase *
getOrCreateAllocaForReq(MemObjRecord *Record, const Requirement *Req,
const QueueImplPtr &Queue,
queue_impl *Queue,
std::vector<Command *> &ToEnqueue);

void markModifiedIfWrite(MemObjRecord *Record, Requirement *Req);
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,7 +936,7 @@ event handler::finalize() {
CommandGroup->getRequirements().size() == 0;

detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG(
std::move(CommandGroup), Queue->shared_from_this(), !DiscardEvent);
std::move(CommandGroup), *Queue, !DiscardEvent);

#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
MLastEvent = DiscardEvent ? nullptr : Event;
Expand Down
Loading