Skip to content

Commit 97fed15

Browse files
[NFC][SYCL] Accept queue_impl by raw ptr/ref in MemoryManager (#18712)
This is intended to be a part of a bigger refactoring around internal RT APIs passing raw references instead of `std::shared_ptr<*_impl>`, similar to what have been implemented for `device_impl` earlier.
1 parent ee949ac commit 97fed15

File tree

10 files changed

+176
-212
lines changed

10 files changed

+176
-212
lines changed

sycl/source/detail/context_impl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
427427
for (DeviceGlobalMapEntry *DeviceGlobalEntry : DeviceGlobalEntries) {
428428
// Get or allocate the USM memory associated with the device global.
429429
DeviceGlobalUSMMem &DeviceGlobalUSM =
430-
DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(QueueImpl);
430+
DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(*QueueImpl);
431431

432432
// If the device global still has a initialization event it should be
433433
// added to the initialization events list. Since initialization events

sycl/source/detail/device_global_map_entry.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,13 @@ OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(const AdapterPtr &Adapter) {
4141
return OwnedUrEvent(Adapter);
4242
}
4343

44-
DeviceGlobalUSMMem &DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(
45-
const std::shared_ptr<queue_impl> &QueueImpl) {
44+
DeviceGlobalUSMMem &
45+
DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
4646
assert(!MIsDeviceImageScopeDecorated &&
4747
"USM allocations should not be acquired for device_global with "
4848
"device_image_scope property.");
49-
const std::shared_ptr<context_impl> &CtxImpl = QueueImpl->getContextImplPtr();
50-
const device_impl &DevImpl = QueueImpl->getDeviceImpl();
49+
const std::shared_ptr<context_impl> &CtxImpl = QueueImpl.getContextImplPtr();
50+
const device_impl &DevImpl = QueueImpl.getDeviceImpl();
5151
std::lock_guard<std::mutex> Lock(MDeviceToUSMPtrMapMutex);
5252

5353
auto DGUSMPtr = MDeviceToUSMPtrMap.find({&DevImpl, CtxImpl.get()});

sycl/source/detail/device_global_map_entry.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,7 @@ struct DeviceGlobalMapEntry {
109109
}
110110

111111
// Gets or allocates USM memory for a device_global.
112-
DeviceGlobalUSMMem &
113-
getOrAllocateDeviceGlobalUSM(const std::shared_ptr<queue_impl> &QueueImpl);
112+
DeviceGlobalUSMMem &getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl);
114113

115114
// Removes resources for device_globals associated with the context.
116115
void removeAssociatedResources(const context_impl *CtxImpl);

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,7 @@ class kernel_bundle_impl {
669669
// of using a throw-away queue.
670670
queue InitQueue{MContext, Dev};
671671
auto &USMMem =
672-
Entry->getOrAllocateDeviceGlobalUSM(getSyclObjImpl(InitQueue));
672+
Entry->getOrAllocateDeviceGlobalUSM(*getSyclObjImpl(InitQueue));
673673
InitQueue.wait_and_throw();
674674
return USMMem.getPtr();
675675
}

sycl/source/detail/memory_manager.cpp

Lines changed: 112 additions & 151 deletions
Large diffs are not rendered by default.

sycl/source/detail/memory_manager.hpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -107,80 +107,80 @@ class MemoryManager {
107107

108108
// Copies memory between: host and device, host and host,
109109
// device and device if memory objects bound to the one context.
110-
static void copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, QueueImplPtr SrcQueue,
110+
static void copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, queue_impl *SrcQueue,
111111
unsigned int DimSrc, sycl::range<3> SrcSize,
112112
sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
113-
unsigned int SrcElemSize, void *DstMem,
114-
QueueImplPtr TgtQueue, unsigned int DimDst,
115-
sycl::range<3> DstSize, sycl::range<3> DstAccessRange,
116-
sycl::id<3> DstOffset, unsigned int DstElemSize,
113+
unsigned int SrcElemSize, void *DstMem, queue_impl *TgtQueue,
114+
unsigned int DimDst, sycl::range<3> DstSize,
115+
sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
116+
unsigned int DstElemSize,
117117
std::vector<ur_event_handle_t> DepEvents,
118118
ur_event_handle_t &OutEvent);
119119

120-
static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
120+
static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, queue_impl &Queue,
121121
size_t PatternSize, const unsigned char *Pattern,
122122
unsigned int Dim, sycl::range<3> Size,
123123
sycl::range<3> AccessRange, sycl::id<3> AccessOffset,
124124
unsigned int ElementSize,
125125
std::vector<ur_event_handle_t> DepEvents,
126126
ur_event_handle_t &OutEvent);
127127

128-
static void *map(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
128+
static void *map(SYCLMemObjI *SYCLMemObj, void *Mem, queue_impl &Queue,
129129
access::mode AccessMode, unsigned int Dim,
130130
sycl::range<3> Size, sycl::range<3> AccessRange,
131131
sycl::id<3> AccessOffset, unsigned int ElementSize,
132132
std::vector<ur_event_handle_t> DepEvents,
133133
ur_event_handle_t &OutEvent);
134134

135-
static void unmap(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
135+
static void unmap(SYCLMemObjI *SYCLMemObj, void *Mem, queue_impl &Queue,
136136
void *MappedPtr, std::vector<ur_event_handle_t> DepEvents,
137137
ur_event_handle_t &OutEvent);
138138

139-
static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len,
139+
static void copy_usm(const void *SrcMem, queue_impl &Queue, size_t Len,
140140
void *DstMem, std::vector<ur_event_handle_t> DepEvents,
141141
ur_event_handle_t *OutEvent);
142142

143-
static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len,
143+
static void fill_usm(void *DstMem, queue_impl &Queue, size_t Len,
144144
const std::vector<unsigned char> &Pattern,
145145
std::vector<ur_event_handle_t> DepEvents,
146146
ur_event_handle_t *OutEvent);
147147

148-
static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len,
148+
static void prefetch_usm(void *Ptr, queue_impl &Queue, size_t Len,
149149
std::vector<ur_event_handle_t> DepEvents,
150150
ur_event_handle_t *OutEvent);
151151

152-
static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len,
152+
static void advise_usm(const void *Ptr, queue_impl &Queue, size_t Len,
153153
ur_usm_advice_flags_t Advice,
154154
std::vector<ur_event_handle_t> DepEvents,
155155
ur_event_handle_t *OutEvent);
156156

157157
static void copy_2d_usm(const void *SrcMem, size_t SrcPitch,
158-
QueueImplPtr Queue, void *DstMem, size_t DstPitch,
158+
queue_impl &Queue, void *DstMem, size_t DstPitch,
159159
size_t Width, size_t Height,
160160
std::vector<ur_event_handle_t> DepEvents,
161161
ur_event_handle_t *OutEvent);
162162

163-
static void fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch,
163+
static void fill_2d_usm(void *DstMem, queue_impl &Queue, size_t Pitch,
164164
size_t Width, size_t Height,
165165
const std::vector<unsigned char> &Pattern,
166166
std::vector<ur_event_handle_t> DepEvents,
167167
ur_event_handle_t *OutEvent);
168168

169-
static void memset_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch,
169+
static void memset_2d_usm(void *DstMem, queue_impl &Queue, size_t Pitch,
170170
size_t Width, size_t Height, char Value,
171171
std::vector<ur_event_handle_t> DepEvents,
172172
ur_event_handle_t *OutEvent);
173173

174174
static void
175175
copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped,
176-
QueueImplPtr Queue, size_t NumBytes, size_t Offset,
176+
queue_impl &Queue, size_t NumBytes, size_t Offset,
177177
const void *SrcMem,
178178
const std::vector<ur_event_handle_t> &DepEvents,
179179
ur_event_handle_t *OutEvent);
180180

181181
static void
182182
copy_from_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped,
183-
QueueImplPtr Queue, size_t NumBytes, size_t Offset,
183+
queue_impl &Queue, size_t NumBytes, size_t Offset,
184184
void *DstMem,
185185
const std::vector<ur_event_handle_t> &DepEvents,
186186
ur_event_handle_t *OutEvent);
@@ -254,7 +254,7 @@ class MemoryManager {
254254
ur_exp_command_buffer_sync_point_t *OutSyncPoint);
255255

256256
static void copy_image_bindless(
257-
QueueImplPtr Queue, const void *Src, void *Dst,
257+
queue_impl &Queue, const void *Src, void *Dst,
258258
const ur_image_desc_t &SrcDesc, const ur_image_desc_t &DstDesc,
259259
const ur_image_format_t &SrcFormat, const ur_image_format_t &DstFormat,
260260
const ur_exp_image_copy_flags_t Flags, ur_rect_offset_t SrcOffset,

sycl/source/detail/queue_impl.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
182182
return submitMemOpHelper(
183183
Self, DepEvents, CallerNeedsEvent,
184184
[&](handler &CGH) { CGH.memset(Ptr, Value, Count); },
185-
MemoryManager::fill_usm, Ptr, Self, Count, Pattern);
185+
MemoryManager::fill_usm, Ptr, *this, Count, Pattern);
186186
}
187187

188188
void report(const code_location &CodeLoc) {
@@ -233,7 +233,7 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
233233
return submitMemOpHelper(
234234
Self, DepEvents, CallerNeedsEvent,
235235
[&](handler &CGH) { CGH.memcpy(Dest, Src, Count); },
236-
MemoryManager::copy_usm, Src, Self, Count, Dest);
236+
MemoryManager::copy_usm, Src, *this, Count, Dest);
237237
}
238238

239239
event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
@@ -244,7 +244,7 @@ event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
244244
return submitMemOpHelper(
245245
Self, DepEvents, CallerNeedsEvent,
246246
[&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); },
247-
MemoryManager::advise_usm, Ptr, Self, Length, Advice);
247+
MemoryManager::advise_usm, Ptr, *this, Length, Advice);
248248
}
249249

250250
event queue_impl::memcpyToDeviceGlobal(
@@ -258,7 +258,7 @@ event queue_impl::memcpyToDeviceGlobal(
258258
NumBytes, Offset);
259259
},
260260
MemoryManager::copy_to_device_global, DeviceGlobalPtr, IsDeviceImageScope,
261-
Self, NumBytes, Offset, Src);
261+
*this, NumBytes, Offset, Src);
262262
}
263263

264264
event queue_impl::memcpyFromDeviceGlobal(
@@ -272,7 +272,7 @@ event queue_impl::memcpyFromDeviceGlobal(
272272
NumBytes, Offset);
273273
},
274274
MemoryManager::copy_from_device_global, DeviceGlobalPtr,
275-
IsDeviceImageScope, Self, NumBytes, Offset, Dest);
275+
IsDeviceImageScope, *this, NumBytes, Offset, Dest);
276276
}
277277

278278
sycl::detail::optional<event>
@@ -449,7 +449,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
449449
bool CallerNeedsEvent,
450450
HandlerFuncT HandlerFunc,
451451
MemOpFuncT MemOpFunc,
452-
MemOpArgTs... MemOpArgs) {
452+
MemOpArgTs &&...MemOpArgs) {
453453
// We need to submit command and update the last event under same lock if we
454454
// have in-order queue.
455455
{
@@ -468,7 +468,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
468468
auto isNoEventsMode = trySwitchingToNoEventsMode();
469469
if (!CallerNeedsEvent && isNoEventsMode) {
470470
NestedCallsTracker tracker;
471-
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
471+
MemOpFunc(std::forward<MemOpArgTs>(MemOpArgs)...,
472+
getUrEvents(ExpandedDepEvents),
472473
/*PiEvent*/ nullptr);
473474

474475
return createDiscardedEvent();
@@ -480,7 +481,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
480481
NestedCallsTracker tracker;
481482
ur_event_handle_t UREvent = nullptr;
482483
EventImpl->setSubmissionTime();
483-
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), &UREvent);
484+
MemOpFunc(std::forward<MemOpArgTs>(MemOpArgs)...,
485+
getUrEvents(ExpandedDepEvents), &UREvent);
484486
EventImpl->setHandle(UREvent);
485487
EventImpl->setEnqueued();
486488
// connect returned event with dependent events

sycl/source/detail/queue_impl.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,8 @@ class queue_impl {
955955
event submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
956956
const std::vector<event> &DepEvents,
957957
bool CallerNeedsEvent, HandlerFuncT HandlerFunc,
958-
MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs);
958+
MemMngrFuncT MemMngrFunc,
959+
MemMngrArgTs &&...MemOpArgs);
959960

960961
// When instrumentation is enabled emits trace event for wait begin and
961962
// returns the telemetry event generated for the wait

sycl/source/detail/reduction.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ addCounterInit(handler &CGH, std::shared_ptr<sycl::detail::queue_impl> &Queue,
184184
EventImpl->setContextImpl(detail::getSyclObjImpl(Queue->get_context()));
185185
EventImpl->setStateIncomplete();
186186
ur_event_handle_t UREvent = nullptr;
187-
MemoryManager::fill_usm(Counter.get(), Queue, sizeof(int), {0}, {}, &UREvent);
187+
MemoryManager::fill_usm(Counter.get(), *Queue, sizeof(int), {0}, {},
188+
&UREvent);
188189
EventImpl->setHandle(UREvent);
189190
CGH.depends_on(createSyclObjFromImpl<event>(EventImpl));
190191
}

0 commit comments

Comments
 (0)