Skip to content

Commit fb27c65

Browse files
[SYCL][L0] Honor property::queue::enable_profiling (#5543)
1 parent a3df384 commit fb27c65

File tree

2 files changed

+91
-61
lines changed

2 files changed

+91
-61
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 71 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ extern "C" {
3232
// Forward declarations.
3333
static pi_result EventRelease(pi_event Event, pi_queue LockedQueue);
3434
static pi_result QueueRelease(pi_queue Queue, pi_queue LockedQueue);
35-
static pi_result EventCreate(pi_context Context, bool HostVisible,
36-
pi_event *RetEvent);
35+
static pi_result EventCreate(pi_context Context, pi_queue Queue,
36+
bool HostVisible, pi_event *RetEvent);
3737
}
3838

3939
namespace {
@@ -428,20 +428,13 @@ pi_result _pi_mem::removeMapping(void *MappedTo, Mapping &MapInfo) {
428428

429429
pi_result
430430
_pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
431-
size_t &Index, bool HostVisible) {
431+
size_t &Index, bool HostVisible,
432+
bool ProfilingEnabled) {
432433
// Lock while updating event pool machinery.
433434
std::lock_guard<std::mutex> Lock(ZeEventPoolCacheMutex);
434435

435-
// Setup for host-visible pool as needed.
436-
ze_event_pool_flag_t ZePoolFlag = {};
437-
std::list<ze_event_pool_handle_t> *ZePoolCache;
438-
439-
if (HostVisible) {
440-
ZePoolFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
441-
ZePoolCache = &ZeHostVisibleEventPoolCache;
442-
} else {
443-
ZePoolCache = &ZeDeviceScopeEventPoolCache;
444-
}
436+
std::list<ze_event_pool_handle_t> *ZePoolCache =
437+
getZeEventPoolCache(HostVisible, ProfilingEnabled);
445438

446439
// Remove full pool from the cache.
447440
if (!ZePoolCache->empty()) {
@@ -460,7 +453,12 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
460453
if (*ZePool == nullptr) {
461454
ZeStruct<ze_event_pool_desc_t> ZeEventPoolDesc;
462455
ZeEventPoolDesc.count = MaxNumEventsPerPool;
463-
ZeEventPoolDesc.flags = ZePoolFlag | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
456+
ZeEventPoolDesc.flags = 0;
457+
if (HostVisible)
458+
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
459+
if (ProfilingEnabled)
460+
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
461+
zePrint("ze_event_pool_desc_t flags set to: %d\n", ZeEventPoolDesc.flags);
464462

465463
std::vector<ze_device_handle_t> ZeDevices;
466464
std::for_each(Devices.begin(), Devices.end(),
@@ -486,12 +484,8 @@ pi_result _pi_context::decrementUnreleasedEventsInPool(pi_event Event) {
486484
return PI_SUCCESS;
487485
}
488486

489-
std::list<ze_event_pool_handle_t> *ZePoolCache;
490-
if (Event->IsHostVisible()) {
491-
ZePoolCache = &ZeHostVisibleEventPoolCache;
492-
} else {
493-
ZePoolCache = &ZeDeviceScopeEventPoolCache;
494-
}
487+
std::list<ze_event_pool_handle_t> *ZePoolCache =
488+
getZeEventPoolCache(Event->isHostVisible(), Event->isProfilingEnabled());
495489

496490
// Put the empty pool to the cache of the pools.
497491
std::lock_guard<std::mutex> Lock(ZeEventPoolCacheMutex);
@@ -611,13 +605,15 @@ inline static void piQueueRetainNoLock(pi_queue Queue) { Queue->RefCount++; }
611605
// \param Event a pointer to hold the newly created pi_event
612606
// \param CommandType various command type determined by the caller
613607
// \param CommandList is the command list where the event is added
614-
inline static pi_result
615-
createEventAndAssociateQueue(pi_queue Queue, pi_event *Event,
616-
pi_command_type CommandType,
617-
pi_command_list_ptr_t CommandList) {
618-
pi_result Res = piEventCreate(Queue->Context, Event);
619-
if (Res != PI_SUCCESS)
620-
return Res;
608+
// \param ForceHostVisible tells if the event must be created in
609+
// the host-visible pool
610+
inline static pi_result createEventAndAssociateQueue(
611+
pi_queue Queue, pi_event *Event, pi_command_type CommandType,
612+
pi_command_list_ptr_t CommandList, bool ForceHostVisible = false) {
613+
614+
PI_CALL(EventCreate(Queue->Context, Queue,
615+
ForceHostVisible ? true : EventsScope == AllHostVisible,
616+
Event));
621617

622618
(*Event)->Queue = Queue;
623619
(*Event)->CommandType = CommandType;
@@ -806,13 +802,11 @@ pi_result _pi_context::finalize() {
806802
// For example, event pool caches would be still alive.
807803
{
808804
std::lock_guard<std::mutex> Lock(ZeEventPoolCacheMutex);
809-
for (auto &ZePool : ZeDeviceScopeEventPoolCache)
810-
ZE_CALL(zeEventPoolDestroy, (ZePool));
811-
for (auto &ZePool : ZeHostVisibleEventPoolCache)
812-
ZE_CALL(zeEventPoolDestroy, (ZePool));
813-
814-
ZeDeviceScopeEventPoolCache.clear();
815-
ZeHostVisibleEventPoolCache.clear();
805+
for (auto &ZePoolCache : ZeEventPoolCache) {
806+
for (auto &ZePool : ZePoolCache)
807+
ZE_CALL(zeEventPoolDestroy, (ZePool));
808+
ZePoolCache.clear();
809+
}
816810
}
817811

818812
// Destroy the command list used for initializations
@@ -841,8 +835,7 @@ pi_result _pi_context::finalize() {
841835

842836
bool _pi_queue::isInOrderQueue() const {
843837
// If out-of-order queue property is not set, then this is an in-order queue.
844-
return ((this->PiQueueProperties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ==
845-
0);
838+
return ((this->Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0);
846839
}
847840

848841
pi_result _pi_queue::resetCommandList(pi_command_list_ptr_t CommandList,
@@ -1032,11 +1025,10 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] {
10321025
_pi_queue::_pi_queue(ze_command_queue_handle_t Queue,
10331026
std::vector<ze_command_queue_handle_t> &CopyQueues,
10341027
pi_context Context, pi_device Device,
1035-
bool OwnZeCommandQueue,
1036-
pi_queue_properties PiQueueProperties)
1028+
bool OwnZeCommandQueue, pi_queue_properties Properties)
10371029
: ZeComputeCommandQueue{Queue}, ZeCopyCommandQueues{CopyQueues},
10381030
Context{Context}, Device{Device}, OwnZeCommandQueue{OwnZeCommandQueue},
1039-
PiQueueProperties(PiQueueProperties) {
1031+
Properties(Properties) {
10401032
ComputeCommandBatch.OpenCommandList = CommandListMap.end();
10411033
CopyCommandBatch.OpenCommandList = CommandListMap.end();
10421034
ComputeCommandBatch.QueueBatchSize =
@@ -1350,7 +1342,10 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
13501342
// Create a "proxy" host-visible event.
13511343
//
13521344
pi_event HostVisibleEvent;
1353-
PI_CALL(EventCreate(Context, true, &HostVisibleEvent));
1345+
auto Res = createEventAndAssociateQueue(
1346+
this, &HostVisibleEvent, PI_COMMAND_TYPE_USER, CommandList, true);
1347+
if (Res)
1348+
return Res;
13541349

13551350
// Update each command's event in the command-list to "see" this
13561351
// proxy event as a host-visible counterpart.
@@ -1359,10 +1354,14 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
13591354
PI_CALL(piEventRetain(HostVisibleEvent));
13601355
}
13611356

1362-
// Decrement the reference count by 1 so all the remaining references
1363-
// are from the other commands in this batch. This host-visible event
1364-
// will be destroyed after all events in the batch are gone.
1357+
// Decrement the reference count of the event such that all the remaining
1358+
// references are from the other commands in this batch. This host-visible
1359+
// event will not be waited on or released by the SYCL RT, so it must be destroyed
1360+
// after all events in the batch are gone.
13651361
PI_CALL(piEventRelease(HostVisibleEvent));
1362+
PI_CALL(piEventRelease(HostVisibleEvent));
1363+
PI_CALL(piEventRelease(HostVisibleEvent));
1364+
13661365
// Indicate no cleanup is needed for this PI event as it is special.
13671366
HostVisibleEvent->CleanedUp = true;
13681367

@@ -2105,7 +2104,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
21052104
*NumDevices = ZeDeviceCount;
21062105

21072106
if (NumEntries == 0) {
2108-
// Devices should be nullptr when querying the number of devices
2107+
// Devices should be nullptr when querying the number of devices.
21092108
PI_ASSERT(Devices == nullptr, PI_INVALID_VALUE);
21102109
return PI_SUCCESS;
21112110
}
@@ -4955,10 +4954,6 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
49554954
if (EventsScope != OnDemandHostVisibleProxy)
49564955
die("getOrCreateHostVisibleEvent: missing host-visible event");
49574956

4958-
// Create a "proxy" host-visible event on demand.
4959-
PI_CALL(EventCreate(Context, true, &HostVisibleEvent));
4960-
HostVisibleEvent->CleanedUp = true;
4961-
49624957
// Submit the command(s) signalling the proxy event to the queue.
49634958
// We have to first submit a wait for the device-only event for which this
49644959
// proxy is created.
@@ -4975,6 +4970,13 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
49754970
Queue, CommandList, false /* UseCopyEngine */, OkToBatch))
49764971
return Res;
49774972

4973+
// Create a "proxy" host-visible event.
4974+
auto Res = createEventAndAssociateQueue(
4975+
Queue, &HostVisibleEvent, PI_COMMAND_TYPE_USER, CommandList, true);
4976+
// HostVisibleEvent->CleanedUp = true;
4977+
if (Res != PI_SUCCESS)
4978+
return Res;
4979+
49784980
ZE_CALL(zeCommandListAppendWaitOnEvents,
49794981
(CommandList->first, 1, &ZeEvent));
49804982
ZE_CALL(zeCommandListAppendSignalEvent,
@@ -4989,12 +4991,21 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
49894991
return PI_SUCCESS;
49904992
}
49914993

4992-
static pi_result EventCreate(pi_context Context, bool HostVisible,
4993-
pi_event *RetEvent) {
4994+
// Helper function for creating a PI event.
4995+
// The "Queue" argument specifies the PI queue where a command is submitted.
4996+
// The "HostVisible" argument specifies if event needs to be allocated from
4997+
// a host-visible pool.
4998+
//
4999+
static pi_result EventCreate(pi_context Context, pi_queue Queue,
5000+
bool HostVisible, pi_event *RetEvent) {
5001+
5002+
bool ProfilingEnabled =
5003+
!Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0;
5004+
49945005
size_t Index = 0;
49955006
ze_event_pool_handle_t ZeEventPool = {};
4996-
if (auto Res = Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index,
4997-
HostVisible))
5007+
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
5008+
ZeEventPool, Index, HostVisible, ProfilingEnabled))
49985009
return Res;
49995010

50005011
ze_event_handle_t ZeEvent;
@@ -5035,8 +5046,9 @@ static pi_result EventCreate(pi_context Context, bool HostVisible,
50355046
return PI_SUCCESS;
50365047
}
50375048

5049+
// External PI API entry
50385050
pi_result piEventCreate(pi_context Context, pi_event *RetEvent) {
5039-
return EventCreate(Context, EventsScope == AllHostVisible, RetEvent);
5051+
return EventCreate(Context, nullptr, EventsScope == AllHostVisible, RetEvent);
50405052
}
50415053

50425054
pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName,
@@ -5102,6 +5114,11 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
51025114

51035115
PI_ASSERT(Event, PI_INVALID_EVENT);
51045116

5117+
if (Event->Queue &&
5118+
(Event->Queue->Properties & PI_QUEUE_PROFILING_ENABLE) == 0) {
5119+
return PI_PROFILING_INFO_NOT_AVAILABLE;
5120+
}
5121+
51055122
uint64_t ZeTimerResolution =
51065123
Event->Queue
51075124
? Event->Queue->Device->ZeDeviceProperties->timerResolution
@@ -5374,7 +5391,7 @@ static pi_result EventRelease(pi_event Event, pi_queue LockedQueue) {
53745391
// and release a reference to it.
53755392
if (Event->HostVisibleEvent && Event->HostVisibleEvent != Event) {
53765393
// Decrement ref-count of the host-visible proxy event.
5377-
PI_CALL(piEventRelease(Event->HostVisibleEvent));
5394+
PI_CALL(EventRelease(Event->HostVisibleEvent, LockedQueue));
53785395
}
53795396

53805397
auto Context = Event->Context;

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -562,9 +562,11 @@ struct _pi_context : _pi_object {
562562

563563
// Get index of the free slot in the available pool. If there is no available
564564
// pool then create new one. The HostVisible parameter tells if we need a
565-
// slot for a host-visible event.
565+
// slot for a host-visible event. The ProfilingEnabled parameter tells if we need a
566+
// slot for an event with profiling capabilities.
566567
pi_result getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
567-
bool HostVisible);
568+
bool HostVisible,
569+
bool ProfilingEnabled);
568570

569571
// Decrement number of events living in the pool upon event destroy
570572
// and return the pool to the cache if there are no unreleased events.
@@ -601,9 +603,14 @@ struct _pi_context : _pi_object {
601603
// head. In case there is no next pool, a new pool is created and made the
602604
// head.
603605
//
604-
std::list<ze_event_pool_handle_t> ZeDeviceScopeEventPoolCache;
605606
// Caches of event pools, one per combination of host visibility and profiling.
606-
std::list<ze_event_pool_handle_t> ZeHostVisibleEventPoolCache;
607+
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{4};
608+
auto getZeEventPoolCache(bool HostVisible, bool WithProfiling) {
609+
if (HostVisible)
610+
return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1];
611+
else
612+
return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3];
613+
}
607614

608615
// This map will be used to determine if a pool is full or not
609616
// by storing number of empty slots available in the pool.
@@ -625,7 +632,7 @@ struct _pi_queue : _pi_object {
625632
_pi_queue(ze_command_queue_handle_t Queue,
626633
std::vector<ze_command_queue_handle_t> &CopyQueues,
627634
pi_context Context, pi_device Device, bool OwnZeCommandQueue,
628-
pi_queue_properties PiQueueProperties = 0);
635+
pi_queue_properties Properties = 0);
629636

630637
// Level Zero compute command queue handle.
631638
ze_command_queue_handle_t ZeComputeCommandQueue;
@@ -731,7 +738,7 @@ struct _pi_queue : _pi_object {
731738
bool isBatchingAllowed(bool IsCopy) const;
732739

733740
// Keeps the properties of this queue.
734-
pi_queue_properties PiQueueProperties;
741+
pi_queue_properties Properties;
735742

736743
// Returns true if the queue is an in-order queue.
737744
bool isInOrderQueue() const;
@@ -986,11 +993,17 @@ struct _pi_event : _pi_object {
986993
// than by just this one event, depending on the mode (see EventsScope).
987994
//
988995
pi_event HostVisibleEvent = {nullptr};
989-
bool IsHostVisible() const { return this == HostVisibleEvent; }
996+
bool isHostVisible() const { return this == HostVisibleEvent; }
990997

991998
// Get the host-visible event or create one and enqueue its signal.
992999
pi_result getOrCreateHostVisibleEvent(ze_event_handle_t &HostVisibleEvent);
9931000

1001+
// Tells if this event has profiling capabilities.
1002+
bool isProfilingEnabled() const {
1003+
return !Queue || // tentatively assume user events are profiling enabled
1004+
(Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0;
1005+
}
1006+
9941007
// Level Zero command list where the command signaling this event was appended
9951008
// to. This is currently used to remember/destroy the command list after all
9961009
// commands in it are completed, i.e. this event signaled.

0 commit comments

Comments
 (0)