@@ -32,8 +32,8 @@ extern "C" {
32
32
// Forward declarations.
33
33
static pi_result EventRelease (pi_event Event, pi_queue LockedQueue);
34
34
static pi_result QueueRelease (pi_queue Queue, pi_queue LockedQueue);
35
- static pi_result EventCreate (pi_context Context, bool HostVisible ,
36
- pi_event *RetEvent);
35
+ static pi_result EventCreate (pi_context Context, pi_queue Queue ,
36
+ bool HostVisible, pi_event *RetEvent);
37
37
}
38
38
39
39
namespace {
@@ -428,20 +428,13 @@ pi_result _pi_mem::removeMapping(void *MappedTo, Mapping &MapInfo) {
428
428
429
429
pi_result
430
430
_pi_context::getFreeSlotInExistingOrNewPool (ze_event_pool_handle_t &Pool,
431
- size_t &Index, bool HostVisible) {
431
+ size_t &Index, bool HostVisible,
432
+ bool ProfilingEnabled) {
432
433
// Lock while updating event pool machinery.
433
434
std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
434
435
435
- // Setup for host-visible pool as needed.
436
- ze_event_pool_flag_t ZePoolFlag = {};
437
- std::list<ze_event_pool_handle_t > *ZePoolCache;
438
-
439
- if (HostVisible) {
440
- ZePoolFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
441
- ZePoolCache = &ZeHostVisibleEventPoolCache;
442
- } else {
443
- ZePoolCache = &ZeDeviceScopeEventPoolCache;
444
- }
436
+ std::list<ze_event_pool_handle_t > *ZePoolCache =
437
+ getZeEventPoolCache (HostVisible, ProfilingEnabled);
445
438
446
439
// Remove full pool from the cache.
447
440
if (!ZePoolCache->empty ()) {
@@ -460,7 +453,13 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
460
453
if (*ZePool == nullptr ) {
461
454
ZeStruct<ze_event_pool_desc_t > ZeEventPoolDesc;
462
455
ZeEventPoolDesc.count = MaxNumEventsPerPool;
463
- ZeEventPoolDesc.flags = ZePoolFlag | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
456
+ ZeEventPoolDesc.flags = 0 ;
457
+ if (HostVisible) {
458
+ ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
459
+ }
460
+ if (ProfilingEnabled) {
461
+ ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
462
+ }
464
463
465
464
std::vector<ze_device_handle_t > ZeDevices;
466
465
std::for_each (Devices.begin (), Devices.end (),
@@ -486,12 +485,8 @@ pi_result _pi_context::decrementUnreleasedEventsInPool(pi_event Event) {
486
485
return PI_SUCCESS;
487
486
}
488
487
489
- std::list<ze_event_pool_handle_t > *ZePoolCache;
490
- if (Event->IsHostVisible ()) {
491
- ZePoolCache = &ZeHostVisibleEventPoolCache;
492
- } else {
493
- ZePoolCache = &ZeDeviceScopeEventPoolCache;
494
- }
488
+ std::list<ze_event_pool_handle_t > *ZePoolCache =
489
+ getZeEventPoolCache (Event->isHostVisible (), Event->isProfilingEnabled ());
495
490
496
491
// Put the empty pool to the cache of the pools.
497
492
std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
@@ -615,9 +610,9 @@ inline static pi_result
615
610
createEventAndAssociateQueue (pi_queue Queue, pi_event *Event,
616
611
pi_command_type CommandType,
617
612
pi_command_list_ptr_t CommandList) {
618
- pi_result Res = piEventCreate (Queue-> Context , Event);
619
- if (Res != PI_SUCCESS)
620
- return Res ;
613
+
614
+ PI_CALL (
615
+ EventCreate (Queue-> Context , Queue, EventsScope == AllHostVisible, Event)) ;
621
616
622
617
(*Event)->Queue = Queue;
623
618
(*Event)->CommandType = CommandType;
@@ -806,13 +801,11 @@ pi_result _pi_context::finalize() {
806
801
// For example, event pool caches would be still alive.
807
802
{
808
803
std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
809
- for (auto &ZePool : ZeDeviceScopeEventPoolCache)
810
- ZE_CALL (zeEventPoolDestroy, (ZePool));
811
- for (auto &ZePool : ZeHostVisibleEventPoolCache)
812
- ZE_CALL (zeEventPoolDestroy, (ZePool));
813
-
814
- ZeDeviceScopeEventPoolCache.clear ();
815
- ZeHostVisibleEventPoolCache.clear ();
804
+ for (auto &ZePoolCache : ZeEventPoolCache) {
805
+ for (auto &ZePool : ZePoolCache)
806
+ ZE_CALL (zeEventPoolDestroy, (ZePool));
807
+ ZePoolCache.clear ();
808
+ }
816
809
}
817
810
818
811
// Destroy the command list used for initializations
@@ -841,8 +834,7 @@ pi_result _pi_context::finalize() {
841
834
842
835
bool _pi_queue::isInOrderQueue () const {
843
836
// If out-of-order queue property is not set, then this is an in-order queue.
844
- return ((this ->PiQueueProperties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ==
845
- 0 );
837
+ return ((this ->Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 );
846
838
}
847
839
848
840
pi_result _pi_queue::resetCommandList (pi_command_list_ptr_t CommandList,
@@ -1032,11 +1024,10 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] {
1032
1024
_pi_queue::_pi_queue (ze_command_queue_handle_t Queue,
1033
1025
std::vector<ze_command_queue_handle_t > &CopyQueues,
1034
1026
pi_context Context, pi_device Device,
1035
- bool OwnZeCommandQueue,
1036
- pi_queue_properties PiQueueProperties)
1027
+ bool OwnZeCommandQueue, pi_queue_properties Properties)
1037
1028
: ZeComputeCommandQueue{Queue}, ZeCopyCommandQueues{CopyQueues},
1038
1029
Context{Context}, Device{Device}, OwnZeCommandQueue{OwnZeCommandQueue},
1039
- PiQueueProperties (PiQueueProperties ) {
1030
+ Properties (Properties ) {
1040
1031
ComputeCommandBatch.OpenCommandList = CommandListMap.end ();
1041
1032
CopyCommandBatch.OpenCommandList = CommandListMap.end ();
1042
1033
ComputeCommandBatch.QueueBatchSize =
@@ -1350,7 +1341,7 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
1350
1341
// Create a "proxy" host-visible event.
1351
1342
//
1352
1343
pi_event HostVisibleEvent;
1353
- PI_CALL (EventCreate (Context, true , &HostVisibleEvent));
1344
+ PI_CALL (EventCreate (Context, this , true , &HostVisibleEvent));
1354
1345
1355
1346
// Update each command's event in the command-list to "see" this
1356
1347
// proxy event as a host-visible counterpart.
@@ -4955,7 +4946,7 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
4955
4946
die (" getOrCreateHostVisibleEvent: missing host-visible event" );
4956
4947
4957
4948
// Create a "proxy" host-visible event on demand.
4958
- PI_CALL (EventCreate (Context, true , &HostVisibleEvent));
4949
+ PI_CALL (EventCreate (Context, Queue, true , &HostVisibleEvent));
4959
4950
HostVisibleEvent->CleanedUp = true ;
4960
4951
4961
4952
// Submit the command(s) signalling the proxy event to the queue.
@@ -4988,12 +4979,21 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
4988
4979
return PI_SUCCESS;
4989
4980
}
4990
4981
4991
- static pi_result EventCreate (pi_context Context, bool HostVisible,
4992
- pi_event *RetEvent) {
4982
+ // Helper function for creating a PI event.
4983
+ // The "Queue" argument specifies the PI queue where a command is submitted.
4984
+ // The "HostVisible" argument specifies if event needs to be allocated from
4985
+ // a host-visible pool.
4986
+ //
4987
+ static pi_result EventCreate (pi_context Context, pi_queue Queue,
4988
+ bool HostVisible, pi_event *RetEvent) {
4989
+
4990
+ bool ProfilingEnabled =
4991
+ !Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0 ;
4992
+
4993
4993
size_t Index = 0 ;
4994
4994
ze_event_pool_handle_t ZeEventPool = {};
4995
- if (auto Res = Context->getFreeSlotInExistingOrNewPool (ZeEventPool, Index,
4996
- HostVisible))
4995
+ if (auto Res = Context->getFreeSlotInExistingOrNewPool (
4996
+ ZeEventPool, Index, HostVisible, ProfilingEnabled ))
4997
4997
return Res;
4998
4998
4999
4999
ze_event_handle_t ZeEvent;
@@ -5034,8 +5034,9 @@ static pi_result EventCreate(pi_context Context, bool HostVisible,
5034
5034
return PI_SUCCESS;
5035
5035
}
5036
5036
5037
+ // External PI API entry
5037
5038
pi_result piEventCreate (pi_context Context, pi_event *RetEvent) {
5038
- return EventCreate (Context, EventsScope == AllHostVisible, RetEvent);
5039
+ return EventCreate (Context, nullptr , EventsScope == AllHostVisible, RetEvent);
5039
5040
}
5040
5041
5041
5042
pi_result piEventGetInfo (pi_event Event, pi_event_info ParamName,
@@ -5101,6 +5102,11 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
5101
5102
5102
5103
PI_ASSERT (Event, PI_INVALID_EVENT);
5103
5104
5105
+ if (Event->Queue &&
5106
+ (Event->Queue ->Properties & PI_QUEUE_PROFILING_ENABLE) == 0 ) {
5107
+ return PI_PROFILING_INFO_NOT_AVAILABLE;
5108
+ }
5109
+
5104
5110
uint64_t ZeTimerResolution =
5105
5111
Event->Queue
5106
5112
? Event->Queue ->Device ->ZeDeviceProperties ->timerResolution
0 commit comments