@@ -32,8 +32,8 @@ extern "C" {
32
32
// Forward declarations.
33
33
static pi_result EventRelease (pi_event Event, pi_queue LockedQueue);
34
34
static pi_result QueueRelease (pi_queue Queue, pi_queue LockedQueue);
35
- static pi_result EventCreate (pi_context Context, bool HostVisible ,
36
- pi_event *RetEvent);
35
+ static pi_result EventCreate (pi_context Context, pi_queue Queue ,
36
+ bool HostVisible, pi_event *RetEvent);
37
37
}
38
38
39
39
namespace {
@@ -428,20 +428,13 @@ pi_result _pi_mem::removeMapping(void *MappedTo, Mapping &MapInfo) {
428
428
429
429
pi_result
430
430
_pi_context::getFreeSlotInExistingOrNewPool (ze_event_pool_handle_t &Pool,
431
- size_t &Index, bool HostVisible) {
431
+ size_t &Index, bool HostVisible,
432
+ bool ProfilingEnabled) {
432
433
// Lock while updating event pool machinery.
433
434
std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
434
435
435
- // Setup for host-visible pool as needed.
436
- ze_event_pool_flag_t ZePoolFlag = {};
437
- std::list<ze_event_pool_handle_t > *ZePoolCache;
438
-
439
- if (HostVisible) {
440
- ZePoolFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
441
- ZePoolCache = &ZeHostVisibleEventPoolCache;
442
- } else {
443
- ZePoolCache = &ZeDeviceScopeEventPoolCache;
444
- }
436
+ std::list<ze_event_pool_handle_t > *ZePoolCache =
437
+ getZeEventPoolCache (HostVisible, ProfilingEnabled);
445
438
446
439
// Remove full pool from the cache.
447
440
if (!ZePoolCache->empty ()) {
@@ -460,7 +453,12 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
460
453
if (*ZePool == nullptr ) {
461
454
ZeStruct<ze_event_pool_desc_t > ZeEventPoolDesc;
462
455
ZeEventPoolDesc.count = MaxNumEventsPerPool;
463
- ZeEventPoolDesc.flags = ZePoolFlag | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
456
+ ZeEventPoolDesc.flags = 0 ;
457
+ if (HostVisible)
458
+ ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
459
+ if (ProfilingEnabled)
460
+ ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
461
+ zePrint (" ze_event_pool_desc_t flags set to: %d\n " , ZeEventPoolDesc.flags );
464
462
465
463
std::vector<ze_device_handle_t > ZeDevices;
466
464
std::for_each (Devices.begin (), Devices.end (),
@@ -486,12 +484,8 @@ pi_result _pi_context::decrementUnreleasedEventsInPool(pi_event Event) {
486
484
return PI_SUCCESS;
487
485
}
488
486
489
- std::list<ze_event_pool_handle_t > *ZePoolCache;
490
- if (Event->IsHostVisible ()) {
491
- ZePoolCache = &ZeHostVisibleEventPoolCache;
492
- } else {
493
- ZePoolCache = &ZeDeviceScopeEventPoolCache;
494
- }
487
+ std::list<ze_event_pool_handle_t > *ZePoolCache =
488
+ getZeEventPoolCache (Event->isHostVisible (), Event->isProfilingEnabled ());
495
489
496
490
// Put the empty pool to the cache of the pools.
497
491
std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
@@ -611,13 +605,15 @@ inline static void piQueueRetainNoLock(pi_queue Queue) { Queue->RefCount++; }
611
605
// \param Event a pointer to hold the newly created pi_event
612
606
// \param CommandType various command type determined by the caller
613
607
// \param CommandList is the command list where the event is added
614
- inline static pi_result
615
- createEventAndAssociateQueue (pi_queue Queue, pi_event *Event,
616
- pi_command_type CommandType,
617
- pi_command_list_ptr_t CommandList) {
618
- pi_result Res = piEventCreate (Queue->Context , Event);
619
- if (Res != PI_SUCCESS)
620
- return Res;
608
+ // \param ForceHostVisible tells if the event must be created in
609
+ // the host-visible pool
610
+ inline static pi_result createEventAndAssociateQueue (
611
+ pi_queue Queue, pi_event *Event, pi_command_type CommandType,
612
+ pi_command_list_ptr_t CommandList, bool ForceHostVisible = false ) {
613
+
614
+ PI_CALL (EventCreate (Queue->Context , Queue,
615
+ ForceHostVisible ? true : EventsScope == AllHostVisible,
616
+ Event));
621
617
622
618
(*Event)->Queue = Queue;
623
619
(*Event)->CommandType = CommandType;
@@ -806,13 +802,11 @@ pi_result _pi_context::finalize() {
806
802
// For example, event pool caches would be still alive.
807
803
{
808
804
std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
809
- for (auto &ZePool : ZeDeviceScopeEventPoolCache)
810
- ZE_CALL (zeEventPoolDestroy, (ZePool));
811
- for (auto &ZePool : ZeHostVisibleEventPoolCache)
812
- ZE_CALL (zeEventPoolDestroy, (ZePool));
813
-
814
- ZeDeviceScopeEventPoolCache.clear ();
815
- ZeHostVisibleEventPoolCache.clear ();
805
+ for (auto &ZePoolCache : ZeEventPoolCache) {
806
+ for (auto &ZePool : ZePoolCache)
807
+ ZE_CALL (zeEventPoolDestroy, (ZePool));
808
+ ZePoolCache.clear ();
809
+ }
816
810
}
817
811
818
812
// Destroy the command list used for initializations
@@ -841,8 +835,7 @@ pi_result _pi_context::finalize() {
841
835
842
836
bool _pi_queue::isInOrderQueue () const {
843
837
// If out-of-order queue property is not set, then this is an in-order queue.
844
- return ((this ->PiQueueProperties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ==
845
- 0 );
838
+ return ((this ->Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 );
846
839
}
847
840
848
841
pi_result _pi_queue::resetCommandList (pi_command_list_ptr_t CommandList,
@@ -1032,11 +1025,10 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] {
1032
1025
_pi_queue::_pi_queue (ze_command_queue_handle_t Queue,
1033
1026
std::vector<ze_command_queue_handle_t > &CopyQueues,
1034
1027
pi_context Context, pi_device Device,
1035
- bool OwnZeCommandQueue,
1036
- pi_queue_properties PiQueueProperties)
1028
+ bool OwnZeCommandQueue, pi_queue_properties Properties)
1037
1029
: ZeComputeCommandQueue{Queue}, ZeCopyCommandQueues{CopyQueues},
1038
1030
Context{Context}, Device{Device}, OwnZeCommandQueue{OwnZeCommandQueue},
1039
- PiQueueProperties (PiQueueProperties ) {
1031
+ Properties (Properties ) {
1040
1032
ComputeCommandBatch.OpenCommandList = CommandListMap.end ();
1041
1033
CopyCommandBatch.OpenCommandList = CommandListMap.end ();
1042
1034
ComputeCommandBatch.QueueBatchSize =
@@ -1350,7 +1342,10 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
1350
1342
// Create a "proxy" host-visible event.
1351
1343
//
1352
1344
pi_event HostVisibleEvent;
1353
- PI_CALL (EventCreate (Context, true , &HostVisibleEvent));
1345
+ auto Res = createEventAndAssociateQueue (
1346
+ this , &HostVisibleEvent, PI_COMMAND_TYPE_USER, CommandList, true );
1347
+ if (Res)
1348
+ return Res;
1354
1349
1355
1350
// Update each command's event in the command-list to "see" this
1356
1351
// proxy event as a host-visible counterpart.
@@ -1359,10 +1354,14 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
1359
1354
PI_CALL (piEventRetain (HostVisibleEvent));
1360
1355
}
1361
1356
1362
- // Decrement the reference count by 1 so all the remaining references
1363
- // are from the other commands in this batch. This host-visible event
1364
- // will be destroyed after all events in the batch are gone.
1357
+ // Decrement the reference count of the event such that all the remaining
1358
+ // references are from the other commands in this batch. This host-visible
1359
+ // event will not be waited/released by SYCL RT, so it must be destroyed
1360
+ // after all events in the batch are gone.
1365
1361
PI_CALL (piEventRelease (HostVisibleEvent));
1362
+ PI_CALL (piEventRelease (HostVisibleEvent));
1363
+ PI_CALL (piEventRelease (HostVisibleEvent));
1364
+
1366
1365
// Indicate no cleanup is needed for this PI event as it is special.
1367
1366
HostVisibleEvent->CleanedUp = true ;
1368
1367
@@ -2105,7 +2104,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
2105
2104
*NumDevices = ZeDeviceCount;
2106
2105
2107
2106
if (NumEntries == 0 ) {
2108
- // Devices should be nullptr when querying the number of devices
2107
+ // Devices should be nullptr when querying the number of devices.
2109
2108
PI_ASSERT (Devices == nullptr , PI_INVALID_VALUE);
2110
2109
return PI_SUCCESS;
2111
2110
}
@@ -4955,10 +4954,6 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
4955
4954
if (EventsScope != OnDemandHostVisibleProxy)
4956
4955
die (" getOrCreateHostVisibleEvent: missing host-visible event" );
4957
4956
4958
- // Create a "proxy" host-visible event on demand.
4959
- PI_CALL (EventCreate (Context, true , &HostVisibleEvent));
4960
- HostVisibleEvent->CleanedUp = true ;
4961
-
4962
4957
// Submit the command(s) signalling the proxy event to the queue.
4963
4958
// We have to first submit a wait for the device-only event for which this
4964
4959
// proxy is created.
@@ -4975,6 +4970,13 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
4975
4970
Queue, CommandList, false /* UseCopyEngine */ , OkToBatch))
4976
4971
return Res;
4977
4972
4973
+ // Create a "proxy" host-visible event.
4974
+ auto Res = createEventAndAssociateQueue (
4975
+ Queue, &HostVisibleEvent, PI_COMMAND_TYPE_USER, CommandList, true );
4976
+ // HostVisibleEvent->CleanedUp = true;
4977
+ if (Res != PI_SUCCESS)
4978
+ return Res;
4979
+
4978
4980
ZE_CALL (zeCommandListAppendWaitOnEvents,
4979
4981
(CommandList->first , 1 , &ZeEvent));
4980
4982
ZE_CALL (zeCommandListAppendSignalEvent,
@@ -4989,12 +4991,21 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
4989
4991
return PI_SUCCESS;
4990
4992
}
4991
4993
4992
- static pi_result EventCreate (pi_context Context, bool HostVisible,
4993
- pi_event *RetEvent) {
4994
+ // Helper function for creating a PI event.
4995
+ // The "Queue" argument specifies the PI queue where a command is submitted.
4996
+ // The "HostVisible" argument specifies if event needs to be allocated from
4997
+ // a host-visible pool.
4998
+ //
4999
+ static pi_result EventCreate (pi_context Context, pi_queue Queue,
5000
+ bool HostVisible, pi_event *RetEvent) {
5001
+
5002
+ bool ProfilingEnabled =
5003
+ !Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0 ;
5004
+
4994
5005
size_t Index = 0 ;
4995
5006
ze_event_pool_handle_t ZeEventPool = {};
4996
- if (auto Res = Context->getFreeSlotInExistingOrNewPool (ZeEventPool, Index,
4997
- HostVisible))
5007
+ if (auto Res = Context->getFreeSlotInExistingOrNewPool (
5008
+ ZeEventPool, Index, HostVisible, ProfilingEnabled ))
4998
5009
return Res;
4999
5010
5000
5011
ze_event_handle_t ZeEvent;
@@ -5035,8 +5046,9 @@ static pi_result EventCreate(pi_context Context, bool HostVisible,
5035
5046
return PI_SUCCESS;
5036
5047
}
5037
5048
5049
+ // External PI API entry
5038
5050
pi_result piEventCreate (pi_context Context, pi_event *RetEvent) {
5039
- return EventCreate (Context, EventsScope == AllHostVisible, RetEvent);
5051
+ return EventCreate (Context, nullptr , EventsScope == AllHostVisible, RetEvent);
5040
5052
}
5041
5053
5042
5054
pi_result piEventGetInfo (pi_event Event, pi_event_info ParamName,
@@ -5102,6 +5114,11 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
5102
5114
5103
5115
PI_ASSERT (Event, PI_INVALID_EVENT);
5104
5116
5117
+ if (Event->Queue &&
5118
+ (Event->Queue ->Properties & PI_QUEUE_PROFILING_ENABLE) == 0 ) {
5119
+ return PI_PROFILING_INFO_NOT_AVAILABLE;
5120
+ }
5121
+
5105
5122
uint64_t ZeTimerResolution =
5106
5123
Event->Queue
5107
5124
? Event->Queue ->Device ->ZeDeviceProperties ->timerResolution
@@ -5374,7 +5391,7 @@ static pi_result EventRelease(pi_event Event, pi_queue LockedQueue) {
5374
5391
// and release a reference to it.
5375
5392
if (Event->HostVisibleEvent && Event->HostVisibleEvent != Event) {
5376
5393
// Decrement ref-count of the host-visible proxy event.
5377
- PI_CALL (piEventRelease (Event->HostVisibleEvent ));
5394
+ PI_CALL (EventRelease (Event->HostVisibleEvent , LockedQueue ));
5378
5395
}
5379
5396
5380
5397
auto Context = Event->Context ;
0 commit comments