@@ -1199,8 +1199,11 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
1199
1199
zePrint (" Command list to be executed on copy engine\n " );
1200
1200
// If available, get the copy command queue associated with
1201
1201
// ZeCommandList
1202
- auto ZeCopyCommandQueue =
1203
- (Index == -1 ) ? nullptr : ZeCopyCommandQueues[Index];
1202
+ ze_command_queue_handle_t ZeCopyCommandQueue = nullptr ;
1203
+ if (Index != -1 ) {
1204
+ if (auto Res = getOrCreateCopyCommandQueue (Index, ZeCopyCommandQueue))
1205
+ return Res;
1206
+ }
1204
1207
auto &ZeCommandQueue =
1205
1208
(UseCopyEngine) ? ZeCopyCommandQueue : ZeComputeCommandQueue;
1206
1209
// Scope of the lock must be till the end of the function, otherwise new mem
@@ -1262,6 +1265,42 @@ bool _pi_queue::isBatchingAllowed() {
1262
1265
return (this ->QueueBatchSize > 0 && ((ZeSerialize & ZeSerializeBlock) == 0 ));
1263
1266
}
1264
1267
1268
+ pi_result _pi_queue::getOrCreateCopyCommandQueue (
1269
+ int Index, ze_command_queue_handle_t &ZeCopyCommandQueue) {
1270
+ ZeCopyCommandQueue = nullptr ;
1271
+
1272
+ // Make sure 'Index' is within limits
1273
+ PI_ASSERT ((Index >= 0 ) && (Index < (int )(ZeCopyCommandQueues.size ())),
1274
+ PI_INVALID_VALUE);
1275
+
1276
+ // Return the Ze copy command queue, if already available
1277
+ if (ZeCopyCommandQueues[Index]) {
1278
+ ZeCopyCommandQueue = ZeCopyCommandQueues[Index];
1279
+ return PI_SUCCESS;
1280
+ }
1281
+
1282
+ // Ze copy command queue is not available at 'Index'. So we create it below.
1283
+ ZeStruct<ze_command_queue_desc_t > ZeCommandQueueDesc;
1284
+ ZeCommandQueueDesc.ordinal = (Index == 0 ) ? Device->ZeMainCopyQueueGroupIndex
1285
+ : Device->ZeLinkCopyQueueGroupIndex ;
1286
+ // There are two copy queues: main copy queues and link copy queues.
1287
+ // ZeCommandQueueDesc.index is the index into the list of main (or link)
1288
+ // copy queues. (Index == 0) means we are using the main copy queue and
1289
+ // ZeCommandQueueDesc.index is set to 0. Otherwise, we use one of the link
1290
+ // copy queues and ZeCommandQueueDesc.index is set to (Index - 1) as Index
1291
+ // for link copy engines in the overall list starts from 1.
1292
+ ZeCommandQueueDesc.index = (Index == 0 ) ? 0 : Index - 1 ;
1293
+ zePrint (" NOTE: Copy Engine ZeCommandQueueDesc.ordinal = %d, "
1294
+ " ZeCommandQueueDesc.index = %d\n " ,
1295
+ ZeCommandQueueDesc.ordinal , ZeCommandQueueDesc.index );
1296
+ ZE_CALL (zeCommandQueueCreate,
1297
+ (Context->ZeContext , Device->ZeDevice ,
1298
+ &ZeCommandQueueDesc, // TODO: translate properties
1299
+ &ZeCopyCommandQueue));
1300
+ ZeCopyCommandQueues[Index] = ZeCopyCommandQueue;
1301
+ return PI_SUCCESS;
1302
+ }
1303
+
1265
1304
// This function will return one of possibly multiple available copy queues.
1266
1305
// Currently, a round robin strategy is used.
1267
1306
// This function also sends back the value of CopyQueueIndex and
@@ -1293,7 +1332,10 @@ _pi_queue::getZeCopyCommandQueue(int *CopyQueueIndex,
1293
1332
if (CopyQueueGroupIndex)
1294
1333
*CopyQueueGroupIndex = Device->ZeMainCopyQueueGroupIndex ;
1295
1334
zePrint (" Note: CopyQueueIndex = %d\n " , *CopyQueueIndex);
1296
- return ZeCopyCommandQueues[0 ];
1335
+ ze_command_queue_handle_t ZeCopyCommandQueue = nullptr ;
1336
+ if (getOrCreateCopyCommandQueue (0 , ZeCopyCommandQueue))
1337
+ return nullptr ;
1338
+ return ZeCopyCommandQueue;
1297
1339
}
1298
1340
1299
1341
// Round robin logic is used here to access copy command queues.
@@ -1319,7 +1361,10 @@ _pi_queue::getZeCopyCommandQueue(int *CopyQueueIndex,
1319
1361
((*CopyQueueIndex == 0 ) && Device->hasMainCopyEngine ())
1320
1362
? Device->ZeMainCopyQueueGroupIndex
1321
1363
: Device->ZeLinkCopyQueueGroupIndex ;
1322
- return ZeCopyCommandQueues[*CopyQueueIndex];
1364
+ ze_command_queue_handle_t ZeCopyCommandQueue = nullptr ;
1365
+ if (getOrCreateCopyCommandQueue (*CopyQueueIndex, ZeCopyCommandQueue))
1366
+ return nullptr ;
1367
+ return ZeCopyCommandQueue;
1323
1368
}
1324
1369
1325
1370
pi_result _pi_queue::executeOpenCommandList () {
@@ -2805,39 +2850,21 @@ pi_result piQueueCreate(pi_context Context, pi_device Device,
2805
2850
2806
2851
std::vector<ze_command_queue_handle_t > ZeCopyCommandQueues;
2807
2852
2808
- // Create queue to main copy engine
2853
+ // Create a placeholder in ZeCopyCommandQueues for a queue that will be used
2854
+ // to submit commands to main copy engine. This queue is initially NULL and
2855
+ // will be replaced by the Ze Command Queue which gets created just before its
2856
+ // first use.
2809
2857
ze_command_queue_handle_t ZeMainCopyCommandQueue = nullptr ;
2810
2858
if (Device->hasMainCopyEngine ()) {
2811
- zePrint (" NOTE: Main Copy Engine ZeCommandQueueDesc.ordinal = %d, "
2812
- " ZeCommandQueueDesc.index = %d\n " ,
2813
- Device->ZeMainCopyQueueGroupIndex , 0 );
2814
- ZeCommandQueueDesc.ordinal = Device->ZeMainCopyQueueGroupIndex ;
2815
- ZeCommandQueueDesc.index = 0 ;
2816
- ZE_CALL (zeCommandQueueCreate,
2817
- (Context->ZeContext , ZeDevice,
2818
- &ZeCommandQueueDesc, // TODO: translate properties
2819
- &ZeMainCopyCommandQueue));
2820
- // Main Copy Command Queue is pushed at start of ZeCopyCommandQueues
2821
- // vector.
2822
2859
ZeCopyCommandQueues.push_back (ZeMainCopyCommandQueue);
2823
2860
}
2824
- PI_ASSERT (Queue, PI_INVALID_QUEUE);
2825
2861
2826
- // Create additional queues to link copy engines and push them into
2827
- // ZeCopyCommandQueues vector .
2862
+ // Create additional 'placeholder queues' to link copy engines and push them
2863
+ // into ZeCopyCommandQueues .
2828
2864
if (Device->hasLinkCopyEngine ()) {
2829
2865
auto ZeNumLinkCopyQueues = Device->ZeLinkCopyQueueGroupProperties .numQueues ;
2830
2866
for (uint32_t i = 0 ; i < ZeNumLinkCopyQueues; ++i) {
2831
- zePrint (" NOTE: Link Copy Engine ZeCommandQueueDesc.ordinal = %d, "
2832
- " ZeCommandQueueDesc.index = %d\n " ,
2833
- Device->ZeLinkCopyQueueGroupIndex , i);
2834
2867
ze_command_queue_handle_t ZeLinkCopyCommandQueue = nullptr ;
2835
- ZeCommandQueueDesc.ordinal = Device->ZeLinkCopyQueueGroupIndex ;
2836
- ZeCommandQueueDesc.index = i;
2837
- ZE_CALL (zeCommandQueueCreate,
2838
- (Context->ZeContext , ZeDevice,
2839
- &ZeCommandQueueDesc, // TODO: translate properties
2840
- &ZeLinkCopyCommandQueue));
2841
2868
ZeCopyCommandQueues.push_back (ZeLinkCopyCommandQueue);
2842
2869
}
2843
2870
}
@@ -2919,7 +2946,8 @@ pi_result piQueueRelease(pi_queue Queue) {
2919
2946
// Make sure all commands get executed.
2920
2947
ZE_CALL (zeHostSynchronize, (Queue->ZeComputeCommandQueue ));
2921
2948
for (uint32_t i = 0 ; i < Queue->ZeCopyCommandQueues .size (); ++i) {
2922
- ZE_CALL (zeHostSynchronize, (Queue->ZeCopyCommandQueues [i]));
2949
+ if (Queue->ZeCopyCommandQueues [i])
2950
+ ZE_CALL (zeHostSynchronize, (Queue->ZeCopyCommandQueues [i]));
2923
2951
}
2924
2952
2925
2953
// Destroy all the fences created associated with this queue.
@@ -2960,7 +2988,8 @@ static pi_result QueueRelease(pi_queue Queue, pi_queue LockedQueue) {
2960
2988
if (Queue->OwnZeCommandQueue ) {
2961
2989
ZE_CALL (zeCommandQueueDestroy, (Queue->ZeComputeCommandQueue ));
2962
2990
for (uint32_t i = 0 ; i < Queue->ZeCopyCommandQueues .size (); ++i) {
2963
- ZE_CALL (zeCommandQueueDestroy, (Queue->ZeCopyCommandQueues [i]));
2991
+ if (Queue->ZeCopyCommandQueues [i])
2992
+ ZE_CALL (zeCommandQueueDestroy, (Queue->ZeCopyCommandQueues [i]));
2964
2993
}
2965
2994
}
2966
2995
@@ -2993,7 +3022,8 @@ pi_result piQueueFinish(pi_queue Queue) {
2993
3022
2994
3023
ZE_CALL (zeHostSynchronize, (Queue->ZeComputeCommandQueue ));
2995
3024
for (uint32_t i = 0 ; i < Queue->ZeCopyCommandQueues .size (); ++i) {
2996
- ZE_CALL (zeHostSynchronize, (Queue->ZeCopyCommandQueues [i]));
3025
+ if (Queue->ZeCopyCommandQueues [i])
3026
+ ZE_CALL (zeHostSynchronize, (Queue->ZeCopyCommandQueues [i]));
2997
3027
}
2998
3028
2999
3029
return PI_SUCCESS;
@@ -5389,7 +5419,8 @@ pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList,
5389
5419
5390
5420
ZE_CALL (zeHostSynchronize, (Queue->ZeComputeCommandQueue ));
5391
5421
for (uint32_t i = 0 ; i < Queue->ZeCopyCommandQueues .size (); ++i) {
5392
- ZE_CALL (zeHostSynchronize, (Queue->ZeCopyCommandQueues [i]));
5422
+ if (Queue->ZeCopyCommandQueues [i])
5423
+ ZE_CALL (zeHostSynchronize, (Queue->ZeCopyCommandQueues [i]));
5393
5424
}
5394
5425
5395
5426
Queue->LastCommandEvent = *Event;
0 commit comments