Skip to content

Commit 1b844bf

Browse files
authored
[SYCL][L0][Plugin] Call ZeCommandQueueCreate on demand (#5109)
Signed-off-by: Arvind Sudarsanam <arvind.sudarsanam@intel.com>
1 parent c4a7290 commit 1b844bf

File tree

2 files changed

+71
-32
lines changed

2 files changed

+71
-32
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 63 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,8 +1199,11 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
11991199
zePrint("Command list to be executed on copy engine\n");
12001200
// If available, get the copy command queue associated with
12011201
// ZeCommandList
1202-
auto ZeCopyCommandQueue =
1203-
(Index == -1) ? nullptr : ZeCopyCommandQueues[Index];
1202+
ze_command_queue_handle_t ZeCopyCommandQueue = nullptr;
1203+
if (Index != -1) {
1204+
if (auto Res = getOrCreateCopyCommandQueue(Index, ZeCopyCommandQueue))
1205+
return Res;
1206+
}
12041207
auto &ZeCommandQueue =
12051208
(UseCopyEngine) ? ZeCopyCommandQueue : ZeComputeCommandQueue;
12061209
// Scope of the lock must be till the end of the function, otherwise new mem
@@ -1262,6 +1265,42 @@ bool _pi_queue::isBatchingAllowed() {
12621265
return (this->QueueBatchSize > 0 && ((ZeSerialize & ZeSerializeBlock) == 0));
12631266
}
12641267

1268+
// Looks up the Level Zero copy command queue stored at
// ZeCopyCommandQueues[Index] and hands it back through 'ZeCopyCommandQueue',
// creating the queue with zeCommandQueueCreate on first use.
//
// Index              - position in ZeCopyCommandQueues. 0 selects the main
//                      copy queue group, any other value selects the link
//                      copy queue group (queue Index - 1 within that group).
// ZeCopyCommandQueue - out-parameter. Reset to nullptr on entry and set to
//                      the existing or newly created handle on success.
//
// Returns PI_SUCCESS both when the handle already existed and when it was
// created here. PI_ASSERT and ZE_CALL are macros defined outside this view;
// presumably they return an error code from this function on failure
// (PI_INVALID_VALUE for an out-of-range Index) - TODO confirm.
//
// NOTE(review): no locking is done in this function; presumably callers hold
// the queue lock while ZeCopyCommandQueues is modified - confirm.
// NOTE(review): the zePrint below formats ordinal/index with %d; these
// descriptor fields are presumably uint32_t in the Level Zero headers -
// confirm the format specifier.
pi_result _pi_queue::getOrCreateCopyCommandQueue(
1269+
int Index, ze_command_queue_handle_t &ZeCopyCommandQueue) {
1270+
ZeCopyCommandQueue = nullptr; // start from a null handle; overwritten below
1271+
1272+
// Make sure 'Index' is within limits
1273+
PI_ASSERT((Index >= 0) && (Index < (int)(ZeCopyCommandQueues.size())),
1274+
PI_INVALID_VALUE);
1275+
1276+
// Return the Ze copy command queue, if already available
1277+
if (ZeCopyCommandQueues[Index]) {
1278+
ZeCopyCommandQueue = ZeCopyCommandQueues[Index];
1279+
return PI_SUCCESS;
1280+
}
1281+
1282+
// Ze copy command queue is not available at 'Index'. So we create it below.
1283+
ZeStruct<ze_command_queue_desc_t> ZeCommandQueueDesc;
1284+
ZeCommandQueueDesc.ordinal = (Index == 0) ? Device->ZeMainCopyQueueGroupIndex
1285+
: Device->ZeLinkCopyQueueGroupIndex;
1286+
// There are two copy queues: main copy queues and link copy queues.
1287+
// ZeCommandQueueDesc.index is the index into the list of main (or link)
1288+
// copy queues. (Index == 0) means we are using the main copy queue and
1289+
// ZeCommandQueueDesc.index is set to 0. Otherwise, we use one of the link
1290+
// copy queues and ZeCommandQueueDesc.index is set to (Index - 1) as Index
1291+
// for link copy engines in the overall list starts from 1.
1292+
ZeCommandQueueDesc.index = (Index == 0) ? 0 : Index - 1;
1293+
zePrint("NOTE: Copy Engine ZeCommandQueueDesc.ordinal = %d, "
1294+
"ZeCommandQueueDesc.index = %d\n",
1295+
ZeCommandQueueDesc.ordinal, ZeCommandQueueDesc.index);
1296+
ZE_CALL(zeCommandQueueCreate,
1297+
(Context->ZeContext, Device->ZeDevice,
1298+
&ZeCommandQueueDesc, // TODO: translate properties
1299+
&ZeCopyCommandQueue));
1300+
ZeCopyCommandQueues[Index] = ZeCopyCommandQueue; // cache so later calls take the early-return path
1301+
return PI_SUCCESS;
1302+
}
1303+
12651304
// This function will return one of possibly multiple available copy queues.
12661305
// Currently, a round robin strategy is used.
12671306
// This function also sends back the value of CopyQueueIndex and
@@ -1293,7 +1332,10 @@ _pi_queue::getZeCopyCommandQueue(int *CopyQueueIndex,
12931332
if (CopyQueueGroupIndex)
12941333
*CopyQueueGroupIndex = Device->ZeMainCopyQueueGroupIndex;
12951334
zePrint("Note: CopyQueueIndex = %d\n", *CopyQueueIndex);
1296-
return ZeCopyCommandQueues[0];
1335+
ze_command_queue_handle_t ZeCopyCommandQueue = nullptr;
1336+
if (getOrCreateCopyCommandQueue(0, ZeCopyCommandQueue))
1337+
return nullptr;
1338+
return ZeCopyCommandQueue;
12971339
}
12981340

12991341
// Round robin logic is used here to access copy command queues.
@@ -1319,7 +1361,10 @@ _pi_queue::getZeCopyCommandQueue(int *CopyQueueIndex,
13191361
((*CopyQueueIndex == 0) && Device->hasMainCopyEngine())
13201362
? Device->ZeMainCopyQueueGroupIndex
13211363
: Device->ZeLinkCopyQueueGroupIndex;
1322-
return ZeCopyCommandQueues[*CopyQueueIndex];
1364+
ze_command_queue_handle_t ZeCopyCommandQueue = nullptr;
1365+
if (getOrCreateCopyCommandQueue(*CopyQueueIndex, ZeCopyCommandQueue))
1366+
return nullptr;
1367+
return ZeCopyCommandQueue;
13231368
}
13241369

13251370
pi_result _pi_queue::executeOpenCommandList() {
@@ -2805,39 +2850,21 @@ pi_result piQueueCreate(pi_context Context, pi_device Device,
28052850

28062851
std::vector<ze_command_queue_handle_t> ZeCopyCommandQueues;
28072852

2808-
// Create queue to main copy engine
2853+
// Create a placeholder in ZeCopyCommandQueues for a queue that will be used
2854+
// to submit commands to main copy engine. This queue is initially NULL and
2855+
// will be replaced by the Ze Command Queue which gets created just before its
2856+
// first use.
28092857
ze_command_queue_handle_t ZeMainCopyCommandQueue = nullptr;
28102858
if (Device->hasMainCopyEngine()) {
2811-
zePrint("NOTE: Main Copy Engine ZeCommandQueueDesc.ordinal = %d, "
2812-
"ZeCommandQueueDesc.index = %d\n",
2813-
Device->ZeMainCopyQueueGroupIndex, 0);
2814-
ZeCommandQueueDesc.ordinal = Device->ZeMainCopyQueueGroupIndex;
2815-
ZeCommandQueueDesc.index = 0;
2816-
ZE_CALL(zeCommandQueueCreate,
2817-
(Context->ZeContext, ZeDevice,
2818-
&ZeCommandQueueDesc, // TODO: translate properties
2819-
&ZeMainCopyCommandQueue));
2820-
// Main Copy Command Queue is pushed at start of ZeCopyCommandQueues
2821-
// vector.
28222859
ZeCopyCommandQueues.push_back(ZeMainCopyCommandQueue);
28232860
}
2824-
PI_ASSERT(Queue, PI_INVALID_QUEUE);
28252861

2826-
// Create additional queues to link copy engines and push them into
2827-
// ZeCopyCommandQueues vector.
2862+
// Create additional 'placeholder queues' to link copy engines and push them
2863+
// into ZeCopyCommandQueues.
28282864
if (Device->hasLinkCopyEngine()) {
28292865
auto ZeNumLinkCopyQueues = Device->ZeLinkCopyQueueGroupProperties.numQueues;
28302866
for (uint32_t i = 0; i < ZeNumLinkCopyQueues; ++i) {
2831-
zePrint("NOTE: Link Copy Engine ZeCommandQueueDesc.ordinal = %d, "
2832-
"ZeCommandQueueDesc.index = %d\n",
2833-
Device->ZeLinkCopyQueueGroupIndex, i);
28342867
ze_command_queue_handle_t ZeLinkCopyCommandQueue = nullptr;
2835-
ZeCommandQueueDesc.ordinal = Device->ZeLinkCopyQueueGroupIndex;
2836-
ZeCommandQueueDesc.index = i;
2837-
ZE_CALL(zeCommandQueueCreate,
2838-
(Context->ZeContext, ZeDevice,
2839-
&ZeCommandQueueDesc, // TODO: translate properties
2840-
&ZeLinkCopyCommandQueue));
28412868
ZeCopyCommandQueues.push_back(ZeLinkCopyCommandQueue);
28422869
}
28432870
}
@@ -2919,7 +2946,8 @@ pi_result piQueueRelease(pi_queue Queue) {
29192946
// Make sure all commands get executed.
29202947
ZE_CALL(zeHostSynchronize, (Queue->ZeComputeCommandQueue));
29212948
for (uint32_t i = 0; i < Queue->ZeCopyCommandQueues.size(); ++i) {
2922-
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
2949+
if (Queue->ZeCopyCommandQueues[i])
2950+
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
29232951
}
29242952

29252953
// Destroy all the fences created associated with this queue.
@@ -2960,7 +2988,8 @@ static pi_result QueueRelease(pi_queue Queue, pi_queue LockedQueue) {
29602988
if (Queue->OwnZeCommandQueue) {
29612989
ZE_CALL(zeCommandQueueDestroy, (Queue->ZeComputeCommandQueue));
29622990
for (uint32_t i = 0; i < Queue->ZeCopyCommandQueues.size(); ++i) {
2963-
ZE_CALL(zeCommandQueueDestroy, (Queue->ZeCopyCommandQueues[i]));
2991+
if (Queue->ZeCopyCommandQueues[i])
2992+
ZE_CALL(zeCommandQueueDestroy, (Queue->ZeCopyCommandQueues[i]));
29642993
}
29652994
}
29662995

@@ -2993,7 +3022,8 @@ pi_result piQueueFinish(pi_queue Queue) {
29933022

29943023
ZE_CALL(zeHostSynchronize, (Queue->ZeComputeCommandQueue));
29953024
for (uint32_t i = 0; i < Queue->ZeCopyCommandQueues.size(); ++i) {
2996-
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
3025+
if (Queue->ZeCopyCommandQueues[i])
3026+
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
29973027
}
29983028

29993029
return PI_SUCCESS;
@@ -5389,7 +5419,8 @@ pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList,
53895419

53905420
ZE_CALL(zeHostSynchronize, (Queue->ZeComputeCommandQueue));
53915421
for (uint32_t i = 0; i < Queue->ZeCopyCommandQueues.size(); ++i) {
5392-
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
5422+
if (Queue->ZeCopyCommandQueues[i])
5423+
ZE_CALL(zeHostSynchronize, (Queue->ZeCopyCommandQueues[i]));
53935424
}
53945425

53955426
Queue->LastCommandEvent = *Event;

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,14 @@ struct _pi_queue : _pi_object {
621621
// link copy engines, if available.
622622
std::vector<ze_command_queue_handle_t> ZeCopyCommandQueues;
623623

624+
// This function will check if a Ze copy command queue is available in
625+
// ZeCopyCommandQueues at index 'Index'.
626+
// If available, it will return the queue. Otherwise, it will create a new
627+
// Ze copy command queue and return a newly created queue.
628+
pi_result
629+
getOrCreateCopyCommandQueue(int Index,
630+
ze_command_queue_handle_t &ZeCopyCommandQueue);
631+
624632
// One of the many available copy command queues will be used for
625633
// submitting command lists to. This variable stores index of the last used
626634
// copy command queue in the ZeCopyCommandQueues vector.

0 commit comments

Comments
 (0)