-//===--------- async_alloc.cpp - CUDA Adapter -----------------------------===//
+//===--------- async_alloc.cpp - Level Zero Adapter -----------------------===//
 //
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2025 Intel Corporation
 //
 // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
 // Exceptions. See LICENSE.TXT
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
+#include "context.hpp"
+#include "enqueued_pool.hpp"
+#include "event.hpp"
+
+#include "logger/ur_logger.hpp"
+
+#include <umf_helpers.hpp>
 #include <ur_api.h>
 
 namespace ur::level_zero {
 
-UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp(
-    ur_queue_handle_t, ur_usm_pool_handle_t, const size_t,
-    const ur_exp_async_usm_alloc_properties_t *, uint32_t,
-    const ur_event_handle_t *, void **, ur_event_handle_t *) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+static ur_result_t enqueueUSMAllocHelper(
+    ur_queue_handle_t Queue, ur_usm_pool_handle_t Pool, const size_t Size,
+    const ur_exp_async_usm_alloc_properties_t *, uint32_t NumEventsInWaitList,
+    const ur_event_handle_t *EventWaitList, void **RetMem,
+    ur_event_handle_t *OutEvent, ur_usm_type_t Type) {
+
+  std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
+
+  // Pick the pool to allocate from: the user-provided pool if given,
+  // otherwise the context's default async pool.
+  ur_usm_pool_handle_t USMPool = nullptr;
+  if (Pool) {
+    USMPool = Pool;
+  } else {
+    USMPool = &Queue->Context->AsyncPool;
+  }
+
+  auto Device = (Type == UR_USM_TYPE_HOST) ? nullptr : Queue->Device;
+
+  std::vector<ur_event_handle_t> ExtEventWaitList;
+  ur_event_handle_t OriginAllocEvent = nullptr;
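+  // Try to reuse a block that was freed asynchronously on this queue. If one
+  // is available, the event of the enqueued free that returned it to the pool
+  // is appended to the wait list so the block is not handed out before that
+  // free has completed.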
+  auto AsyncAlloc =
+      USMPool->allocateEnqueued(Queue, Device, nullptr, Type, Size);
+  if (!AsyncAlloc) {
+    auto Ret =
+        USMPool->allocate(Queue->Context, Device, nullptr, Type, Size, RetMem);
+    if (Ret) {
+      return Ret;
+    }
+  } else {
+    *RetMem = std::get<0>(*AsyncAlloc);
+    OriginAllocEvent = std::get<1>(*AsyncAlloc);
+    if (OriginAllocEvent) {
+      for (size_t I = 0; I < NumEventsInWaitList; ++I) {
+        ExtEventWaitList.push_back(EventWaitList[I]);
+      }
+      ExtEventWaitList.push_back(OriginAllocEvent);
+    }
+  }
+
+  if (!ExtEventWaitList.empty()) {
+    NumEventsInWaitList = ExtEventWaitList.size();
+    EventWaitList = ExtEventWaitList.data();
+  }
+
+  bool UseCopyEngine = false;
+  _ur_ze_event_list_t TmpWaitList;
+  UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
+      NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine));
+
+  bool OkToBatch = true;
+  // Get a new command list to be used on this call
+  ur_command_list_ptr_t CommandList{};
+  UR_CALL(Queue->Context->getAvailableCommandList(
+      Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList,
+      OkToBatch, nullptr /*ForcedCmdQueue*/));
+
+  ze_event_handle_t ZeEvent = nullptr;
+  ur_event_handle_t InternalEvent{};
+  bool IsInternal = OutEvent == nullptr;
+  ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
+
+  ur_command_t CommandType = UR_COMMAND_FORCE_UINT32;
+  switch (Type) {
+  case UR_USM_TYPE_HOST:
+    CommandType = UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP;
+    break;
+  case UR_USM_TYPE_DEVICE:
+    CommandType = UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP;
+    break;
+  case UR_USM_TYPE_SHARED:
+    CommandType = UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP;
+    break;
+  default:
+    logger::error("enqueueUSMAllocHelper: unsupported USM type");
+    throw UR_RESULT_ERROR_UNKNOWN;
+  }
+  UR_CALL(createEventAndAssociateQueue(Queue, Event, CommandType, CommandList,
+                                       IsInternal, false));
+  ZeEvent = (*Event)->ZeEvent;
+  (*Event)->WaitList = TmpWaitList;
+  (*Event)->OriginAllocEvent = OriginAllocEvent;
+
+  const auto &ZeCommandList = CommandList->first;
+  const auto &WaitList = (*Event)->WaitList;
+  if (WaitList.Length) {
+    ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
+               (ZeCommandList, WaitList.Length, WaitList.ZeEventList));
+  }
+
+  // Signal the event that marks completion of this allocation command.
+  ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));
+
+  UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch));
+
+  return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urEnqueueUSMDeviceAllocExp(
+    ur_queue_handle_t Queue,   ///< [in] handle of the queue object
+    ur_usm_pool_handle_t Pool, ///< [in][optional] USM pool descriptor
+    const size_t Size, ///< [in] minimum size in bytes of the USM memory object
+                       ///< to be allocated
+    const ur_exp_async_usm_alloc_properties_t
+        *Properties, ///< [in][optional] pointer to the enqueue asynchronous
+                     ///< USM allocation properties
+    uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
+    const ur_event_handle_t
+        *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
+                        ///< pointer to a list of events that must be complete
+                        ///< before the kernel execution. If nullptr, the
+                        ///< numEventsInWaitList must be 0, indicating no wait
+                        ///< events.
+    void **Mem, ///< [out] pointer to USM memory object
+    ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
+                                ///< identifies the async alloc
+) {
+  return enqueueUSMAllocHelper(Queue, Pool, Size, Properties,
+                               NumEventsInWaitList, EventWaitList, Mem,
+                               OutEvent, UR_USM_TYPE_DEVICE);
 }
 
-UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp(
-    ur_queue_handle_t, ur_usm_pool_handle_t, const size_t,
-    const ur_exp_async_usm_alloc_properties_t *, uint32_t,
-    const ur_event_handle_t *, void **, ur_event_handle_t *) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+ur_result_t urEnqueueUSMSharedAllocExp(
+    ur_queue_handle_t Queue,   ///< [in] handle of the queue object
+    ur_usm_pool_handle_t Pool, ///< [in][optional] USM pool descriptor
+    const size_t Size, ///< [in] minimum size in bytes of the USM memory object
+                       ///< to be allocated
+    const ur_exp_async_usm_alloc_properties_t
+        *Properties, ///< [in][optional] pointer to the enqueue asynchronous
+                     ///< USM allocation properties
+    uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
+    const ur_event_handle_t
+        *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
+                        ///< pointer to a list of events that must be complete
+                        ///< before the kernel execution. If nullptr, the
+                        ///< numEventsInWaitList must be 0, indicating no wait
+                        ///< events.
+    void **Mem, ///< [out] pointer to USM memory object
+    ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
+                                ///< identifies the async alloc
+) {
+  return enqueueUSMAllocHelper(Queue, Pool, Size, Properties,
+                               NumEventsInWaitList, EventWaitList, Mem,
+                               OutEvent, UR_USM_TYPE_SHARED);
 }
 
-UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp(
-    ur_queue_handle_t, ur_usm_pool_handle_t, const size_t,
-    const ur_exp_async_usm_alloc_properties_t *, uint32_t,
-    const ur_event_handle_t *, void **, ur_event_handle_t *) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+ur_result_t urEnqueueUSMHostAllocExp(
+    ur_queue_handle_t Queue,   ///< [in] handle of the queue object
+    ur_usm_pool_handle_t Pool, ///< [in][optional] handle of the USM memory pool
+    const size_t Size, ///< [in] minimum size in bytes of the USM memory object
+                       ///< to be allocated
+    const ur_exp_async_usm_alloc_properties_t
+        *Properties, ///< [in][optional] pointer to the enqueue asynchronous
+                     ///< USM allocation properties
+    uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
+    const ur_event_handle_t
+        *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
+                        ///< pointer to a list of events that must be complete
+                        ///< before the kernel execution. If nullptr, the
+                        ///< numEventsInWaitList must be 0, indicating no wait
+                        ///< events.
+    void **Mem, ///< [out] pointer to USM memory object
+    ur_event_handle_t
+        *OutEvent ///< [out][optional] return an event object that identifies
+                  ///< the asynchronous USM host allocation
+) {
+  return enqueueUSMAllocHelper(Queue, Pool, Size, Properties,
+                               NumEventsInWaitList, EventWaitList, Mem,
+                               OutEvent, UR_USM_TYPE_HOST);
 }
 
-UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t,
-                                             ur_usm_pool_handle_t, void *,
-                                             uint32_t,
-                                             const ur_event_handle_t *,
-                                             ur_event_handle_t *) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+ur_result_t urEnqueueUSMFreeExp(
+    ur_queue_handle_t Queue, ///< [in] handle of the queue object
+    ur_usm_pool_handle_t,    ///< [in][optional] USM pool descriptor
+    void *Mem,               ///< [in] pointer to USM memory object
+    uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
+    const ur_event_handle_t
+        *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
+                        ///< pointer to a list of events that must be complete
+                        ///< before the kernel execution. If nullptr, the
+                        ///< numEventsInWaitList must be 0, indicating no wait
+                        ///< events.
+    ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
+                                ///< identifies the async free
+) {
+  std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
+
+  bool UseCopyEngine = false;
+  _ur_ze_event_list_t TmpWaitList;
+  UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
+      NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine));
+
+  bool OkToBatch = false;
+  // Get a new command list to be used on this call
+  ur_command_list_ptr_t CommandList{};
+  UR_CALL(Queue->Context->getAvailableCommandList(
+      Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList,
+      OkToBatch, nullptr /*ForcedCmdQueue*/));
+
+  ze_event_handle_t ZeEvent = nullptr;
+  ur_event_handle_t InternalEvent{};
+  bool IsInternal = OutEvent == nullptr;
+  ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
+
+  UR_CALL(createEventAndAssociateQueue(Queue, Event,
+                                       UR_COMMAND_ENQUEUE_USM_FREE_EXP,
+                                       CommandList, IsInternal, false));
+  ZeEvent = (*Event)->ZeEvent;
+  (*Event)->WaitList = TmpWaitList;
+
+  const auto &ZeCommandList = CommandList->first;
+  const auto &WaitList = (*Event)->WaitList;
+  if (WaitList.Length) {
+    ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
+               (ZeCommandList, WaitList.Length, WaitList.ZeEventList));
+  }
+
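+  // Look up the UMF pool that owns this pointer. Blocks that did not come
+  // from a pool carrying a UsmPool tag are freed immediately rather than
+  // being deferred through the async free list.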
+  auto hPool = umfPoolByPtr(Mem);
+  if (!hPool) {
+    return USMFreeHelper(Queue->Context, Mem);
+  }
+
+  UsmPool *usmPool = nullptr;
+  auto ret = umfPoolGetTag(hPool, (void **)&usmPool);
+  if (ret != UMF_RESULT_SUCCESS || usmPool == nullptr) {
+    return USMFreeHelper(Queue->Context, Mem);
+  }
+
+  size_t size = umfPoolMallocUsableSize(hPool, Mem);
+  // Defer the actual free: record the block and its usable size in the pool's
+  // async free list, keyed to this event and queue, so a later enqueued
+  // allocation can reuse it.
+  usmPool->AsyncPool.insert(Mem, size, *Event, Queue);
+
+  // Signal the event that marks completion of this free command.
+  ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));
+
+  UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch));
+
+  return UR_RESULT_SUCCESS;
 }
 } // namespace ur::level_zero
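For reference, a minimal sketch of how these entry points are typically driven through the UR API; the queue handle is assumed to exist already, the size is arbitrary, and error checking is omitted:

  void *Ptr = nullptr;
  ur_event_handle_t AllocEvent = nullptr, FreeEvent = nullptr;

  // Enqueue a device allocation from the context's default async pool
  // (no explicit pool, empty wait list).
  urEnqueueUSMDeviceAllocExp(Queue, /*Pool=*/nullptr, 1 << 20,
                             /*Properties=*/nullptr, 0, nullptr, &Ptr,
                             &AllocEvent);

  // ... enqueue work that uses Ptr and waits on AllocEvent ...

  // Enqueue the free; the block goes back to the pool, and a later
  // urEnqueue*AllocExp call may hand it out again after waiting on FreeEvent.
  urEnqueueUSMFreeExp(Queue, /*Pool=*/nullptr, Ptr, 1, &AllocEvent, &FreeEvent);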