Skip to content

Commit 23efc67

Browse files
authored
[Offload] Remove non-blocking allocation type (llvm#159851)
Summary: This was originally added as a hack to work around CUDA's limitation on allocation. The `libc` implementation is no longer even used for CUDA, so this code is never hit. Even in that case, this code never truly worked. A true solution would be to use CUDA's virtual memory API instead to allocate 2MiB slabs independently from the normal memory management done in the stream.
1 parent 2dd0edf commit 23efc67

File tree

7 files changed

+3
-30
lines changed

7 files changed

+3
-30
lines changed

offload/include/omptarget.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,6 @@ enum TargetAllocTy : int32_t {
101101
TARGET_ALLOC_HOST,
102102
TARGET_ALLOC_SHARED,
103103
TARGET_ALLOC_DEFAULT,
104-
/// The allocation will not block on other streams.
105-
TARGET_ALLOC_DEVICE_NON_BLOCKING,
106104
};
107105

108106
inline KernelArgsTy CTorDTorKernelArgs = {

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2350,7 +2350,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
23502350
switch (Kind) {
23512351
case TARGET_ALLOC_DEFAULT:
23522352
case TARGET_ALLOC_DEVICE:
2353-
case TARGET_ALLOC_DEVICE_NON_BLOCKING:
23542353
MemoryPool = CoarseGrainedMemoryPools[0];
23552354
break;
23562355
case TARGET_ALLOC_HOST:
@@ -3847,7 +3846,6 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
38473846
switch (Kind) {
38483847
case TARGET_ALLOC_DEFAULT:
38493848
case TARGET_ALLOC_DEVICE:
3850-
case TARGET_ALLOC_DEVICE_NON_BLOCKING:
38513849
MemoryPool = CoarseGrainedMemoryPools[0];
38523850
break;
38533851
case TARGET_ALLOC_HOST:

offload/plugins-nextgen/common/include/ErrorReporting.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ class ErrorReporter {
6161
/// Return a nice name for an TargetAllocTy.
6262
static StringRef getAllocTyName(TargetAllocTy Kind) {
6363
switch (Kind) {
64-
case TARGET_ALLOC_DEVICE_NON_BLOCKING:
6564
case TARGET_ALLOC_DEFAULT:
6665
case TARGET_ALLOC_DEVICE:
6766
return "device memory";

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,7 +1295,6 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
12951295

12961296
switch (Kind) {
12971297
case TARGET_ALLOC_DEFAULT:
1298-
case TARGET_ALLOC_DEVICE_NON_BLOCKING:
12991298
case TARGET_ALLOC_DEVICE:
13001299
if (MemoryManager) {
13011300
Alloc = MemoryManager->allocate(Size, HostPtr);
@@ -1386,7 +1385,6 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
13861385
int Res;
13871386
switch (Kind) {
13881387
case TARGET_ALLOC_DEFAULT:
1389-
case TARGET_ALLOC_DEVICE_NON_BLOCKING:
13901388
case TARGET_ALLOC_DEVICE:
13911389
if (MemoryManager) {
13921390
Res = MemoryManager->free(TgtPtr);

offload/plugins-nextgen/common/src/RPC.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ rpc::Status handleOffloadOpcodes(plugin::GenericDeviceTy &Device,
2828
switch (Port.get_opcode()) {
2929
case LIBC_MALLOC: {
3030
Port.recv_and_send([&](rpc::Buffer *Buffer, uint32_t) {
31-
Buffer->data[0] = reinterpret_cast<uintptr_t>(Device.allocate(
32-
Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING));
31+
Buffer->data[0] = reinterpret_cast<uintptr_t>(
32+
Device.allocate(Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE));
3333
});
3434
break;
3535
}
3636
case LIBC_FREE: {
3737
Port.recv([&](rpc::Buffer *Buffer, uint32_t) {
3838
Device.free(reinterpret_cast<void *>(Buffer->data[0]),
39-
TARGET_ALLOC_DEVICE_NON_BLOCKING);
39+
TARGET_ALLOC_DEVICE);
4040
});
4141
break;
4242
}

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -587,16 +587,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
587587
Res = cuMemAllocManaged(&DevicePtr, Size, CU_MEM_ATTACH_GLOBAL);
588588
MemAlloc = (void *)DevicePtr;
589589
break;
590-
case TARGET_ALLOC_DEVICE_NON_BLOCKING: {
591-
CUstream Stream;
592-
if ((Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING)))
593-
break;
594-
if ((Res = cuMemAllocAsync(&DevicePtr, Size, Stream)))
595-
break;
596-
cuStreamSynchronize(Stream);
597-
Res = cuStreamDestroy(Stream);
598-
MemAlloc = (void *)DevicePtr;
599-
}
600590
}
601591

602592
if (auto Err =
@@ -627,15 +617,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
627617
case TARGET_ALLOC_HOST:
628618
Res = cuMemFreeHost(TgtPtr);
629619
break;
630-
case TARGET_ALLOC_DEVICE_NON_BLOCKING: {
631-
CUstream Stream;
632-
if ((Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING)))
633-
break;
634-
cuMemFreeAsync(reinterpret_cast<CUdeviceptr>(TgtPtr), Stream);
635-
cuStreamSynchronize(Stream);
636-
if ((Res = cuStreamDestroy(Stream)))
637-
break;
638-
}
639620
}
640621

641622
if (auto Err = Plugin::check(Res, "error in cuMemFree[Host]: %s")) {

offload/plugins-nextgen/host/src/rtl.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,6 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
250250
case TARGET_ALLOC_DEVICE:
251251
case TARGET_ALLOC_HOST:
252252
case TARGET_ALLOC_SHARED:
253-
case TARGET_ALLOC_DEVICE_NON_BLOCKING:
254253
MemAlloc = std::malloc(Size);
255254
break;
256255
}

0 commit comments

Comments
 (0)