Skip to content

Commit eaa00ef

Browse files
committed
[OpenMP] Map omp_default_mem_alloc to global memory
Summary: Currently, we assign this to private memory. This causes failures on some SOLLVE tests. The standard isn't clear on the semantics of this allocation type, but there seems to be a consensus that it's supposed to be shared memory.
1 parent b05c554 commit eaa00ef

File tree

3 files changed

+24
-18
lines changed

3 files changed

+24
-18
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2048,15 +2048,15 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
20482048
const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
20492049
auto AS = LangAS::Default;
20502050
switch (A->getAllocatorType()) {
2051-
// Use the default allocator here as by default local vars are
2052-
// threadlocal.
20532051
case OMPAllocateDeclAttr::OMPNullMemAlloc:
20542052
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
2055-
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
20562053
case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
20572054
case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
2058-
// Follow the user decision - use default allocation.
2059-
return Address::invalid();
2055+
AS = LangAS::opencl_global;
2056+
break;
2057+
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
2058+
AS = LangAS::opencl_private;
2059+
break;
20602060
case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
20612061
// TODO: implement aupport for user-defined allocators.
20622062
return Address::invalid();
@@ -2208,12 +2208,14 @@ bool CGOpenMPRuntimeGPU::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
22082208
case OMPAllocateDeclAttr::OMPNullMemAlloc:
22092209
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
22102210
// Not supported, fallback to the default mem space.
2211-
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
22122211
case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
22132212
case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
22142213
case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
22152214
case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
2216-
AS = LangAS::Default;
2215+
AS = LangAS::opencl_global;
2216+
return true;
2217+
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
2218+
AS = LangAS::opencl_private;
22172219
return true;
22182220
case OMPAllocateDeclAttr::OMPConstMemAlloc:
22192221
AS = LangAS::cuda_constant;

clang/test/OpenMP/nvptx_allocate_codegen.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,18 +87,17 @@ void bar() {
8787
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
8888
// CHECK1-NEXT: entry:
8989
// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
90-
// CHECK1-NEXT: [[B:%.*]] = alloca double, align 8
9190
// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
9291
// CHECK1-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4
93-
// CHECK1-NEXT: store double 3.000000e+00, ptr [[B]], align 8
92+
// CHECK1-NEXT: store double 3.000000e+00, ptr addrspacecast (ptr addrspace(1) @b1 to ptr), align 8
9493
// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooIiET_v() #[[ATTR7:[0-9]+]]
9594
// CHECK1-NEXT: ret i32 [[CALL]]
9695
//
9796
//
9897
// CHECK1-LABEL: define {{[^@]+}}@_Z3fooIiET_v
9998
// CHECK1-SAME: () #[[ATTR1:[0-9]+]] comdat {
10099
// CHECK1-NEXT: entry:
101-
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZN2STIiE1mE, align 4
100+
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @_ZN2STIiE1mE to ptr), align 4
102101
// CHECK1-NEXT: store i32 [[TMP0]], ptr @v, align 4
103102
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr @v, align 4
104103
// CHECK1-NEXT: ret i32 [[TMP1]]
@@ -120,13 +119,12 @@ void bar() {
120119
// CHECK1-NEXT: entry:
121120
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
122121
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
123-
// CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4
124122
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
125123
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
126-
// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[BAR_A]], align 4
124+
// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr @bar_a, align 4
127125
// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
128126
// CHECK1-NEXT: store double [[CONV]], ptr addrspacecast (ptr addrspace(3) @bar_b to ptr), align 8
129-
// CHECK1-NEXT: call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR7]]
127+
// CHECK1-NEXT: call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) @bar_a) #[[ATTR7]]
130128
// CHECK1-NEXT: ret void
131129
//
132130
//

offload/test/api/omp_device_alloc.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,19 @@
55
#include <stdio.h>
66

77
int main() {
8-
#pragma omp target teams num_teams(4)
9-
#pragma omp parallel
8+
#pragma omp target
109
{
11-
int *ptr = (int *)omp_alloc(sizeof(int), omp_default_mem_alloc);
10+
int *ptr;
11+
#pragma omp allocate(ptr) allocator(omp_default_mem_alloc)
12+
ptr = omp_alloc(sizeof(int), omp_default_mem_alloc);
1213
assert(ptr && "Ptr is (null)!");
13-
*ptr = 1;
14-
assert(*ptr == 1 && "Ptr is not 1");
14+
*ptr = 0;
15+
#pragma omp parallel num_threads(32)
16+
{
17+
#pragma omp atomic
18+
*ptr += 1;
19+
}
20+
assert(*ptr == 32 && "Ptr is not 32");
1521
omp_free(ptr, omp_default_mem_alloc);
1622
}
1723

0 commit comments

Comments
 (0)