Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit dabebaf

Browse files
committed
[OpenMP][libomptarget] Add support for close map modifier
Summary: This patch adds support for the close map modifier. The close map modifier will overwrite the unified shared memory requirement and create a device copy of the data. Reviewers: ABataev, Hahnfeld, caomhin, grokos, jdoerfert, AlexEichenberger Reviewed By: Hahnfeld, AlexEichenberger Subscribers: guansong, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D65340 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@368488 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 9ea8071 commit dabebaf

File tree

7 files changed

+344
-15
lines changed

7 files changed

+344
-15
lines changed

libomptarget/include/omptarget.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ enum tgt_map_type {
4747
OMP_TGT_MAPTYPE_LITERAL = 0x100,
4848
// mapping is implicit
4949
OMP_TGT_MAPTYPE_IMPLICIT = 0x200,
50+
// copy data to device
51+
OMP_TGT_MAPTYPE_CLOSE = 0x400,
5052
// member of struct, member given by [16 MSBs] - 1
5153
OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000
5254
};

libomptarget/src/device.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
158158
// to do an illegal mapping.
159159
void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
160160
int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
161-
bool UpdateRefCount) {
161+
bool UpdateRefCount, bool HasCloseModifier) {
162162
void *rc = NULL;
163163
IsHostPtr = false;
164164
DataMapMtx.lock();
@@ -192,9 +192,9 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
192192
// privatized use host address. Any explicitly mapped variables also use
193193
// host address where correctness is not impeded. In all other cases
194194
// maps are respected.
195-
// TODO: In addition to the mapping rules above, when the close map
196-
// modifier is implemented, foce the mapping of the variable to the device.
197-
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
195+
// In addition to the mapping rules above, the close map
196+
// modifier forces the mapping of the variable to the device.
197+
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) {
198198
DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
199199
DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
200200
IsHostPtr = true;
@@ -204,8 +204,8 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
204204
IsNew = true;
205205
uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
206206
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
207-
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
208-
DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
207+
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
208+
DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
209209
HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
210210
(uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
211211
rc = (void *)tp;
@@ -269,8 +269,9 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
269269
return NULL;
270270
}
271271

272-
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
273-
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
272+
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
273+
bool HasCloseModifier) {
274+
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
274275
return OFFLOAD_SUCCESS;
275276
// Check if the pointer is contained in any sub-nodes.
276277
int rc;

libomptarget/src/device.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,13 @@ struct DeviceTy {
132132
long getMapEntryRefCnt(void *HstPtrBegin);
133133
LookupResult lookupMapping(void *HstPtrBegin, int64_t Size);
134134
void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
135-
bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true);
135+
bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true,
136+
bool HasCloseModifier = false);
136137
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
137138
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
138139
bool UpdateRefCount, bool &IsHostPtr);
139-
int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete);
140+
int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete,
141+
bool HasCloseModifier = false);
140142
int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
141143
int disassociatePtr(void *HstPtrBegin);
142144

libomptarget/src/omptarget.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,9 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
244244
bool IsNew, Pointer_IsNew;
245245
bool IsHostPtr = false;
246246
bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
247+
// Force the creation of a device side copy of the data when:
248+
// a close map modifier was associated with a map that contained a to.
249+
bool HasCloseModifier = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE;
247250
// UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
248251
// have reached this point via __tgt_target_data_begin and not __tgt_target
249252
// then no argument is marked as TARGET_PARAM ("omp target data map" is not
@@ -254,7 +257,8 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
254257
DP("Has a pointer entry: \n");
255258
// base is address of pointer.
256259
Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
257-
sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef);
260+
sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef,
261+
HasCloseModifier);
258262
if (!Pointer_TgtPtrBegin) {
259263
DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
260264
"illegal mapping).\n");
@@ -270,7 +274,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
270274
}
271275

272276
void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
273-
data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef);
277+
data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef, HasCloseModifier);
274278
if (!TgtPtrBegin && data_size) {
275279
// If data_size==0, then the argument could be a zero-length pointer to
276280
// NULL, so getOrAlloc() returning NULL is not an error.
@@ -290,7 +294,8 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
290294

291295
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
292296
bool copy = false;
293-
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
297+
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
298+
HasCloseModifier) {
294299
if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
295300
copy = true;
296301
} else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
@@ -370,6 +375,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
370375
bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
371376
(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
372377
bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
378+
bool HasCloseModifier = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE;
373379

374380
// If PTR_AND_OBJ, HstPtrBegin is address of pointee
375381
void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
@@ -390,7 +396,8 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
390396
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
391397
bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
392398
bool CopyMember = false;
393-
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
399+
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
400+
HasCloseModifier) {
394401
if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
395402
!(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
396403
// Copy data only if the "parent" struct has RefCount==1.
@@ -455,7 +462,8 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
455462

456463
// Deallocate map
457464
if (DelEntry) {
458-
int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete);
465+
int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete,
466+
HasCloseModifier);
459467
if (rt != OFFLOAD_SUCCESS) {
460468
DP("Deallocating data from device failed.\n");
461469
return OFFLOAD_FAIL;
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
2+
// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
3+
// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
4+
// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
5+
6+
// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
7+
8+
#include <omp.h>
9+
#include <stdio.h>
10+
11+
#pragma omp requires unified_shared_memory
12+
13+
#define N 1024
14+
15+
int main(int argc, char *argv[]) {
16+
int fails;
17+
void *host_alloc = 0, *device_alloc = 0;
18+
int *a = (int *)malloc(N * sizeof(int));
19+
20+
// Init
21+
for (int i = 0; i < N; ++i) {
22+
a[i] = 10;
23+
}
24+
host_alloc = &a[0];
25+
26+
//
27+
// map + target no close
28+
//
29+
#pragma omp target data map(tofrom : a[ : N]) map(tofrom : device_alloc)
30+
{
31+
#pragma omp target map(tofrom : device_alloc)
32+
{ device_alloc = &a[0]; }
33+
}
34+
35+
// CHECK: a used from unified memory.
36+
if (device_alloc == host_alloc)
37+
printf("a used from unified memory.\n");
38+
39+
//
40+
// map + target with close
41+
//
42+
device_alloc = 0;
43+
#pragma omp target data map(close, tofrom : a[ : N]) map(tofrom : device_alloc)
44+
{
45+
#pragma omp target map(tofrom : device_alloc)
46+
{ device_alloc = &a[0]; }
47+
}
48+
// CHECK: a copied to device.
49+
if (device_alloc != host_alloc)
50+
printf("a copied to device.\n");
51+
52+
//
53+
// map + use_device_ptr no close
54+
//
55+
device_alloc = 0;
56+
#pragma omp target data map(tofrom : a[ : N]) use_device_ptr(a)
57+
{ device_alloc = &a[0]; }
58+
59+
// CHECK: a used from unified memory with use_device_ptr.
60+
if (device_alloc == host_alloc)
61+
printf("a used from unified memory with use_device_ptr.\n");
62+
63+
//
64+
// map + use_device_ptr close
65+
//
66+
device_alloc = 0;
67+
#pragma omp target data map(close, tofrom : a[ : N]) use_device_ptr(a)
68+
{ device_alloc = &a[0]; }
69+
70+
// CHECK: a used from device memory with use_device_ptr.
71+
if (device_alloc != host_alloc)
72+
printf("a used from device memory with use_device_ptr.\n");
73+
74+
//
75+
// map enter/exit + close
76+
//
77+
device_alloc = 0;
78+
#pragma omp target enter data map(close, to : a[ : N])
79+
80+
#pragma omp target map(from : device_alloc)
81+
{ device_alloc = &a[0]; }
82+
83+
#pragma omp target exit data map(from : a[ : N])
84+
85+
// CHECK: a has been mapped to the device.
86+
if (device_alloc != host_alloc)
87+
printf("a has been mapped to the device.\n");
88+
89+
free(a);
90+
91+
// CHECK: Done!
92+
printf("Done!\n");
93+
94+
return 0;
95+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
2+
// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
3+
// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
4+
// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
5+
6+
#include <omp.h>
7+
#include <stdio.h>
8+
9+
// ---------------------------------------------------------------------------
10+
// Various definitions copied from OpenMP RTL
11+
12+
extern void __tgt_register_requires(int64_t);
13+
14+
extern void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
15+
void **args_base, void **args,
16+
int64_t *arg_sizes, int64_t *arg_types);
17+
18+
extern void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
19+
void **args_base, void **args,
20+
int64_t *arg_sizes, int64_t *arg_types);
21+
22+
// End of definitions copied from OpenMP RTL.
23+
// ---------------------------------------------------------------------------
24+
25+
#pragma omp requires unified_shared_memory
26+
27+
#define N 1024
28+
29+
int main(int argc, char *argv[]) {
30+
int fails;
31+
void *host_alloc = 0, *device_alloc = 0;
32+
int *a = (int *)malloc(N * sizeof(int));
33+
34+
// Manual registration of requires flags for Clang versions
35+
// that do not support requires.
36+
__tgt_register_requires(8);
37+
38+
// Init
39+
for (int i = 0; i < N; ++i) {
40+
a[i] = 10;
41+
}
42+
host_alloc = &a[0];
43+
44+
// Dummy target region that ensures the runtime library is loaded when
45+
// the target data begin/end functions are manually called below.
46+
#pragma omp target
47+
{}
48+
49+
// Manual calls
50+
int device_id = omp_get_default_device();
51+
int arg_num = 1;
52+
void **args_base = (void **)&a;
53+
void **args = (void **)&a;
54+
int64_t arg_sizes[arg_num];
55+
56+
arg_sizes[0] = sizeof(int) * N;
57+
58+
int64_t arg_types[arg_num];
59+
60+
// Ox400 enables the CLOSE map type in the runtime:
61+
// OMP_TGT_MAPTYPE_CLOSE = 0x400
62+
// OMP_TGT_MAPTYPE_TO = 0x001
63+
arg_types[0] = 0x400 | 0x001;
64+
65+
device_alloc = host_alloc;
66+
67+
__tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes,
68+
arg_types);
69+
70+
#pragma omp target data use_device_ptr(a)
71+
{ device_alloc = a; }
72+
73+
__tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes,
74+
arg_types);
75+
76+
// CHECK: a was copied to the device
77+
if (device_alloc != host_alloc)
78+
printf("a was copied to the device\n");
79+
80+
free(a);
81+
82+
// CHECK: Done!
83+
printf("Done!\n");
84+
85+
return 0;
86+
}

0 commit comments

Comments
 (0)