Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 3654abe

Browse files
committed
[OpenMP][libomptarget] Add support for unified memory for regular maps
Summary: This patch adds support for using unified memory for the regular maps that occur when a target region is offloaded to the device. When only a single version of the data is required, the host address can be used. When variables need to be privatized or globalized in any way, the copy to the device is still required for correctness. Reviewers: ABataev, jdoerfert, Hahnfeld, AlexEichenberger, caomhin, grokos Reviewed By: Hahnfeld Subscribers: mgorny, guansong, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D65001 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@368192 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 2535f2b commit 3654abe

File tree

7 files changed

+383
-47
lines changed

7 files changed

+383
-47
lines changed

libomptarget/src/api.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,15 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) {
113113

114114
DeviceTy& Device = Devices[device_num];
115115
bool IsLast; // not used
116-
int rc = (Device.getTgtPtrBegin(ptr, 0, IsLast, false) != NULL);
116+
bool IsHostPtr;
117+
void *TgtPtr = Device.getTgtPtrBegin(ptr, 0, IsLast, false, IsHostPtr);
118+
int rc = (TgtPtr != NULL);
119+
// Under unified memory the host pointer can be returned by the
120+
// getTgtPtrBegin() function which means that there is no device
121+
// corresponding pointer for ptr. This function should return false
122+
// in that situation.
123+
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
124+
rc = !IsHostPtr;
117125
DP("Call to omp_target_is_present returns %d\n", rc);
118126
return rc;
119127
}

libomptarget/src/device.cpp

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -157,12 +157,17 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
157157
// If NULL is returned, then either data allocation failed or the user tried
158158
// to do an illegal mapping.
159159
void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
160-
int64_t Size, bool &IsNew, bool IsImplicit, bool UpdateRefCount) {
160+
int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
161+
bool UpdateRefCount) {
161162
void *rc = NULL;
163+
IsHostPtr = false;
162164
DataMapMtx.lock();
163165
LookupResult lr = lookupMapping(HstPtrBegin, Size);
164166

165167
// Check if the pointer is contained.
168+
// If a variable is mapped to the device manually by the user - which would
169+
// lead to the IsContained flag to be true - then we must ensure that the
170+
// device address is returned even under unified memory conditions.
166171
if (lr.Flags.IsContained ||
167172
((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) {
168173
auto &HT = *lr.Entry;
@@ -183,15 +188,28 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
183188
// Explicit extension of mapped data - not allowed.
184189
DP("Explicit extension of mapping is not allowed.\n");
185190
} else if (Size) {
186-
// If it is not contained and Size > 0 we should create a new entry for it.
187-
IsNew = true;
188-
uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
189-
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
190-
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
191-
DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
192-
HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
193-
(uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
194-
rc = (void *)tp;
191+
// If unified shared memory is active, implicitly mapped variables that are not
192+
// privatized use host address. Any explicitly mapped variables also use
193+
// host address where correctness is not impeded. In all other cases
194+
// maps are respected.
195+
// TODO: In addition to the mapping rules above, when the close map
196+
// modifier is implemented, force the mapping of the variable to the device.
197+
if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
198+
DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
199+
DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
200+
IsHostPtr = true;
201+
rc = HstPtrBegin;
202+
} else {
203+
// If it is not contained and Size > 0 we should create a new entry for it.
204+
IsNew = true;
205+
uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
206+
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
207+
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
208+
DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
209+
HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
210+
(uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
211+
rc = (void *)tp;
212+
}
195213
}
196214

197215
DataMapMtx.unlock();
@@ -202,8 +220,10 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
202220
// Return the target pointer begin (where the data will be moved).
203221
// Decrement the reference counter if called from target_data_end.
204222
void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
205-
bool UpdateRefCount) {
223+
bool UpdateRefCount, bool &IsHostPtr) {
206224
void *rc = NULL;
225+
IsHostPtr = false;
226+
IsLast = false;
207227
DataMapMtx.lock();
208228
LookupResult lr = lookupMapping(HstPtrBegin, Size);
209229

@@ -221,8 +241,14 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
221241
(CONSIDERED_INF(HT.RefCount)) ? "INF" :
222242
std::to_string(HT.RefCount).c_str());
223243
rc = (void *)tp;
224-
} else {
225-
IsLast = false;
244+
} else if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
245+
// If the value isn't found in the mapping and unified shared memory
246+
// is on then it means we have stumbled upon a value which we need to
247+
// use directly from the host.
248+
DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
249+
DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
250+
IsHostPtr = true;
251+
rc = HstPtrBegin;
226252
}
227253

228254
DataMapMtx.unlock();
@@ -244,6 +270,8 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
244270
}
245271

246272
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
273+
if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
274+
return OFFLOAD_SUCCESS;
247275
// Check if the pointer is contained in any sub-nodes.
248276
int rc;
249277
DataMapMtx.lock();

libomptarget/src/device.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,10 @@ struct DeviceTy {
137137
long getMapEntryRefCnt(void *HstPtrBegin);
138138
LookupResult lookupMapping(void *HstPtrBegin, int64_t Size);
139139
void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
140-
bool &IsNew, bool IsImplicit, bool UpdateRefCount = true);
140+
bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true);
141141
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
142142
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
143-
bool UpdateRefCount);
143+
bool UpdateRefCount, bool &IsHostPtr);
144144
int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete);
145145
int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
146146
int disassociatePtr(void *HstPtrBegin);

libomptarget/src/omptarget.cpp

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
242242
// Address of pointer on the host and device, respectively.
243243
void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
244244
bool IsNew, Pointer_IsNew;
245+
bool IsHostPtr = false;
245246
bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
246247
// UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
247248
// have reached this point via __tgt_target_data_begin and not __tgt_target
@@ -253,7 +254,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
253254
DP("Has a pointer entry: \n");
254255
// base is address of pointer.
255256
Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
256-
sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef);
257+
sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef);
257258
if (!Pointer_TgtPtrBegin) {
258259
DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
259260
"illegal mapping).\n");
@@ -269,7 +270,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
269270
}
270271

271272
void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
272-
data_size, IsNew, IsImplicit, UpdateRef);
273+
data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef);
273274
if (!TgtPtrBegin && data_size) {
274275
// If data_size==0, then the argument could be a zero-length pointer to
275276
// NULL, so getOrAlloc() returning NULL is not an error.
@@ -289,19 +290,21 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
289290

290291
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
291292
bool copy = false;
292-
if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
293-
copy = true;
294-
} else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
295-
// Copy data only if the "parent" struct has RefCount==1.
296-
int32_t parent_idx = member_of(arg_types[i]);
297-
long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
298-
assert(parent_rc > 0 && "parent struct not found");
299-
if (parent_rc == 1) {
293+
if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
294+
if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
300295
copy = true;
296+
} else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
297+
// Copy data only if the "parent" struct has RefCount==1.
298+
int32_t parent_idx = member_of(arg_types[i]);
299+
long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
300+
assert(parent_rc > 0 && "parent struct not found");
301+
if (parent_rc == 1) {
302+
copy = true;
303+
}
301304
}
302305
}
303306

304-
if (copy) {
307+
if (copy && !IsHostPtr) {
305308
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
306309
data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
307310
int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size);
@@ -312,7 +315,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
312315
}
313316
}
314317

315-
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
318+
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
316319
DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
317320
DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
318321
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
@@ -363,14 +366,14 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
363366
}
364367
}
365368

366-
bool IsLast;
369+
bool IsLast, IsHostPtr;
367370
bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
368371
(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
369372
bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
370373

371374
// If PTR_AND_OBJ, HstPtrBegin is address of pointee
372375
void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
373-
UpdateRef);
376+
UpdateRef, IsHostPtr);
374377
DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
375378
" - is%s last\n", data_size, DPxPTR(TgtPtrBegin),
376379
(IsLast ? "" : " not"));
@@ -387,18 +390,22 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
387390
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
388391
bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
389392
bool CopyMember = false;
390-
if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
391-
!(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
392-
// Copy data only if the "parent" struct has RefCount==1.
393-
int32_t parent_idx = member_of(arg_types[i]);
394-
long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
395-
assert(parent_rc > 0 && "parent struct not found");
396-
if (parent_rc == 1) {
397-
CopyMember = true;
393+
if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
394+
if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
395+
!(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
396+
// Copy data only if the "parent" struct has RefCount==1.
397+
int32_t parent_idx = member_of(arg_types[i]);
398+
long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
399+
assert(parent_rc > 0 && "parent struct not found");
400+
if (parent_rc == 1) {
401+
CopyMember = true;
402+
}
398403
}
399404
}
400405

401-
if (DelEntry || Always || CopyMember) {
406+
if ((DelEntry || Always || CopyMember) &&
407+
!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
408+
TgtPtrBegin == HstPtrBegin)) {
402409
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
403410
data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
404411
int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size);
@@ -471,14 +478,21 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
471478

472479
void *HstPtrBegin = args[i];
473480
int64_t MapSize = arg_sizes[i];
474-
bool IsLast;
481+
bool IsLast, IsHostPtr;
475482
void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast,
476-
false);
483+
false, IsHostPtr);
477484
if (!TgtPtrBegin) {
478485
DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
479486
continue;
480487
}
481488

489+
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
490+
TgtPtrBegin == HstPtrBegin) {
491+
DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
492+
DPxPTR(HstPtrBegin));
493+
continue;
494+
}
495+
482496
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
483497
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
484498
arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
@@ -514,6 +528,7 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
514528
DP("Copying data to device failed.\n");
515529
return OFFLOAD_FAIL;
516530
}
531+
517532
uintptr_t lb = (uintptr_t) HstPtrBegin;
518533
uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
519534
Device.ShadowMtx.lock();
@@ -640,19 +655,26 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
640655
void *HstPtrVal = args[i];
641656
void *HstPtrBegin = args_base[i];
642657
void *HstPtrBase = args[idx];
643-
bool IsLast; // unused.
658+
bool IsLast, IsHostPtr; // unused.
644659
void *TgtPtrBase =
645660
(void *)((intptr_t)tgt_args[tgtIdx] + tgt_offsets[tgtIdx]);
646661
DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
647662
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
648663
void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
649664
void *Pointer_TgtPtrBegin =
650-
Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false);
665+
Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false,
666+
IsHostPtr);
651667
if (!Pointer_TgtPtrBegin) {
652668
DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
653669
DPxPTR(HstPtrVal));
654670
continue;
655671
}
672+
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
673+
TgtPtrBegin == HstPtrBegin) {
674+
DP("Unified memory is active, no need to map lambda captured"
675+
"variable (" DPxMOD ")\n", DPxPTR(HstPtrVal));
676+
continue;
677+
}
656678
DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",
657679
DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
658680
int rt = Device.data_submit(TgtPtrBegin, &Pointer_TgtPtrBegin,
@@ -668,7 +690,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
668690
void *HstPtrBase = args_base[i];
669691
void *TgtPtrBegin;
670692
ptrdiff_t TgtBaseOffset;
671-
bool IsLast; // unused.
693+
bool IsLast, IsHostPtr; // unused.
672694
if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
673695
DP("Forwarding first-private value " DPxMOD " to the target construct\n",
674696
DPxPTR(HstPtrBase));
@@ -705,14 +727,14 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
705727
}
706728
} else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
707729
TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
708-
false);
730+
false, IsHostPtr);
709731
TgtBaseOffset = 0; // no offset for ptrs.
710732
DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
711733
"object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
712734
DPxPTR(HstPtrBase));
713735
} else {
714736
TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
715-
false);
737+
false, IsHostPtr);
716738
TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
717739
#ifdef OMPTARGET_DEBUG
718740
void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);

libomptarget/test/offloading/requires.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,4 @@ int main() {
4343
{}
4444

4545
return 0;
46-
}
46+
}

0 commit comments

Comments
 (0)