Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 9ea8071

Browse files
committed
[libomptarget] Remove duplicate RTLRequiresFlags per device
We have one global RTLs.RequiresFlags; I don't see a need to make a copy per device that the runtime manages. This was problematic anyway because the copy happened during the first __tgt_register_lib(). This made it impossible to call __tgt_register_requires() from normal user functions for testing. Hence, this change also fixes unified_shared_memory/shared_update.c for older versions of Clang that don't call __tgt_register_requires() before __tgt_register_lib(). Differential Revision: https://reviews.llvm.org/D66019 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@368465 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 3654abe commit 9ea8071

File tree

5 files changed

+14
-21
lines changed

5 files changed

+14
-21
lines changed

libomptarget/src/api.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) {
120120
// getTgtPtrBegin() function which means that there is no device
121121
// corresponding point for ptr. This function should return false
122122
// in that situation.
123-
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
123+
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
124124
rc = !IsHostPtr;
125125
DP("Call to omp_target_is_present returns %d\n", rc);
126126
return rc;

libomptarget/src/device.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
194194
// maps are respected.
195195
// TODO: In addition to the mapping rules above, when the close map
196196
// modifier is implemented, force the mapping of the variable to the device.
197-
if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
197+
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
198198
DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
199199
DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
200200
IsHostPtr = true;
@@ -241,7 +241,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
241241
(CONSIDERED_INF(HT.RefCount)) ? "INF" :
242242
std::to_string(HT.RefCount).c_str());
243243
rc = (void *)tp;
244-
} else if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
244+
} else if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
245245
// If the value isn't found in the mapping and unified shared memory
246246
// is on then it means we have stumbled upon a value which we need to
247247
// use directly from the host.
@@ -270,7 +270,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
270270
}
271271

272272
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
273-
if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
273+
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
274274
return OFFLOAD_SUCCESS;
275275
// Check if the pointer is contained in any sub-nodes.
276276
int rc;
@@ -305,7 +305,7 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
305305
void DeviceTy::init() {
306306
// Make call to init_requires if it exists for this plugin.
307307
if (RTL->init_requires)
308-
RTL->init_requires(RTLRequiresFlags);
308+
RTL->init_requires(RTLs.RequiresFlags);
309309
int32_t rc = RTL->init_device(RTLDeviceID);
310310
if (rc == OFFLOAD_SUCCESS) {
311311
IsInit = true;

libomptarget/src/device.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,10 @@ struct DeviceTy {
100100
// moved into the target task in libomp.
101101
std::map<int32_t, uint64_t> LoopTripCnt;
102102

103-
int64_t RTLRequiresFlags;
104-
105103
DeviceTy(RTLInfoTy *RTL)
106104
: DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
107-
HasPendingGlobals(false), HostDataToTargetMap(),
108-
PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(),
109-
ShadowMtx(), RTLRequiresFlags(0) {}
105+
HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(),
106+
ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), ShadowMtx() {}
110107

111108
// The existence of mutexes makes DeviceTy non-copyable. We need to
112109
// provide a copy constructor and an assignment operator explicitly.
@@ -115,9 +112,8 @@ struct DeviceTy {
115112
IsInit(d.IsInit), InitFlag(), HasPendingGlobals(d.HasPendingGlobals),
116113
HostDataToTargetMap(d.HostDataToTargetMap),
117114
PendingCtorsDtors(d.PendingCtorsDtors), ShadowPtrMap(d.ShadowPtrMap),
118-
DataMapMtx(), PendingGlobalsMtx(),
119-
ShadowMtx(), LoopTripCnt(d.LoopTripCnt),
120-
RTLRequiresFlags(d.RTLRequiresFlags) {}
115+
DataMapMtx(), PendingGlobalsMtx(), ShadowMtx(),
116+
LoopTripCnt(d.LoopTripCnt) {}
121117

122118
DeviceTy& operator=(const DeviceTy &d) {
123119
DeviceID = d.DeviceID;
@@ -129,7 +125,6 @@ struct DeviceTy {
129125
PendingCtorsDtors = d.PendingCtorsDtors;
130126
ShadowPtrMap = d.ShadowPtrMap;
131127
LoopTripCnt = d.LoopTripCnt;
132-
RTLRequiresFlags = d.RTLRequiresFlags;
133128

134129
return *this;
135130
}

libomptarget/src/omptarget.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
290290

291291
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
292292
bool copy = false;
293-
if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
293+
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
294294
if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
295295
copy = true;
296296
} else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
@@ -390,7 +390,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
390390
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
391391
bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
392392
bool CopyMember = false;
393-
if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
393+
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
394394
if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
395395
!(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
396396
// Copy data only if the "parent" struct has RefCount==1.
@@ -404,7 +404,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
404404
}
405405

406406
if ((DelEntry || Always || CopyMember) &&
407-
!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
407+
!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
408408
TgtPtrBegin == HstPtrBegin)) {
409409
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
410410
data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
@@ -486,7 +486,7 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
486486
continue;
487487
}
488488

489-
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
489+
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
490490
TgtPtrBegin == HstPtrBegin) {
491491
DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
492492
DPxPTR(HstPtrBegin));
@@ -669,7 +669,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
669669
DPxPTR(HstPtrVal));
670670
continue;
671671
}
672-
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
672+
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
673673
TgtPtrBegin == HstPtrBegin) {
674674
DP("Unified memory is active, no need to map lambda captured"
675675
"variable (" DPxMOD ")\n", DPxPTR(HstPtrVal));

libomptarget/src/rtl.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,6 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
266266
Devices[start + device_id].DeviceID = start + device_id;
267267
// RTL local device ID
268268
Devices[start + device_id].RTLDeviceID = device_id;
269-
// RTL requires flags
270-
Devices[start + device_id].RTLRequiresFlags = RequiresFlags;
271269
}
272270

273271
// Initialize the index of this RTL and save it in the used RTLs.

0 commit comments

Comments
 (0)