Skip to content

Commit 046d5b9

Browse files
jhuber6tru
authored andcommitted
[Libomptarget] Revert changes to AMDGPU plugin destructors
These patches exposed a lot of problems in the AMD toolchain. Rather than keep it broken we should revert it to its old semi-functional state. This will prevent us from using device destructors but should remove some new bugs. In the future this interface should be changed once these problems are addressed more correctly. This reverts commit ed0f218. This reverts commit 2b7203a. Fixes llvm#57536 Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D133997
1 parent 29d395a commit 046d5b9

File tree

2 files changed

+29
-50
lines changed

2 files changed

+29
-50
lines changed

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

+22-33
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,9 @@ struct KernelArgPool {
210210
};
211211
pthread_mutex_t KernelArgPool::Mutex = PTHREAD_MUTEX_INITIALIZER;
212212

213+
std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
214+
KernelArgPoolMap;
215+
213216
/// Use a single entity to encode a kernel and a set of flags
214217
struct KernelTy {
215218
llvm::omp::OMPTgtExecModeFlags ExecutionMode;
@@ -221,9 +224,7 @@ struct KernelTy {
221224
KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize,
222225
int32_t DeviceId, void *CallStackAddr, const char *Name,
223226
uint32_t KernargSegmentSize,
224-
hsa_amd_memory_pool_t &KernArgMemoryPool,
225-
std::unordered_map<std::string, std::unique_ptr<KernelArgPool>>
226-
&KernelArgPoolMap)
227+
hsa_amd_memory_pool_t &KernArgMemoryPool)
227228
: ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize),
228229
DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) {
229230
DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode);
@@ -237,6 +238,10 @@ struct KernelTy {
237238
}
238239
};
239240

241+
/// List that contains all the kernels.
242+
/// FIXME: we may need this to be per device and per library.
243+
std::list<KernelTy> KernelsList;
244+
240245
template <typename Callback> static hsa_status_t findAgents(Callback CB) {
241246

242247
hsa_status_t Err =
@@ -451,12 +456,6 @@ class RTLDeviceInfoTy : HSALifetime {
451456

452457
int NumberOfDevices = 0;
453458

454-
/// List that contains all the kernels.
455-
/// FIXME: we may need this to be per device and per library.
456-
std::list<KernelTy> KernelsList;
457-
std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
458-
KernelArgPoolMap;
459-
460459
// GPU devices
461460
std::vector<hsa_agent_t> HSAAgents;
462461
std::vector<HSAQueueScheduler> HSAQueueSchedulers; // one per gpu
@@ -858,6 +857,7 @@ class RTLDeviceInfoTy : HSALifetime {
858857
"Unexpected device id!");
859858
FuncGblEntries[DeviceId].emplace_back();
860859
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
860+
// KernelArgPoolMap.clear();
861861
E.Entries.clear();
862862
E.Table.EntriesBegin = E.Table.EntriesEnd = 0;
863863
}
@@ -1113,8 +1113,10 @@ class RTLDeviceInfoTy : HSALifetime {
11131113

11141114
pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER;
11151115

1116-
static RTLDeviceInfoTy *DeviceInfoState = nullptr;
1117-
static RTLDeviceInfoTy &DeviceInfo() { return *DeviceInfoState; }
1116+
// Putting accesses to DeviceInfo global behind a function call prior
1117+
// to changing to use init_plugin/deinit_plugin calls
1118+
static RTLDeviceInfoTy DeviceInfoState;
1119+
static RTLDeviceInfoTy &DeviceInfo() { return DeviceInfoState; }
11181120

11191121
namespace {
11201122

@@ -1455,9 +1457,8 @@ int32_t runRegionLocked(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
14551457
KernelArgPool *ArgPool = nullptr;
14561458
void *KernArg = nullptr;
14571459
{
1458-
auto It =
1459-
DeviceInfo().KernelArgPoolMap.find(std::string(KernelInfo->Name));
1460-
if (It != DeviceInfo().KernelArgPoolMap.end()) {
1460+
auto It = KernelArgPoolMap.find(std::string(KernelInfo->Name));
1461+
if (It != KernelArgPoolMap.end()) {
14611462
ArgPool = (It->second).get();
14621463
}
14631464
}
@@ -2019,20 +2020,6 @@ bool IsImageCompatibleWithEnv(const char *ImgInfo, std::string EnvInfo) {
20192020
}
20202021

20212022
extern "C" {
2022-
2023-
int32_t __tgt_rtl_init_plugin() {
2024-
DeviceInfoState = new RTLDeviceInfoTy;
2025-
return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded)
2026-
? OFFLOAD_SUCCESS
2027-
: OFFLOAD_FAIL;
2028-
}
2029-
2030-
int32_t __tgt_rtl_deinit_plugin() {
2031-
if (DeviceInfoState)
2032-
delete DeviceInfoState;
2033-
return OFFLOAD_SUCCESS;
2034-
}
2035-
20362023
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
20372024
return elfMachineIdIsAmdgcn(Image);
20382025
}
@@ -2064,6 +2051,9 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
20642051
return true;
20652052
}
20662053

2054+
int32_t __tgt_rtl_init_plugin() { return OFFLOAD_SUCCESS; }
2055+
int32_t __tgt_rtl_deinit_plugin() { return OFFLOAD_SUCCESS; }
2056+
20672057
int __tgt_rtl_number_of_devices() {
20682058
// If the construction failed, no methods are safe to call
20692059
if (DeviceInfo().ConstructionSucceeded) {
@@ -2600,12 +2590,11 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId,
26002590
}
26012591
check("Loading computation property", Err);
26022592

2603-
DeviceInfo().KernelsList.push_back(
2604-
KernelTy(ExecModeVal, WGSizeVal, DeviceId, CallStackAddr, E->name,
2605-
KernargSegmentSize, DeviceInfo().KernArgPool,
2606-
DeviceInfo().KernelArgPoolMap));
2593+
KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, DeviceId,
2594+
CallStackAddr, E->name, KernargSegmentSize,
2595+
DeviceInfo().KernArgPool));
26072596
__tgt_offload_entry Entry = *E;
2608-
Entry.addr = (void *)&DeviceInfo().KernelsList.back();
2597+
Entry.addr = (void *)&KernelsList.back();
26092598
DeviceInfo().addOffloadEntry(DeviceId, Entry);
26102599
DP("Entry point %ld maps to %s\n", E - HostBegin, E->name);
26112600
}

openmp/libomptarget/src/rtl.cpp

+7-17
Original file line numberDiff line numberDiff line change
@@ -67,23 +67,6 @@ __attribute__((constructor(101))) void init() {
6767

6868
__attribute__((destructor(101))) void deinit() {
6969
DP("Deinit target library!\n");
70-
71-
for (auto *R : PM->RTLs.UsedRTLs) {
72-
// Plugins can either destroy their local state using global variables
73-
// or attribute(destructor) functions or by implementing deinit_plugin
74-
// The hazard with plugin local destructors is they may be called before
75-
// or after this destructor. If the plugin is destroyed using global
76-
// state before this library finishes calling into it the plugin is
77-
// likely to crash. If good fortune means the plugin outlives this
78-
// library then there may be no crash.
79-
// Using deinit_plugin and no global destructors from the plugin works.
80-
if (R->deinit_plugin) {
81-
if (R->deinit_plugin() != OFFLOAD_SUCCESS) {
82-
DP("Failure deinitializing RTL %s!\n", R->RTLName.c_str());
83-
}
84-
}
85-
}
86-
8770
delete PM;
8871

8972
#ifdef OMPTARGET_PROFILE_ENABLED
@@ -584,6 +567,13 @@ void RTLsTy::unregisterLib(__tgt_bin_desc *Desc) {
584567
PM->TblMapMtx.unlock();
585568

586569
// TODO: Write some RTL->unload_image(...) function?
570+
for (auto *R : UsedRTLs) {
571+
if (R->deinit_plugin) {
572+
if (R->deinit_plugin() != OFFLOAD_SUCCESS) {
573+
DP("Failure deinitializing RTL %s!\n", R->RTLName.c_str());
574+
}
575+
}
576+
}
587577

588578
DP("Done unregistering library!\n");
589579
}

0 commit comments

Comments
 (0)