@@ -210,6 +210,9 @@ struct KernelArgPool {
 };
 pthread_mutex_t KernelArgPool::Mutex = PTHREAD_MUTEX_INITIALIZER;
 
+std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
+    KernelArgPoolMap;
+
 /// Use a single entity to encode a kernel and a set of flags
 struct KernelTy {
   llvm::omp::OMPTgtExecModeFlags ExecutionMode;
@@ -221,9 +224,7 @@ struct KernelTy {
   KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize,
            int32_t DeviceId, void *CallStackAddr, const char *Name,
            uint32_t KernargSegmentSize,
-           hsa_amd_memory_pool_t &KernArgMemoryPool,
-           std::unordered_map<std::string, std::unique_ptr<KernelArgPool>>
-               &KernelArgPoolMap)
+           hsa_amd_memory_pool_t &KernArgMemoryPool)
       : ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize),
         DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) {
     DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode);
@@ -237,6 +238,10 @@ struct KernelTy {
   }
 };
 
+/// List that contains all the kernels.
+/// FIXME: we may need this to be per device and per library.
+std::list<KernelTy> KernelsList;
+
 template <typename Callback> static hsa_status_t findAgents(Callback CB) {
 
   hsa_status_t Err =
@@ -451,12 +456,6 @@ class RTLDeviceInfoTy : HSALifetime {
 
   int NumberOfDevices = 0;
 
-  /// List that contains all the kernels.
-  /// FIXME: we may need this to be per device and per library.
-  std::list<KernelTy> KernelsList;
-  std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
-      KernelArgPoolMap;
-
   // GPU devices
   std::vector<hsa_agent_t> HSAAgents;
   std::vector<HSAQueueScheduler> HSAQueueSchedulers; // one per gpu
@@ -858,6 +857,7 @@ class RTLDeviceInfoTy : HSALifetime {
            "Unexpected device id!");
     FuncGblEntries[DeviceId].emplace_back();
     FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
+    // KernelArgPoolMap.clear();
     E.Entries.clear();
     E.Table.EntriesBegin = E.Table.EntriesEnd = 0;
   }
@@ -1113,8 +1113,10 @@ class RTLDeviceInfoTy : HSALifetime {
 
 pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER;
 
-static RTLDeviceInfoTy *DeviceInfoState = nullptr;
-static RTLDeviceInfoTy &DeviceInfo() { return *DeviceInfoState; }
+// Putting accesses to DeviceInfo global behind a function call prior
+// to changing to use init_plugin/deinit_plugin calls
+static RTLDeviceInfoTy DeviceInfoState;
+static RTLDeviceInfoTy &DeviceInfo() { return DeviceInfoState; }
 
 namespace {
 
@@ -1455,9 +1457,8 @@ int32_t runRegionLocked(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
   KernelArgPool *ArgPool = nullptr;
   void *KernArg = nullptr;
   {
-    auto It =
-        DeviceInfo().KernelArgPoolMap.find(std::string(KernelInfo->Name));
-    if (It != DeviceInfo().KernelArgPoolMap.end()) {
+    auto It = KernelArgPoolMap.find(std::string(KernelInfo->Name));
+    if (It != KernelArgPoolMap.end()) {
       ArgPool = (It->second).get();
     }
   }
@@ -2019,20 +2020,6 @@ bool IsImageCompatibleWithEnv(const char *ImgInfo, std::string EnvInfo) {
 }
 
 extern "C" {
-
-int32_t __tgt_rtl_init_plugin() {
-  DeviceInfoState = new RTLDeviceInfoTy;
-  return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded)
-             ? OFFLOAD_SUCCESS
-             : OFFLOAD_FAIL;
-}
-
-int32_t __tgt_rtl_deinit_plugin() {
-  if (DeviceInfoState)
-    delete DeviceInfoState;
-  return OFFLOAD_SUCCESS;
-}
-
 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
   return elfMachineIdIsAmdgcn(Image);
 }
@@ -2064,6 +2051,9 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
   return true;
 }
 
+int32_t __tgt_rtl_init_plugin() { return OFFLOAD_SUCCESS; }
+int32_t __tgt_rtl_deinit_plugin() { return OFFLOAD_SUCCESS; }
+
 int __tgt_rtl_number_of_devices() {
   // If the construction failed, no methods are safe to call
   if (DeviceInfo().ConstructionSucceeded) {
@@ -2600,12 +2590,11 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId,
     }
     check("Loading computation property", Err);
 
-    DeviceInfo().KernelsList.push_back(
-        KernelTy(ExecModeVal, WGSizeVal, DeviceId, CallStackAddr, E->name,
-                 KernargSegmentSize, DeviceInfo().KernArgPool,
-                 DeviceInfo().KernelArgPoolMap));
+    KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, DeviceId,
+                                   CallStackAddr, E->name, KernargSegmentSize,
+                                   DeviceInfo().KernArgPool));
     __tgt_offload_entry Entry = *E;
-    Entry.addr = (void *)&DeviceInfo().KernelsList.back();
+    Entry.addr = (void *)&KernelsList.back();
     DeviceInfo().addOffloadEntry(DeviceId, Entry);
     DP("Entry point %ld maps to %s\n", E - HostBegin, E->name);
   }