@@ -210,9 +210,6 @@ struct KernelArgPool {
210
210
};
211
211
pthread_mutex_t KernelArgPool::Mutex = PTHREAD_MUTEX_INITIALIZER;
212
212
213
- std::unordered_map<std::string /* kernel*/ , std::unique_ptr<KernelArgPool>>
214
- KernelArgPoolMap;
215
-
216
213
/// Use a single entity to encode a kernel and a set of flags
217
214
struct KernelTy {
218
215
llvm::omp::OMPTgtExecModeFlags ExecutionMode;
@@ -224,7 +221,9 @@ struct KernelTy {
224
221
KernelTy (llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize,
225
222
int32_t DeviceId, void *CallStackAddr, const char *Name,
226
223
uint32_t KernargSegmentSize,
227
- hsa_amd_memory_pool_t &KernArgMemoryPool)
224
+ hsa_amd_memory_pool_t &KernArgMemoryPool,
225
+ std::unordered_map<std::string, std::unique_ptr<KernelArgPool>>
226
+ &KernelArgPoolMap)
228
227
: ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize),
229
228
DeviceId (DeviceId), CallStackAddr(CallStackAddr), Name(Name) {
230
229
DP (" Construct kernelinfo: ExecMode %d\n " , ExecutionMode);
@@ -238,10 +237,6 @@ struct KernelTy {
238
237
}
239
238
};
240
239
241
- // / List that contains all the kernels.
242
- // / FIXME: we may need this to be per device and per library.
243
- std::list<KernelTy> KernelsList;
244
-
245
240
template <typename Callback> static hsa_status_t findAgents (Callback CB) {
246
241
247
242
hsa_status_t Err =
@@ -456,6 +451,12 @@ class RTLDeviceInfoTy : HSALifetime {
456
451
457
452
int NumberOfDevices = 0 ;
458
453
454
+ /// List that contains all the kernels.
455
+ /// FIXME: we may need this to be per device and per library.
456
+ std::list<KernelTy> KernelsList;
457
+ std::unordered_map<std::string /* kernel*/ , std::unique_ptr<KernelArgPool>>
458
+ KernelArgPoolMap;
459
+
459
460
// GPU devices
460
461
std::vector<hsa_agent_t > HSAAgents;
461
462
std::vector<HSAQueueScheduler> HSAQueueSchedulers; // one per gpu
@@ -857,7 +858,6 @@ class RTLDeviceInfoTy : HSALifetime {
857
858
" Unexpected device id!" );
858
859
FuncGblEntries[DeviceId].emplace_back ();
859
860
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back ();
860
- // KernelArgPoolMap.clear();
861
861
E.Entries .clear ();
862
862
E.Table .EntriesBegin = E.Table .EntriesEnd = 0 ;
863
863
}
@@ -1116,19 +1116,6 @@ pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER;
1116
1116
static RTLDeviceInfoTy *DeviceInfoState = nullptr ;
1117
1117
static RTLDeviceInfoTy &DeviceInfo () { return *DeviceInfoState; }
1118
1118
1119
- int32_t __tgt_rtl_init_plugin () {
1120
- DeviceInfoState = new RTLDeviceInfoTy;
1121
- return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded )
1122
- ? OFFLOAD_SUCCESS
1123
- : OFFLOAD_FAIL;
1124
- }
1125
-
1126
- int32_t __tgt_rtl_deinit_plugin () {
1127
- if (DeviceInfoState)
1128
- delete DeviceInfoState;
1129
- return OFFLOAD_SUCCESS;
1130
- }
1131
-
1132
1119
namespace {
1133
1120
1134
1121
int32_t dataRetrieve (int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size ,
@@ -1171,7 +1158,7 @@ int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size,
1171
1158
(long long unsigned )(Elf64_Addr)HstPtr,
1172
1159
(long long unsigned )(Elf64_Addr)TgtPtr);
1173
1160
Err = DeviceInfo ().freesignalpoolMemcpyH2D (TgtPtr, HstPtr, (size_t )Size ,
1174
- DeviceId);
1161
+ DeviceId);
1175
1162
if (Err != HSA_STATUS_SUCCESS) {
1176
1163
DP (" Error when copying data from host to device. Pointers: "
1177
1164
" host = 0x%016lx, device = 0x%016lx, size = %lld\n " ,
@@ -1468,8 +1455,9 @@ int32_t runRegionLocked(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
1468
1455
KernelArgPool *ArgPool = nullptr ;
1469
1456
void *KernArg = nullptr ;
1470
1457
{
1471
- auto It = KernelArgPoolMap.find (std::string (KernelInfo->Name ));
1472
- if (It != KernelArgPoolMap.end ()) {
1458
+ auto It =
1459
+ DeviceInfo ().KernelArgPoolMap .find (std::string (KernelInfo->Name ));
1460
+ if (It != DeviceInfo ().KernelArgPoolMap .end ()) {
1473
1461
ArgPool = (It->second ).get ();
1474
1462
}
1475
1463
}
@@ -2031,6 +2019,20 @@ bool IsImageCompatibleWithEnv(const char *ImgInfo, std::string EnvInfo) {
2031
2019
}
2032
2020
2033
2021
extern " C" {
2022
+
2023
+ int32_t __tgt_rtl_init_plugin () { // Plugin entry point: creates the RTLDeviceInfoTy instance and reports whether that worked.
2024
+ DeviceInfoState = new RTLDeviceInfoTy; // Stored in the file-level pointer that DeviceInfo() dereferences.
2025
+ return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded ) // Success requires a non-null object whose ConstructionSucceeded flag is set.
2026
+ ? OFFLOAD_SUCCESS
2027
+ : OFFLOAD_FAIL;
2028
+ }
2029
+
2030
+ int32_t __tgt_rtl_deinit_plugin () { // Plugin exit point: destroys the object held in DeviceInfoState, if any.
2031
+ if (DeviceInfoState)
2032
+ delete DeviceInfoState; // NOTE(review): DeviceInfoState is not reset to nullptr here, so a later DeviceInfo() call or a second deinit would touch freed memory — confirm callers never do that.
2033
+ return OFFLOAD_SUCCESS; // Always reports success, whether or not anything was deleted.
2034
+ }
2035
+
2034
2036
int32_t __tgt_rtl_is_valid_binary (__tgt_device_image *Image) { // Forwards Image to elfMachineIdIsAmdgcn and returns its result.
2035
2037
return elfMachineIdIsAmdgcn (Image);
2036
2038
}
@@ -2373,8 +2375,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId,
2373
2375
}
2374
2376
2375
2377
// write ptr to device memory so it can be used by later kernels
2376
- Err = DeviceInfo ().freesignalpoolMemcpyH2D (StatePtr, &Ptr , sizeof ( void *),
2377
- DeviceId);
2378
+ Err = DeviceInfo ().freesignalpoolMemcpyH2D (StatePtr, &Ptr ,
2379
+ sizeof ( void *), DeviceId);
2378
2380
if (Err != HSA_STATUS_SUCCESS) {
2379
2381
DP (" memcpy install of state_ptr failed\n " );
2380
2382
return NULL ;
@@ -2437,8 +2439,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId,
2437
2439
// If unified memory is present any target link variables
2438
2440
// can access host addresses directly. There is no longer a
2439
2441
// need for device copies.
2440
- Err = DeviceInfo ().freesignalpoolMemcpyH2D (Varptr, E->addr ,
2441
- sizeof ( void *), DeviceId);
2442
+ Err = DeviceInfo ().freesignalpoolMemcpyH2D (Varptr, E->addr , sizeof ( void *),
2443
+ DeviceId);
2442
2444
if (Err != HSA_STATUS_SUCCESS)
2443
2445
DP (" Error when copying USM\n " );
2444
2446
DP (" Copy linked variable host address (" DPxMOD " )"
@@ -2598,11 +2600,12 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId,
2598
2600
}
2599
2601
check (" Loading computation property" , Err);
2600
2602
2601
- KernelsList.push_back (KernelTy (ExecModeVal, WGSizeVal, DeviceId,
2602
- CallStackAddr, E->name , KernargSegmentSize,
2603
- DeviceInfo ().KernArgPool ));
2603
+ DeviceInfo ().KernelsList .push_back (
2604
+ KernelTy (ExecModeVal, WGSizeVal, DeviceId, CallStackAddr, E->name ,
2605
+ KernargSegmentSize, DeviceInfo ().KernArgPool ,
2606
+ DeviceInfo ().KernelArgPoolMap ));
2604
2607
__tgt_offload_entry Entry = *E;
2605
- Entry.addr = (void *)&KernelsList.back ();
2608
+ Entry.addr = (void *)&DeviceInfo (). KernelsList .back ();
2606
2609
DeviceInfo ().addOffloadEntry (DeviceId, Entry);
2607
2610
DP (" Entry point %ld maps to %s\n " , E - HostBegin, E->name );
2608
2611
}
0 commit comments