Skip to content

Commit 27de306

Browse files
committed
Use IndexedMap for PreloadKernArgs.
1 parent 40c6a93 commit 27de306

File tree

5 files changed

+40
-31
lines changed

5 files changed

+40
-31
lines changed

llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -182,16 +182,12 @@ AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
182182
SmallVector<const KernArgPreloadDescriptor *, 4>
183183
AMDGPUFunctionArgInfo::getPreloadDescriptorsForArgIdx(unsigned ArgIdx) const {
184184
SmallVector<const KernArgPreloadDescriptor *, 4> Results;
185-
for (const auto &KV : PreloadKernArgs) {
186-
if (KV.second.OrigArgIdx == ArgIdx)
187-
Results.push_back(&KV.second);
185+
for (unsigned PartIdx = 0; PartIdx < PreloadKernArgs.size(); ++PartIdx) {
186+
const auto &Desc = PreloadKernArgs[PartIdx];
187+
if (Desc.OrigArgIdx == ArgIdx)
188+
Results.push_back(&Desc);
188189
}
189190

190-
stable_sort(Results, [](const KernArgPreloadDescriptor *A,
191-
const KernArgPreloadDescriptor *B) {
192-
return A->PartIdx < B->PartIdx;
193-
});
194-
195191
return Results;
196192
}
197193

@@ -203,11 +199,9 @@ AMDGPUFunctionArgInfo::getHiddenArgPreloadDescriptor(HiddenArg HA) const {
203199
if (HiddenArgIt == PreloadHiddenArgsIndexMap.end())
204200
return nullptr;
205201

206-
auto KernArgIt = PreloadKernArgs.find(HiddenArgIt->second);
207-
if (KernArgIt == PreloadKernArgs.end())
208-
return nullptr;
209-
210-
return &KernArgIt->second;
202+
const KernArgPreloadDescriptor &Desc = PreloadKernArgs[HiddenArgIt->second];
203+
assert(Desc.IsValid && "Hidden argument preload descriptor not valid.");
204+
return &Desc;
211205
}
212206

213207
const AMDGPUFunctionArgInfo &

llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1313
#include "llvm/ADT/DenseMap.h"
14+
#include "llvm/ADT/IndexedMap.h"
1415
#include "llvm/Analysis/ValueTracking.h"
1516
#include "llvm/CodeGen/Register.h"
1617
#include "llvm/IR/LLVMContext.h"
@@ -167,6 +168,8 @@ struct KernArgPreloadDescriptor {
167168
// The registers that the argument is preloaded into. The argument may be
168169
// split across multiple registers.
169170
SmallVector<MCRegister, 2> Regs;
171+
172+
bool IsValid = false;
170173
};
171174

172175
} // namespace KernArgPreload
@@ -231,9 +234,15 @@ struct AMDGPUFunctionArgInfo {
231234
ArgDescriptor WorkItemIDY;
232235
ArgDescriptor WorkItemIDZ;
233236

237+
struct PreloadArgIndexFunctor {
238+
using argument_type = unsigned;
239+
unsigned operator()(unsigned Idx) const { return Idx; }
240+
};
241+
234242
// Map the index of preloaded kernel arguments to its descriptor.
235-
SmallDenseMap<int, KernArgPreload::KernArgPreloadDescriptor>
236-
PreloadKernArgs{};
243+
IndexedMap<KernArgPreload::KernArgPreloadDescriptor, PreloadArgIndexFunctor>
244+
PreloadKernArgs;
245+
237246
// Map hidden argument to the index of its descriptor.
238247
SmallDenseMap<KernArgPreload::HiddenArg, int> PreloadHiddenArgsIndexMap{};
239248
// The first user SGPR allocated for kernarg preloading.

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -793,7 +793,7 @@ void MetadataStreamerMsgPackV6::emitHiddenKernelArg(
793793
ArgInfo->getHiddenArgPreloadDescriptor(HiddenArg);
794794
if (PreloadDesc) {
795795
const SmallVectorImpl<MCRegister> &Regs = PreloadDesc->Regs;
796-
for (const auto &Reg : Regs) {
796+
for (const auto Reg : Regs) {
797797
if (!PreloadStr.empty())
798798
PreloadStr.push_back(' ');
799799
PreloadStr += AMDGPUInstPrinter::getRegisterName(Reg);
@@ -817,7 +817,7 @@ void MetadataStreamerMsgPackV6::emitKernelArg(const Argument &Arg,
817817
if (!PreloadRegisters.empty())
818818
PreloadRegisters.push_back(' ');
819819

820-
for (const auto &Reg : Desc->Regs) {
820+
for (const auto Reg : Desc->Regs) {
821821
if (!PreloadRegisters.empty())
822822
PreloadRegisters.push_back(' ');
823823
PreloadRegisters += AMDGPUInstPrinter::getRegisterName(Reg);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2745,17 +2745,20 @@ void SITargetLowering::allocatePreloadKernArgSGPRs(
27452745
// Arg is preloaded into the previous SGPR.
27462746
if (ArgLoc.getLocVT().getStoreSize() < 4 && Alignment < 4) {
27472747
assert(InIdx >= 1 && "No previous SGPR");
2748-
auto [It, Inserted] =
2749-
Info.getArgInfo().PreloadKernArgs.try_emplace(InIdx);
2750-
assert(Inserted && "Preload kernel argument allocated twice.");
2751-
KernArgPreloadDescriptor &PreloadDesc = It->second;
2752-
2753-
const KernArgPreloadDescriptor &PrevDesc =
2754-
Info.getArgInfo().PreloadKernArgs[InIdx - 1];
2748+
auto &PreloadKernArgs = Info.getArgInfo().PreloadKernArgs;
2749+
PreloadKernArgs.grow(InIdx);
2750+
KernArgPreloadDescriptor &PreloadDesc = PreloadKernArgs[InIdx];
2751+
assert(!PreloadDesc.IsValid &&
2752+
"Preload kernel argument allocated twice.");
2753+
2754+
const KernArgPreloadDescriptor &PrevDesc = PreloadKernArgs[InIdx - 1];
2755+
assert(PrevDesc.IsValid &&
2756+
"Previous preload kernel argument not allocated.");
27552757
PreloadDesc.Regs.push_back(PrevDesc.Regs[0]);
27562758

27572759
PreloadDesc.OrigArgIdx = Arg.getArgNo();
27582760
PreloadDesc.PartIdx = InIdx;
2761+
PreloadDesc.IsValid = true;
27592762
if (Arg.hasAttribute("amdgpu-hidden-argument"))
27602763
mapHiddenArgToPreloadIndex(Info.getArgInfo(), ArgOffset,
27612764
ImplicitArgOffset, InIdx);
@@ -3183,7 +3186,9 @@ SDValue SITargetLowering::LowerFormalArguments(
31833186
}
31843187

31853188
SDValue NewArg;
3186-
if (Arg.isOrigArg() && Info->getArgInfo().PreloadKernArgs.count(i)) {
3189+
auto &PreloadKernArgs = Info->getArgInfo().PreloadKernArgs;
3190+
if (Arg.isOrigArg() && PreloadKernArgs.inBounds(i) &&
3191+
PreloadKernArgs[i].IsValid) {
31873192
if (MemVT.getStoreSize() < 4 && Alignment < 4) {
31883193
// In this case the argument is packed into the previous preload SGPR.
31893194
int64_t AlignDownOffset = alignDown(Offset, 4);
@@ -3193,8 +3198,7 @@ SDValue SITargetLowering::LowerFormalArguments(
31933198
const SIMachineFunctionInfo *Info =
31943199
MF.getInfo<SIMachineFunctionInfo>();
31953200
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
3196-
Register Reg =
3197-
Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs[0];
3201+
Register Reg = Info->getArgInfo().PreloadKernArgs[i].Regs[0];
31983202

31993203
assert(Reg);
32003204
Register VReg = MRI.getLiveInVirtReg(Reg);
@@ -3214,7 +3218,7 @@ SDValue SITargetLowering::LowerFormalArguments(
32143218
MF.getInfo<SIMachineFunctionInfo>();
32153219
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
32163220
const SmallVectorImpl<MCRegister> &PreloadRegs =
3217-
Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs;
3221+
Info->getArgInfo().PreloadKernArgs[i].Regs;
32183222

32193223
SDValue Copy;
32203224
if (PreloadRegs.size() == 1) {

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,11 +278,13 @@ SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
278278
const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
279279
unsigned AllocSizeDWord, unsigned PartIdx, unsigned ArgIdx,
280280
unsigned PaddingSGPRs) {
281-
auto [It, Inserted] = ArgInfo.PreloadKernArgs.try_emplace(PartIdx);
282-
assert(Inserted && "Preload kernel argument allocated twice.");
283-
KernArgPreload::KernArgPreloadDescriptor &PreloadDesc = It->second;
281+
ArgInfo.PreloadKernArgs.grow(PartIdx);
282+
KernArgPreload::KernArgPreloadDescriptor &PreloadDesc =
283+
ArgInfo.PreloadKernArgs[PartIdx];
284+
assert(!PreloadDesc.IsValid && "Preload kernel argument allocated twice.");
284285
PreloadDesc.PartIdx = PartIdx;
285286
PreloadDesc.OrigArgIdx = ArgIdx;
287+
PreloadDesc.IsValid = true;
286288

287289
NumUserSGPRs += PaddingSGPRs;
288290
// If the available register tuples are aligned with the kernarg to be

0 commit comments

Comments
 (0)