-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU] Fix resource usage information for unnamed functions #115320
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Janek van Oirschot (JanekvO) ChangesResource usage information would try to overwrite unnamed functions if there are multiple within the same compilation unit. This aims to either use the Full diff: https://github.com/llvm/llvm-project/pull/115320.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 93b6ba0595b70f..5ebe4a069569c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -368,6 +368,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
using RIK = MCResourceInfo::ResourceInfoKind;
const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
+ MCSymbol *FnSym = TM.getSymbol(&F);
auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
int64_t Val;
@@ -381,7 +382,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
const uint64_t MaxScratchPerWorkitem =
STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
MCSymbol *ScratchSizeSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_PrivateSegSize, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext);
uint64_t ScratchSize;
if (ScratchSizeSymbol->isVariable() &&
TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&
@@ -394,7 +395,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
// Validate addressable scalar registers (i.e., prior to added implicit
// SGPRs).
MCSymbol *NumSGPRSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_NumSGPR, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext);
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
!STM.hasSGPRInitBug()) {
unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
@@ -411,9 +412,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
}
MCSymbol *VCCUsedSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_UsesVCC, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext);
MCSymbol *FlatUsedSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_UsesFlatScratch, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext);
uint64_t VCCUsed, FlatUsed, NumSgpr;
if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&
@@ -440,9 +441,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
}
MCSymbol *NumVgprSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_NumVGPR, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext);
MCSymbol *NumAgprSymbol =
- RI.getSymbol(F.getName(), RIK::RIK_NumAGPR, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext);
uint64_t NumVgpr, NumAgpr;
MachineModuleInfo &MMI =
@@ -705,15 +706,20 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
{
using RIK = MCResourceInfo::ResourceInfoKind;
getTargetStreamer()->EmitMCResourceInfo(
- RI.getSymbol(MF.getName(), RIK::RIK_NumVGPR, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_NumAGPR, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_NumSGPR, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_PrivateSegSize, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_UsesVCC, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_UsesFlatScratch, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_HasDynSizedStack, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_HasRecursion, OutContext),
- RI.getSymbol(MF.getName(), RIK::RIK_HasIndirectCall, OutContext));
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
+ OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
+ OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
+ OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion,
+ OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
+ OutContext));
}
if (isVerbose()) {
@@ -726,18 +732,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" Function info:", false);
emitCommonFunctionComments(
- RI.getSymbol(MF.getName(), RIK::RIK_NumVGPR, OutContext)
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext)
->getVariableValue(),
- STM.hasMAIInsts()
- ? RI.getSymbol(MF.getName(), RIK::RIK_NumAGPR, OutContext)
- ->getVariableValue()
- : nullptr,
+ STM.hasMAIInsts() ? RI.getSymbol(CurrentFnSym->getName(),
+ RIK::RIK_NumAGPR, OutContext)
+ ->getVariableValue()
+ : nullptr,
RI.createTotalNumVGPRs(MF, Ctx),
RI.createTotalNumSGPRs(
MF,
MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),
Ctx),
- RI.getSymbol(MF.getName(), RIK::RIK_PrivateSegSize, OutContext)
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
+ OutContext)
->getVariableValue(),
getFunctionCodeSize(MF), MFI);
return false;
@@ -943,7 +950,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
auto GetSymRefExpr =
[&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {
- MCSymbol *Sym = RI.getSymbol(MF.getName(), RIK, OutContext);
+ MCSymbol *Sym = RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext);
return MCSymbolRefExpr::create(Sym, Ctx);
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index da0397fa20bd1b..85b359f18816a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -95,6 +96,8 @@ void MCResourceInfo::assignResourceInfoExpr(
int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
MCContext &OutContext) {
+ const LLVMTargetMachine &TM = MF.getTarget();
+ MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
const MCConstantExpr *LocalConstExpr =
MCConstantExpr::create(LocalValue, OutContext);
const MCExpr *SymVal = LocalConstExpr;
@@ -109,12 +112,14 @@ void MCResourceInfo::assignResourceInfoExpr(
for (const Function *Callee : Callees) {
if (!Seen.insert(Callee).second)
continue;
- MCSymbol *CalleeValSym = getSymbol(Callee->getName(), RIK, OutContext);
+ MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
+ MCSymbol *CalleeValSym =
+ getSymbol(CalleeFnSym->getName(), RIK, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
}
SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
}
- MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
+ MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
Sym->setVariableValue(SymVal);
}
@@ -133,6 +138,9 @@ void MCResourceInfo::gatherResourceInfo(
addMaxSGPRCandidate(FRI.NumExplicitSGPR);
}
+ const LLVMTargetMachine &TM = MF.getTarget();
+ MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
+
auto SetMaxReg = [&](MCSymbol *MaxSym, int32_t numRegs,
ResourceInfoKind RIK) {
if (!FRI.HasIndirectCall) {
@@ -140,7 +148,7 @@ void MCResourceInfo::gatherResourceInfo(
FRI.Callees, OutContext);
} else {
const MCExpr *SymRef = MCSymbolRefExpr::create(MaxSym, OutContext);
- MCSymbol *LocalNumSym = getSymbol(MF.getName(), RIK, OutContext);
+ MCSymbol *LocalNumSym = getSymbol(FnSym->getName(), RIK, OutContext);
const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax(
{MCConstantExpr::create(numRegs, OutContext), SymRef}, OutContext);
LocalNumSym->setVariableValue(MaxWithLocal);
@@ -165,8 +173,9 @@ void MCResourceInfo::gatherResourceInfo(
if (!Seen.insert(Callee).second)
continue;
if (!Callee->isDeclaration()) {
+ MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
MCSymbol *calleeValSym =
- getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
+ getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
}
}
@@ -178,12 +187,12 @@ void MCResourceInfo::gatherResourceInfo(
localConstExpr =
MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
}
- getSymbol(MF.getName(), RIK_PrivateSegSize, OutContext)
+ getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext)
->setVariableValue(localConstExpr);
}
auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
- MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
+ MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
};
@@ -213,18 +222,23 @@ void MCResourceInfo::gatherResourceInfo(
const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF,
MCContext &Ctx) {
+ const LLVMTargetMachine &TM = MF.getTarget();
+ MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
return AMDGPUMCExpr::createTotalNumVGPR(
- getSymRefExpr(MF.getName(), RIK_NumAGPR, Ctx),
- getSymRefExpr(MF.getName(), RIK_NumVGPR, Ctx), Ctx);
+ getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx),
+ getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx), Ctx);
}
const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
bool hasXnack,
MCContext &Ctx) {
+ const LLVMTargetMachine &TM = MF.getTarget();
+ MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
return MCBinaryExpr::createAdd(
- getSymRefExpr(MF.getName(), RIK_NumSGPR, Ctx),
+ getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx),
AMDGPUMCExpr::createExtraSGPRs(
- getSymRefExpr(MF.getName(), RIK_UsesVCC, Ctx),
- getSymRefExpr(MF.getName(), RIK_UsesFlatScratch, Ctx), hasXnack, Ctx),
+ getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx),
+ getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx), hasXnack,
+ Ctx),
Ctx);
}
diff --git a/llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll b/llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll
new file mode 100644
index 00000000000000..c9fbd369e062d5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck %s
+
+; CHECK-LABEL: __unnamed_1:
+; CHECK: .set __unnamed_1.num_vgpr, 0
+; CHECK: .set __unnamed_1.num_agpr, 0
+; CHECK: .set __unnamed_1.numbered_sgpr, 32
+; CHECK: .set __unnamed_1.private_seg_size, 0
+; CHECK: .set __unnamed_1.uses_vcc, 0
+; CHECK: .set __unnamed_1.uses_flat_scratch, 0
+; CHECK: .set __unnamed_1.has_dyn_sized_stack, 0
+; CHECK: .set __unnamed_1.has_recursion, 0
+; CHECK: .set __unnamed_1.has_indirect_call, 0
+define void @1() {
+entry:
+ ret void
+}
+
+; CHECK-LABEL: __unnamed_2:
+; CHECK: .set __unnamed_2.num_vgpr, max(32, __unnamed_1.num_vgpr)
+; CHECK: .set __unnamed_2.num_agpr, max(0, __unnamed_1.num_agpr)
+; CHECK: .set __unnamed_2.numbered_sgpr, max(34, __unnamed_1.numbered_sgpr)
+; CHECK: .set __unnamed_2.private_seg_size, 16+(max(__unnamed_1.private_seg_size))
+; CHECK: .set __unnamed_2.uses_vcc, or(0, __unnamed_1.uses_vcc)
+; CHECK: .set __unnamed_2.uses_flat_scratch, or(0, __unnamed_1.uses_flat_scratch)
+; CHECK: .set __unnamed_2.has_dyn_sized_stack, or(0, __unnamed_1.has_dyn_sized_stack)
+; CHECK: .set __unnamed_2.has_recursion, or(1, __unnamed_1.has_recursion)
+; CHECK: .set __unnamed_2.has_indirect_call, or(0, __unnamed_1.has_indirect_call)
+define void @2() {
+entry:
+ call void @1()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}use
+; CHECK: .set use.num_vgpr, max(32, __unnamed_1.num_vgpr, __unnamed_2.num_vgpr)
+; CHECK: .set use.num_agpr, max(0, __unnamed_1.num_agpr, __unnamed_2.num_agpr)
+; CHECK: .set use.numbered_sgpr, max(33, __unnamed_1.numbered_sgpr, __unnamed_2.numbered_sgpr)
+; CHECK: .set use.private_seg_size, 0+(max(__unnamed_1.private_seg_size, __unnamed_2.private_seg_size))
+; CHECK: .set use.uses_vcc, or(0, __unnamed_1.uses_vcc, __unnamed_2.uses_vcc)
+; CHECK: .set use.uses_flat_scratch, or(1, __unnamed_1.uses_flat_scratch, __unnamed_2.uses_flat_scratch)
+; CHECK: .set use.has_dyn_sized_stack, or(0, __unnamed_1.has_dyn_sized_stack, __unnamed_2.has_dyn_sized_stack)
+; CHECK: .set use.has_recursion, or(1, __unnamed_1.has_recursion, __unnamed_2.has_recursion)
+; CHECK: .set use.has_indirect_call, or(0, __unnamed_1.has_indirect_call, __unnamed_2.has_indirect_call)
+define amdgpu_kernel void @use() {
+ call void @1()
+ call void @2()
+ ret void
+}
|
lands and reverts: merge challenges, defering 7f60f13 [AMDGPU] Fix resource usage information for unnamed functions (llvm#115320) Change-Id: Ieee365930ec6a55b26cb0a1e93206020ca11c71c
…15320) Resource usage information would try to overwrite unnamed functions if there are multiple within the same compilation unit. This aims to either use the `MCSymbol` assigned to the unnamed function (i.e., `CurrentFnSym`), or, rematerialize the `MCSymbol` for the unnamed function.
…ass (llvm#102913) Converts AMDGPUResourceUsageAnalysis pass from Module to MachineFunction pass. Moves function resource info propagation to to MC layer (through helpers in AMDGPUMCResourceInfo) by generating MCExprs for every function resource which the emitters have been prepped for. Fixes llvm#64863 [AMDGPU] Fix stack size metadata for functions with direct and indirect calls (llvm#110828) When a function has an external call, it should still use the stack sizes of direct, known, calls to calculate its own stack size [AMDGPU] Fix resource usage information for unnamed functions (llvm#115320) Resource usage information would try to overwrite unnamed functions if there are multiple within the same compilation unit. This aims to either use the `MCSymbol` assigned to the unnamed function (i.e., `CurrentFnSym`), or, rematerialize the `MCSymbol` for the unnamed function. Reapply [AMDGPU] Avoid resource propagation for recursion through multiple functions (llvm#112251) I was wrong last patch. I viewed the `Visited` set purely as a possible recursion deterrent where functions calling a callee multiple times are handled elsewhere. This doesn't consider cases where a function is called multiple times by different callers still part of the same call graph. New test shows the aforementioned case. Reapplies llvm#111004, fixes llvm#115562. [AMDGPU] Newly added test modified for recent SGPR use change (llvm#116427) Mistimed rebase for llvm#112251 which added new tests which did not consider the changes introduced in llvm#112403 yet Change-Id: I4dfe6a1b679137e080a6d2b44016347ea704b014
Resource usage information would try to overwrite unnamed functions if there are multiple within the same compilation unit. This aims to either use the
MCSymbol
assigned to the unnamed function (i.e.,CurrentFnSym
), or, rematerialize theMCSymbol
for the unnamed function.