-
Notifications
You must be signed in to change notification settings - Fork 13.6k
Reapply [AMDGPU] Avoid resource propagation for recursion through multiple functions #112251
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…tiple functions I was wrong last patch. I viewed the visited set purely as a possible recursion deterrent where functions calling a callee multiple times are handled elsewhere. This wouldn't consider cases where a function is called multiple times by different callers still part of the same call graph. New test shows the aforementioned case. Reapplies llvm#111004
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Janek van Oirschot (JanekvO) Changes: I was wrong last patch. I viewed the `Visited` set purely as a possible recursion deterrent where functions calling a callee multiple times are handled elsewhere. This wouldn't consider cases where a function is called multiple times by different callers still part of the same call graph. New test shows the aforementioned case. Reapplies #111004. Patch is 21.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112251.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index da0397fa20bd1b..ee1453d1d733ba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -91,6 +91,68 @@ MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
}
+// The (partially complete) expression should have no recursion in it. After
+// all, we're trying to avoid recursion using this codepath. Returns true if
+// Sym is found within Expr without recursing on Expr, false otherwise.
+static bool findSymbolInExpr(MCSymbol *Sym, const MCExpr *Expr,
+ SmallVectorImpl<const MCExpr *> &Exprs,
+ SmallPtrSetImpl<const MCExpr *> &Visited) {
+ // Skip duplicate visits
+ if (!Visited.insert(Expr).second)
+ return false;
+
+ switch (Expr->getKind()) {
+ default:
+ return false;
+ case MCExpr::ExprKind::SymbolRef: {
+ const MCSymbolRefExpr *SymRefExpr = cast<MCSymbolRefExpr>(Expr);
+ const MCSymbol &SymRef = SymRefExpr->getSymbol();
+ if (Sym == &SymRef)
+ return true;
+ if (SymRef.isVariable())
+ Exprs.push_back(SymRef.getVariableValue(/*isUsed=*/false));
+ return false;
+ }
+ case MCExpr::ExprKind::Binary: {
+ const MCBinaryExpr *BExpr = cast<MCBinaryExpr>(Expr);
+ Exprs.push_back(BExpr->getLHS());
+ Exprs.push_back(BExpr->getRHS());
+ return false;
+ }
+ case MCExpr::ExprKind::Unary: {
+ const MCUnaryExpr *UExpr = cast<MCUnaryExpr>(Expr);
+ Exprs.push_back(UExpr->getSubExpr());
+ return false;
+ }
+ case MCExpr::ExprKind::Target: {
+ const AMDGPUMCExpr *AGVK = cast<AMDGPUMCExpr>(Expr);
+ for (const MCExpr *E : AGVK->getArgs())
+ Exprs.push_back(E);
+ return false;
+ }
+ }
+}
+
+// Symbols whose values eventually are used through their defines (i.e.,
+// recursive) must be avoided. Do a walk over Expr to see if Sym will occur in
+// it. The Expr is an MCExpr given through a callee's equivalent MCSymbol so if
+// no recursion is found Sym can be safely assigned to a (sub-)expr which
+// contains the symbol Expr is associated with. Returns true if Sym exists
+// in Expr or its sub-expressions, false otherwise.
+static bool foundRecursiveSymbolDef(MCSymbol *Sym, const MCExpr *Expr) {
+ SmallVector<const MCExpr *, 8> WorkList;
+ SmallPtrSet<const MCExpr *, 8> Visited;
+ WorkList.push_back(Expr);
+
+ while (!WorkList.empty()) {
+ const MCExpr *CurExpr = WorkList.pop_back_val();
+ if (findSymbolInExpr(Sym, CurExpr, WorkList, Visited))
+ return true;
+ }
+
+ return false;
+}
+
void MCResourceInfo::assignResourceInfoExpr(
int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
@@ -98,6 +160,7 @@ void MCResourceInfo::assignResourceInfoExpr(
const MCConstantExpr *LocalConstExpr =
MCConstantExpr::create(LocalValue, OutContext);
const MCExpr *SymVal = LocalConstExpr;
+ MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
if (!Callees.empty()) {
SmallVector<const MCExpr *, 8> ArgExprs;
// Avoid recursive symbol assignment.
@@ -110,11 +173,17 @@ void MCResourceInfo::assignResourceInfoExpr(
if (!Seen.insert(Callee).second)
continue;
MCSymbol *CalleeValSym = getSymbol(Callee->getName(), RIK, OutContext);
- ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+ bool CalleeIsVar = CalleeValSym->isVariable();
+ if (!CalleeIsVar ||
+ (CalleeIsVar &&
+ !foundRecursiveSymbolDef(
+ Sym, CalleeValSym->getVariableValue(/*IsUsed=*/false)))) {
+ ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+ }
}
- SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
+ if (ArgExprs.size() > 1)
+ SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
}
- MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
Sym->setVariableValue(SymVal);
}
@@ -155,6 +224,7 @@ void MCResourceInfo::gatherResourceInfo(
// The expression for private segment size should be: FRI.PrivateSegmentSize
// + max(FRI.Callees, FRI.CalleeSegmentSize)
SmallVector<const MCExpr *, 8> ArgExprs;
+ MCSymbol *Sym = getSymbol(MF.getName(), RIK_PrivateSegSize, OutContext);
if (FRI.CalleeSegmentSize)
ArgExprs.push_back(
MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
@@ -165,9 +235,15 @@ void MCResourceInfo::gatherResourceInfo(
if (!Seen.insert(Callee).second)
continue;
if (!Callee->isDeclaration()) {
- MCSymbol *calleeValSym =
+ MCSymbol *CalleeValSym =
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
- ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
+ bool CalleeIsVar = CalleeValSym->isVariable();
+ if (!CalleeIsVar ||
+ (CalleeIsVar &&
+ !foundRecursiveSymbolDef(
+ Sym, CalleeValSym->getVariableValue(/*IsUsed=*/false)))) {
+ ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+ }
}
}
const MCExpr *localConstExpr =
@@ -178,8 +254,7 @@ void MCResourceInfo::gatherResourceInfo(
localConstExpr =
MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
}
- getSymbol(MF.getName(), RIK_PrivateSegSize, OutContext)
- ->setVariableValue(localConstExpr);
+ Sym->setVariableValue(localConstExpr);
}
auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index d3a6b4e01ebfb8..c8cf7d7e535b33 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -481,6 +481,132 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
ret void
}
+; GCN-LABEL: {{^}}multi_stage_recurse2:
+; GCN: .set multi_stage_recurse2.num_vgpr, max(41, multi_stage_recurse1.num_vgpr)
+; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
+; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
+; GCN: .set multi_stage_recurse2.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
+; GCN: .set multi_stage_recurse2.uses_flat_scratch, or(0, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
+; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
+; GCN: NumVgprs: max(41, multi_stage_recurse1.num_vgpr)
+; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
+; GCN-LABEL: {{^}}multi_stage_recurse1:
+; GCN: .set multi_stage_recurse1.num_vgpr, 41
+; GCN: .set multi_stage_recurse1.num_agpr, 0
+; GCN: .set multi_stage_recurse1.numbered_sgpr, 34
+; GCN: .set multi_stage_recurse1.private_seg_size, 16
+; GCN: .set multi_stage_recurse1.uses_vcc, 1
+; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse1.has_recursion, 1
+; GCN: .set multi_stage_recurse1.has_indirect_call, 0
+; GCN: TotalNumSgprs: 38
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 16
+define void @multi_stage_recurse1(i32 %val) #2 {
+ call void @multi_stage_recurse2(i32 %val)
+ ret void
+}
+define void @multi_stage_recurse2(i32 %val) #2 {
+ call void @multi_stage_recurse1(i32 %val)
+ ret void
+}
+
+; GCN-LABEL: {{^}}usage_multi_stage_recurse:
+; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr)
+; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr)
+; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size))
+; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
+; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
+; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: 40
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 16
+define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
+ call void @multi_stage_recurse1(i32 %n)
+ ret void
+}
+
+; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
+; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
+; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(34, multi_stage_recurse_noattr1.numbered_sgpr)
+; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
+; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
+; GCN: .set multi_stage_recurse_noattr2.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
+; GCN: .set multi_stage_recurse_noattr2.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
+; GCN: .set multi_stage_recurse_noattr2.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+(extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1))
+; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
+; GCN: .set multi_stage_recurse_noattr1.num_vgpr, 41
+; GCN: .set multi_stage_recurse_noattr1.num_agpr, 0
+; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, 34
+; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
+; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
+; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
+; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
+; GCN: TotalNumSgprs: 38
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 16
+define void @multi_stage_recurse_noattr1(i32 %val) #0 {
+ call void @multi_stage_recurse_noattr2(i32 %val)
+ ret void
+}
+define void @multi_stage_recurse_noattr2(i32 %val) #0 {
+ call void @multi_stage_recurse_noattr1(i32 %val)
+ ret void
+}
+
+; GCN-LABEL: {{^}}usage_multi_stage_recurse_noattrs:
+; GCN: .set usage_multi_stage_recurse_noattrs.num_vgpr, max(32, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, multi_stage_recurse_noattr1.numbered_sgpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN: .set usage_multi_stage_recurse_noattrs.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
+; GCN: .set usage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, multi_stage_recurse_noattr1.uses_flat_scratch)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
+; GCN: TotalNumSgprs: 40
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 16
+define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
+ call void @multi_stage_recurse_noattr1(i32 %n)
+ ret void
+}
+
+; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
+; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
+; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(43, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
+; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
+; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
+; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: 49
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 2052
+define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
+ call void @use_stack0()
+ call void @use_stack1()
+ call void @multi_stage_recurse1(i32 %n)
+ ret void
+}
+
; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
diff --git a/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll
new file mode 100644
index 00000000000000..c04bc96828e1f1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll
@@ -0,0 +1,82 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}qux
+; CHECK: .set qux.num_vgpr, 0
+; CHECK: .set qux.num_agpr, 0
+; CHECK: .set qux.numbered_sgpr, 32
+; CHECK: .set qux.private_seg_size, 0
+; CHECK: .set qux.uses_vcc, 0
+; CHECK: .set qux.uses_flat_scratch, 0
+; CHECK: .set qux.has_dyn_sized_stack, 0
+; CHECK: .set qux.has_recursion, 0
+; CHECK: .set qux.has_indirect_call, 0
+define void @qux() {
+entry:
+ ret void
+}
+
+; CHECK-LABEL: {{^}}baz
+; CHECK: .set baz.num_vgpr, max(32, qux.num_vgpr)
+; CHECK: .set baz.num_agpr, max(0, qux.num_agpr)
+; CHECK: .set baz.numbered_sgpr, max(34, qux.numbered_sgpr)
+; CHECK: .set baz.private_seg_size, 16+(max(qux.private_seg_size))
+; CHECK: .set baz.uses_vcc, or(0, qux.uses_vcc)
+; CHECK: .set baz.uses_flat_scratch, or(0, qux.uses_flat_scratch)
+; CHECK: .set baz.has_dyn_sized_stack, or(0, qux.has_dyn_sized_stack)
+; CHECK: .set baz.has_recursion, or(1, qux.has_recursion)
+; CHECK: .set baz.has_indirect_call, or(0, qux.has_indirect_call)
+define void @baz() {
+entry:
+ call void @qux()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}bar
+; CHECK: .set bar.num_vgpr, max(32, baz.num_vgpr, qux.num_vgpr)
+; CHECK: .set bar.num_agpr, max(0, baz.num_agpr, qux.num_agpr)
+; CHECK: .set bar.numbered_sgpr, max(34, baz.numbered_sgpr, qux.numbered_sgpr)
+; CHECK: .set bar.private_seg_size, 16+(max(baz.private_seg_size, qux.private_seg_size))
+; CHECK: .set bar.uses_vcc, or(0, baz.uses_vcc, qux.uses_vcc)
+; CHECK: .set bar.uses_flat_scratch, or(0, baz.uses_flat_scratch, qux.uses_flat_scratch)
+; CHECK: .set bar.has_dyn_sized_stack, or(0, baz.has_dyn_sized_stack, qux.has_dyn_sized_stack)
+; CHECK: .set bar.has_recursion, or(1, baz.has_recursion, qux.has_recursion)
+; CHECK: .set bar.has_indirect_call, or(0, baz.has_indirect_call, qux.has_indirect_call)
+define void @bar() {
+entry:
+ call void @baz()
+ call void @qux()
+ call void @baz()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}foo
+; CHECK: .set foo.num_vgpr, max(32, bar.num_vgpr)
+; CHECK: .set foo.num_agpr, max(0, bar.num_agpr)
+; CHECK: .set foo.numbered_sgpr, max(34, bar.numbered_sgpr)
+; CHECK: .set foo.private_seg_size, 16+(max(bar.private_seg_size))
+; CHECK: .set foo.uses_vcc, or(0, bar.uses_vcc)
+; CHECK: .set foo.uses_flat_scratch, or(0, bar.uses_flat_scratch)
+; CHECK: .set foo.has_dyn_sized_stack, or(0, bar.has_dyn_sized_stack)
+; CHECK: .set foo.has_recursion, or(1, bar.has_recursion)
+; CHECK: .set foo.has_indirect_call, or(0, bar.has_indirect_call)
+define void @foo() {
+entry:
+ call void @bar()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}usefoo
+; CHECK: .set usefoo.num_vgpr, max(32, foo.num_vgpr)
+; CHECK: .set usefoo.num_agpr, max(0, foo.num_agpr)
+; CHECK: .set usefoo.numbered_sgpr, max(33, foo.numbered_sgpr)
+; CHECK: .set usefoo.private_seg_size, 0+(max(foo.private_seg_size))
+; CHECK: .set usefoo.uses_vcc, or(0, foo.uses_vcc)
+; CHECK: .set usefoo.uses_flat_scratch, or(1, foo.uses_flat_scratch)
+; CHECK: .set usefoo.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
+; CHECK: .set usefoo.has_recursion, or(1, foo.has_recursion)
+; CHECK: .set usefoo.has_indirect_call, or(0, foo.has_indirect_call)
+define amdgpu_kernel void @usefoo() {
+ call void @foo()
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll
new file mode 100644
index 00000000000000..7e1090afc0cf1a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll
@@ -0,0 +1,85 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}qux
+; CHECK: .set qux.num_vgpr, max(41, foo.num_vgpr)
+; CHECK: .set qux.num_agpr, max(0, foo.num_agpr)
+; CHECK: .set qux.numbered_sgpr, max(34, foo.numbered_sgpr)
+; CHECK: .set qux.private_seg_size, 16
+; CHECK: .set qux.uses_vcc, or(1, foo.uses_vcc)
+; CHECK: .set qux.uses_flat_scratch, or(0, foo.uses_flat_scratch)
+; CHECK: .set qux.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
+; CHECK: .set qux.has_recursion, or(1, foo.has_recursion)
+; CHECK: .set qux.has_indirect_call, or(0, foo.has_indirect_call)
+
+; CHECK-LABEL: {{^}}baz
+; CHECK: .set baz.num_vgpr, max(42, qux.num_vgpr)
+; CHECK: .set baz.num_agpr, max(0, qux.num_agpr)
+; CHECK: .set baz.numbered_sgpr, max(34, qux.numbered_sgpr)
+; CHECK: .set baz.private_seg_size, 16+(max(qux.private_seg_size))
+; CHECK: .set baz.uses_vcc, or(1, qux.uses_vcc)
+; CHECK: .set baz.uses_flat_scratch, or(0, qux.uses_flat_scratch)
+; CHECK: .set baz.has_dyn_sized_stack, or(0, qux.has_dyn_sized_stack)
+; CHECK: .set baz.has_recursion, or(1, qux.has_recursion)
+; CHECK: .set baz.has_indirect_call, or(0, qux.has_indirect_call)
+
+; CHECK-LABEL: {{^}}bar
+; CHECK: .set bar.num_vgpr, max(42, baz.num_vgpr)
+; CHECK: .set bar.num_agpr, max(0, baz.num_agpr)
+; CHECK: .set bar.numbered_sgpr, max(34, baz.numbered_sgpr)
+; CHECK: .set bar.private_seg_size, 16+(max(baz.private_seg_size))
+; CHECK: .set bar.uses_vcc, or(1, baz.uses_vcc)
+; CHECK: .set bar.uses_flat_scratch, or(0, baz.uses_flat_scratch)
+; CHECK: .set bar.has_dyn_sized_stack, or(0, baz.has_dyn_sized_stack)
+; CHECK: .set bar.has_recursion, or(1, baz.has_recursion)
+; CHECK: .set bar.has_indirect_call, or(0, baz.has_indirect_call)
+
+; CHECK-LABEL: {{^}}foo
+; CHECK: .set foo.num_vgpr, 42
+; CHECK: .set foo.num_agpr, 0
+; CHECK: .set foo.numbered_sgpr, 34
+; CHECK: .set foo.private_seg_size, 16
+; CHECK: .set foo.uses_vcc, 1
+; CHECK: .set foo.uses_flat_scra...
[truncated]
|
SmallVector<const MCExpr *, 8> WorkList; | ||
SmallPtrSet<const MCExpr *, 8> Visited; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're still doing this worklist + visited set for each visited callee. Can you pull this out and merge with the loop over callees?
That is the Seen function set and the Visited MCExprs are probably redundant.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately, I wasn't able to remove the `Seen` function set, as creating a new `MCSymbolRefExpr` will always be unique, even if the same `MCSymbol` is used. This means that the new `WorkSet` wouldn't detect the duplicate, and the function resource info expressions may end up with duplicate callees' resource info.
const MCExpr *Expr = *It; | ||
WorkSet.erase(Expr); | ||
|
||
FoundRecursion = findSymbolInExpr(Sym, Expr, WorkSet); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking explicitly for recursion as a case to handle still does not make sense to me. I expect a recursive visitor function that handles the recursive naturally, without trying to treat it as a special case. Have the function return null if it isn't resolvable?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I may not completely understand the suggested approach, but the reason I explicitly add `Sym` is because I want to explicitly check whether `Sym` is part of a recursive definition, and not so much whether `Sym` has any recursion as part of its expression. Is your suggested approach compatible with seeing whether `Sym` is part of the recursion or not?
|
||
for (const Function *Callee : Callees) { | ||
if (!Seen.insert(Callee).second) | ||
continue; | ||
|
||
SmallPtrSet<const MCExpr *, 8> WorkSet; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I expect to maintain the set for all callees to be visited, not redone on each one
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried that as well, but I had a hard time re-retrieving the `MCSymbolRefExpr` from the `WorkSet` after walking down it and concluding it should be added to `ArgExprs` (I may be missing some low-hanging fruit).
} | ||
if (CalleeValSym->isVariable() && !FoundRecursion) | ||
if (!CalleeIsVar || | ||
(CalleeIsVar && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `CalleeIsVar &&` is redundant here.
|
||
if (CalleeValSym->isVariable() && !FoundRecursion) | ||
if (!CalleeIsVar || | ||
(CalleeIsVar && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `CalleeIsVar &&` is redundant.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've lost sight of the original problem. What is the problem with creating a recursive expression? Can you deal with the recursion when the expression is evaluated instead of avoiding creating it?
The evaluation of the
It'd have to be in the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems the precedent is for MCExprs to not support recursion. If we are going this way, this code should be unified to deal with the weakref edge cases. Alternatively, it feels more natural to directly support recursion in expressions, but maybe that is more difficult.
…and add MCTargetExpr specific subexpr considerations for isSymbolUsedInExpression
I've moved the implementation of |
ping |
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext)); | ||
if (!CalleeValSym->isVariable() || | ||
!CalleeValSym->getVariableValue(/*isUsed=*/false) | ||
->isSymbolUsedInExpression(Sym)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this miss adding other functions in the path to callee that we need to still add to the expression?
e.g.
f1->f2->f3->f2
From f1, we see that callee f2 appears in a recursive expression, so it is skipped. but that will mean we do not add f3 to the expression, even though it is needed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, sadly enough order matters in that it is possible for such a skip to occur. For now I've assumed worst case and taken the module level register maximums for recursion. Preferably I'd like to have cycle/SCC scope register maximums computed and used but I think that's out of scope for this PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should try to flatten the expression to remove the recursive edges
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed, will do so in a follow up PR
… introduced test.
Ping again, sorry |
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm but please continue working on flattening the expression instead of going to the worst case
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/10994 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/8515 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/8640 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/8706 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/8599 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/8594 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/95/builds/6275 Here is the relevant piece of the build log for the reference
|
Looking |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/35/builds/3687 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/76/builds/4541 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/6559 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/14815 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/8961 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/12346 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/12853 Here is the relevant piece of the build log for the reference
|
I just would like to see some debug-info as this might lead to more information being used. |
…ass (llvm#102913) Converts AMDGPUResourceUsageAnalysis pass from Module to MachineFunction pass. Moves function resource info propagation to to MC layer (through helpers in AMDGPUMCResourceInfo) by generating MCExprs for every function resource which the emitters have been prepped for. Fixes llvm#64863 [AMDGPU] Fix stack size metadata for functions with direct and indirect calls (llvm#110828) When a function has an external call, it should still use the stack sizes of direct, known, calls to calculate its own stack size [AMDGPU] Fix resource usage information for unnamed functions (llvm#115320) Resource usage information would try to overwrite unnamed functions if there are multiple within the same compilation unit. This aims to either use the `MCSymbol` assigned to the unnamed function (i.e., `CurrentFnSym`), or, rematerialize the `MCSymbol` for the unnamed function. Reapply [AMDGPU] Avoid resource propagation for recursion through multiple functions (llvm#112251) I was wrong last patch. I viewed the `Visited` set purely as a possible recursion deterrent where functions calling a callee multiple times are handled elsewhere. This doesn't consider cases where a function is called multiple times by different callers still part of the same call graph. New test shows the aforementioned case. Reapplies llvm#111004, fixes llvm#115562. [AMDGPU] Newly added test modified for recent SGPR use change (llvm#116427) Mistimed rebase for llvm#112251 which added new tests which did not consider the changes introduced in llvm#112403 yet Change-Id: I4dfe6a1b679137e080a6d2b44016347ea704b014
The
If I ever have time to clean it, I think the traversal should be moved to AMDGPU. |
We report cyclic dependency errors for variable symbols and rely on isSymbolUsedInExpression in parseAssignmentExpression at parse time, which does not catch all setVariableValue cases (e.g. cyclic .weakref). Instead, add a bit to MCSymbol and check it when walking the variable value MCExpr. When a cycle is detected when we have a final layout, report an error and set the variable to a constant to avoid duplicate errors. isSymbolUsedInExpression is considered deprecated, but it is still used by AMDGPU (#112251).
We report cyclic dependency errors for variable symbols and rely on isSymbolUsedInExpression in parseAssignmentExpression at parse time, which does not catch all setVariableValue cases (e.g. cyclic .weakref). Instead, add a bit to MCSymbol and check it when walking the variable value MCExpr. When a cycle is detected when we have a final layout, report an error and set the variable to a constant to avoid duplicate errors. isSymbolUsedInExpression is considered deprecated, but it is still used by AMDGPU (llvm#112251).
I was wrong last patch. I viewed the `Visited` set purely as a possible recursion deterrent where functions calling a callee multiple times are handled elsewhere. This doesn't consider cases where a function is called multiple times by different callers still part of the same call graph. New test shows the aforementioned case. Reapplies #111004, fixes #115562.