Skip to content

Commit c81d84c

Browse files
authored
[InlineCost]: Optimize inlining of recursive function. (#139982)
- Consider inlining recursive function of depth 1 only when the caller is the function itself instead of inlining it for each callsite so that we avoid redundant work. - Use CondContext instead of DomTree for better compilation time.
1 parent 107d8c7 commit c81d84c

File tree

2 files changed

+92
-60
lines changed

2 files changed

+92
-60
lines changed

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 43 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
263263
// Cache the DataLayout since we use it a lot.
264264
const DataLayout &DL;
265265

266-
DominatorTree DT;
267-
268266
/// The OptimizationRemarkEmitter available for this compilation.
269267
OptimizationRemarkEmitter *ORE;
270268

@@ -1688,66 +1686,51 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
16881686
if (!isa<Argument>(Cmp.getOperand(0)) || !isa<Constant>(Cmp.getOperand(1)))
16891687
return false;
16901688
auto *CmpOp = Cmp.getOperand(0);
1691-
Function *F = Cmp.getFunction();
1692-
// Iterate over the users of the function to check if it's a recursive
1693-
// function:
1694-
for (auto *U : F->users()) {
1695-
CallInst *Call = dyn_cast<CallInst>(U);
1696-
if (!Call || Call->getFunction() != F || Call->getCalledFunction() != F)
1697-
continue;
1698-
auto *CallBB = Call->getParent();
1699-
auto *Predecessor = CallBB->getSinglePredecessor();
1700-
// Only handle the case when the callsite has a single predecessor:
1701-
if (!Predecessor)
1702-
continue;
1689+
// Make sure that the callsite is recursive:
1690+
if (CandidateCall.getCaller() != &F)
1691+
return false;
1692+
// Only handle the case when the callsite has a single predecessor:
1693+
auto *CallBB = CandidateCall.getParent();
1694+
auto *Predecessor = CallBB->getSinglePredecessor();
1695+
if (!Predecessor)
1696+
return false;
1697+
// Check if the callsite is guarded by the same Cmp instruction:
1698+
auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
1699+
if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp)
1700+
return false;
17031701

1704-
auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
1705-
if (!Br || Br->isUnconditional())
1706-
continue;
1707-
// Check if the Br condition is the same Cmp instr we are investigating:
1708-
if (Br->getCondition() != &Cmp)
1709-
continue;
1710-
// Check if there are any arg of the recursive callsite is affecting the cmp
1711-
// instr:
1712-
bool ArgFound = false;
1713-
Value *FuncArg = nullptr, *CallArg = nullptr;
1714-
for (unsigned ArgNum = 0;
1715-
ArgNum < F->arg_size() && ArgNum < Call->arg_size(); ArgNum++) {
1716-
FuncArg = F->getArg(ArgNum);
1717-
CallArg = Call->getArgOperand(ArgNum);
1718-
if (FuncArg == CmpOp && CallArg != CmpOp) {
1719-
ArgFound = true;
1720-
break;
1721-
}
1722-
}
1723-
if (!ArgFound)
1724-
continue;
1725-
// Now we have a recursive call that is guarded by a cmp instruction.
1726-
// Check if this cmp can be simplified:
1727-
SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
1728-
DomConditionCache DC;
1729-
DC.registerBranch(Br);
1730-
SQ.DC = &DC;
1731-
if (DT.root_size() == 0) {
1732-
// Dominator tree was never constructed for any function yet.
1733-
DT.recalculate(*F);
1734-
} else if (DT.getRoot()->getParent() != F) {
1735-
// Dominator tree was constructed for a different function, recalculate
1736-
// it for the current function.
1737-
DT.recalculate(*F);
1702+
// Check if there is any arg of the recursive callsite is affecting the cmp
1703+
// instr:
1704+
bool ArgFound = false;
1705+
Value *FuncArg = nullptr, *CallArg = nullptr;
1706+
for (unsigned ArgNum = 0;
1707+
ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) {
1708+
FuncArg = F.getArg(ArgNum);
1709+
CallArg = CandidateCall.getArgOperand(ArgNum);
1710+
if (FuncArg == CmpOp && CallArg != CmpOp) {
1711+
ArgFound = true;
1712+
break;
17381713
}
1739-
SQ.DT = &DT;
1740-
Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
1741-
cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
1742-
if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
1743-
bool IsTrueSuccessor = CallBB == Br->getSuccessor(0);
1744-
// Make sure that the BB of the recursive call is NOT the next successor
1745-
// of the icmp. In other words, make sure that the recursion depth is 1.
1746-
if ((ConstVal->isOne() && !IsTrueSuccessor) ||
1747-
(ConstVal->isZero() && IsTrueSuccessor)) {
1748-
SimplifiedValues[&Cmp] = ConstVal;
1749-
return true;
1750-
}
1714+
}
1715+
if (!ArgFound)
1716+
return false;
1717+
1718+
// Now we have a recursive call that is guarded by a cmp instruction.
1719+
// Check if this cmp can be simplified:
1720+
SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
1721+
CondContext CC(&Cmp);
1722+
CC.Invert = (CallBB != Br->getSuccessor(0));
1723+
SQ.CC = &CC;
1724+
CC.AffectedValues.insert(FuncArg);
1725+
Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
1726+
cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
1727+
if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
1728+
// Make sure that the BB of the recursive call is NOT the true successor
1729+
// of the icmp. In other words, make sure that the recursion depth is 1.
1730+
if ((ConstVal->isOne() && CC.Invert) ||
1731+
(ConstVal->isZero() && !CC.Invert)) {
1732+
SimplifiedValues[&Cmp] = ConstVal;
1733+
return true;
17511734
}
17521735
}
17531736
return false;
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s
3+
4+
; This test shows that the recursive function will not get simplified
5+
; unless the caller is the function itself, not another different caller.
6+
7+
; CHECK: Inlining calls in: test
8+
; CHECK: Function size: 2
9+
; CHECK: NOT Inlining (cost=never): recursive, Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale)
10+
11+
; CHECK: Inlining calls in: inline_rec_true_successor
12+
; CHECK: Function size: 10
13+
; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
14+
; CHECK: Size after inlining: 17
15+
; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test = tail call float @test(float %fneg, float %common.ret18.op.i)
16+
; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test.i = tail call float @test(float %x, float %call.i)
17+
; CHECK: Skipping inlining due to history: inline_rec_true_successor -> inline_rec_true_successor
18+
; CHECK: Updated inlining SCC: (test, inline_rec_true_successor)
19+
20+
; CHECK: Inlining calls in: test
21+
; CHECK: Function size: 2
22+
; CHECK: Inlining (cost=25, threshold=225), Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale)
23+
; CHECK: Size after inlining: 10
24+
25+
define float @test(float %x, float %scale) noinline {
26+
entry:
27+
%call = tail call float @inline_rec_true_successor(float %x, float %scale)
28+
ret float %call
29+
}
30+
31+
define float @inline_rec_true_successor(float %x, float %scale) {
32+
entry:
33+
%cmp = fcmp olt float %x, 0.000000e+00
34+
br i1 %cmp, label %if.then, label %if.end
35+
36+
common.ret18: ; preds = %if.then, %if.end
37+
%common.ret18.op = phi float [ %call_test, %if.then ], [ %mul, %if.end ]
38+
ret float %common.ret18.op
39+
40+
if.then: ; preds = %entry
41+
%fneg = fneg float %x
42+
%call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
43+
%call_test = tail call float @test(float %fneg, float %call)
44+
br label %common.ret18
45+
46+
if.end: ; preds = %entry
47+
%mul = fmul float %x, %scale
48+
br label %common.ret18
49+
}

0 commit comments

Comments
 (0)