-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Analysis]: Allow inlining recursive call IF recursion depth is 1. #119677
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Hassnaa Hamdi (hassnaaHamdi) ChangesFull diff: https://github.com/llvm/llvm-project/pull/119677.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index ed54b0c077b4a4..ac61ca064c28b3 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -235,7 +235,7 @@ struct InlineParams {
std::optional<bool> EnableDeferral;
/// Indicate whether we allow inlining for recursive call.
- std::optional<bool> AllowRecursiveCall = false;
+ std::optional<bool> AllowRecursiveCall = true;
};
std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind);
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 85287a39f2caad..c2ade73b2ddcb4 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -1160,6 +1161,10 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
+ bool shouldCheckRecursiveCall() {
+ return IsRecursiveCall && AllowRecursiveCall;
+ }
+ bool shouldInlineRecursiveCall(CallBase &Call);
};
// Return true if CB is the sole call to local function Callee.
@@ -2880,6 +2885,68 @@ InlineResult CallAnalyzer::analyze() {
return finalizeAnalysis();
}
+bool InlineCostCallAnalyzer::shouldInlineRecursiveCall(CallBase &Call) {
+ CallInst *CI = cast<CallInst>(&Call);
+ auto CIB = CI->getParent();
+ // Only handle the case when we have a single predecessor
+ if (auto Predecessor = CIB->getSinglePredecessor()) {
+ BranchInst *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
+ if (!Br || Br->isUnconditional()) {
+ return false;
+ }
+ Value *Var = Br->getCondition();
+ CmpInst *CmpInstr = dyn_cast<CmpInst>(Var);
+ if (CmpInstr && !isa<Constant>(CmpInstr->getOperand(1))) {
+ // Current logic of ValueTracking/DomConditionCache works only if RHS is
+ // constant.
+ return false;
+ }
+ unsigned ArgNum = 0;
+ Value *FuncArg = nullptr, *CallArg = nullptr;
+ // Check which func argument the cmp instr is using:
+ for (; ArgNum < CI->getFunction()->arg_size(); ArgNum++) {
+ FuncArg = CI->getFunction()->getArg(ArgNum);
+ CallArg = CI->getArgOperand(ArgNum);
+ if (CmpInstr) {
+ if ((FuncArg == CmpInstr->getOperand(0)) &&
+ (CallArg != CmpInstr->getOperand(0)))
+ break;
+ } else if (FuncArg == Var && (CallArg != Var))
+ break;
+ }
+ // Only handle the case when a func argument controls the cmp instruction:
+ if (ArgNum < CI->getFunction()->arg_size()) {
+ bool isTrueSuccessor = CIB == Br->getSuccessor(0);
+ if (CmpInstr) {
+ SimplifyQuery SQ(CI->getFunction()->getDataLayout(),
+ dyn_cast<Instruction>(CallArg));
+ DomConditionCache DC;
+ DC.registerBranch(Br);
+ SQ.DC = &DC;
+ DominatorTree DT(*CI->getFunction());
+ SQ.DT = &DT;
+ Value *simplifiedInstruction = llvm::simplifyInstructionWithOperands(
+ CmpInstr, {CallArg, CmpInstr->getOperand(1)}, SQ);
+ if (!simplifiedInstruction)
+ return false;
+ if (auto *ConstVal =
+ dyn_cast<llvm::ConstantInt>(simplifiedInstruction)) {
+ if (ConstVal->isOne())
+ return !isTrueSuccessor;
+ return isTrueSuccessor;
+ }
+ } else {
+ if (auto *ConstVal = dyn_cast<llvm::ConstantInt>(CallArg)) {
+ if (ConstVal->isOne())
+ return !isTrueSuccessor;
+ return isTrueSuccessor;
+ }
+ }
+ }
+ }
+ return false;
+}
+
void InlineCostCallAnalyzer::print(raw_ostream &OS) {
#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
if (PrintInstructionComments)
@@ -3106,6 +3173,12 @@ InlineCost llvm::getInlineCost(
LLVM_DEBUG(CA.dump());
+ // Check if callee function is recursive:
+ if (ShouldInline.isSuccess()) {
+ if (CA.shouldCheckRecursiveCall() && !CA.shouldInlineRecursiveCall(Call))
+ return InlineCost::getNever("deep recursion");
+ }
+
// Always make cost benefit based decision explicit.
// We use always/never here since threshold is not meaningful,
// as it's not what drives cost-benefit analysis.
@@ -3148,6 +3221,13 @@ InlineResult llvm::isInlineViable(Function &F) {
// Disallow recursive calls.
Function *Callee = Call->getCalledFunction();
+  // This function is called when the callee has the "alwaysinline"
+  // attribute. If we allowed the inlining here (given that recursive
+  // inlining is enabled), there would be a problem on the second inlining
+  // attempt: the Inliner pass allows only one round of inlining and then
+  // inserts the "noinline" attribute, which conflicts with
+  // "alwaysinline". So "alwaysinline" on a recursive function is
+  // disallowed to avoid conflicting attributes.
if (&F == Callee)
return InlineResult::failure("recursive call");
diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn.ll b/llvm/test/Transforms/Inline/inline-recursive-fn.ll
new file mode 100644
index 00000000000000..db90050d009a04
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-recursive-fn.ll
@@ -0,0 +1,179 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='inline,instcombine' < %s | FileCheck %s
+
+define float @inline_rec_true_successor(float %x, float %scale) {
+; CHECK-LABEL: define float @inline_rec_true_successor(
+; CHECK-SAME: float [[X:%.*]], float [[SCALE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[X]], 0.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[COMMON_RET18:.*]]:
+; CHECK-NEXT: [[COMMON_RET18_OP:%.*]] = phi float [ [[COMMON_RET18_OP_I:%.*]], %[[INLINE_REC_TRUE_SUCCESSOR_EXIT:.*]] ], [ [[MUL:%.*]], %[[IF_END]] ]
+; CHECK-NEXT: ret float [[COMMON_RET18_OP]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: br i1 false, label %[[IF_THEN_I:.*]], label %[[IF_END_I:.*]]
+; CHECK: [[IF_THEN_I]]:
+; CHECK-NEXT: br label %[[INLINE_REC_TRUE_SUCCESSOR_EXIT]]
+; CHECK: [[IF_END_I]]:
+; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[SCALE]], [[FNEG]]
+; CHECK-NEXT: br label %[[INLINE_REC_TRUE_SUCCESSOR_EXIT]]
+; CHECK: [[INLINE_REC_TRUE_SUCCESSOR_EXIT]]:
+; CHECK-NEXT: [[COMMON_RET18_OP_I]] = phi float [ poison, %[[IF_THEN_I]] ], [ [[MUL_I]], %[[IF_END_I]] ]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[MUL]] = fmul float [[X]], [[SCALE]]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+;
+entry:
+ %cmp = fcmp olt float %x, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+common.ret18: ; preds = %if.then, %if.end
+ %common.ret18.op = phi float [ %call, %if.then ], [ %mul, %if.end ]
+ ret float %common.ret18.op
+
+if.then: ; preds = %entry
+ %fneg = fneg float %x
+ %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
+ br label %common.ret18
+
+if.end: ; preds = %entry
+ %mul = fmul float %x, %scale
+ br label %common.ret18
+}
+
+define float @test_inline_rec_true_successor(float %x, float %scale) {
+entry:
+ %res = tail call float @inline_rec_true_successor(float %x, float %scale)
+ ret float %res
+}
+
+; Same as previous test except that the recursive callsite is in the false successor
+define float @inline_rec_false_successor(float %x, float %scale) {
+; CHECK-LABEL: define float @inline_rec_false_successor(
+; CHECK-SAME: float [[Y:%.*]], float [[SCALE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[Y]], 0.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[COMMON_RET18:.*]]:
+; CHECK-NEXT: [[COMMON_RET18_OP:%.*]] = phi float [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[COMMON_RET18_OP_I:%.*]], %[[INLINE_REC_FALSE_SUCCESSOR_EXIT:.*]] ]
+; CHECK-NEXT: ret float [[COMMON_RET18_OP]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[MUL]] = fmul float [[Y]], [[SCALE]]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: br i1 true, label %[[IF_THEN_I:.*]], label %[[IF_END_I:.*]]
+; CHECK: [[IF_THEN_I]]:
+; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[Y]]
+; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[SCALE]], [[FNEG]]
+; CHECK-NEXT: br label %[[INLINE_REC_FALSE_SUCCESSOR_EXIT]]
+; CHECK: [[IF_END_I]]:
+; CHECK-NEXT: br label %[[INLINE_REC_FALSE_SUCCESSOR_EXIT]]
+; CHECK: [[INLINE_REC_FALSE_SUCCESSOR_EXIT]]:
+; CHECK-NEXT: [[COMMON_RET18_OP_I]] = phi float [ [[MUL_I]], %[[IF_THEN_I]] ], [ poison, %[[IF_END_I]] ]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+;
+entry:
+ %cmp = fcmp uge float %x, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+common.ret18: ; preds = %if.then, %if.end
+ %common.ret18.op = phi float [ %mul, %if.then ], [ %call, %if.end ]
+ ret float %common.ret18.op
+
+if.then: ; preds = %entry
+ %mul = fmul float %x, %scale
+ br label %common.ret18
+
+if.end: ; preds = %entry
+ %fneg = fneg float %x
+ %call = tail call float @inline_rec_false_successor(float %fneg, float %scale)
+ br label %common.ret18
+}
+
+define float @test_inline_rec_false_successor(float %x, float %scale) {
+entry:
+ %res = tail call float @inline_rec_false_successor(float %x, float %scale)
+ ret float %res
+}
+
+; Test when the BR has Value not cmp instruction
+define float @inline_rec_no_cmp(i1 %flag, float %scale) {
+; CHECK-LABEL: define float @inline_rec_no_cmp(
+; CHECK-SAME: i1 [[FLAG:%.*]], float [[SCALE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[FLAG]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[SUM:%.*]] = fadd float [[SCALE]], 5.000000e+00
+; CHECK-NEXT: [[SUM1:%.*]] = fadd float [[SUM]], [[SCALE]]
+; CHECK-NEXT: br label %[[COMMON_RET:.*]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[SUM2:%.*]] = fadd float [[SCALE]], 5.000000e+00
+; CHECK-NEXT: br label %[[COMMON_RET]]
+; CHECK: [[COMMON_RET]]:
+; CHECK-NEXT: [[COMMON_RET_RES:%.*]] = phi float [ [[SUM1]], %[[IF_THEN]] ], [ [[SUM2]], %[[IF_END]] ]
+; CHECK-NEXT: ret float [[COMMON_RET_RES]]
+;
+entry:
+ br i1 %flag, label %if.then, label %if.end
+if.then:
+ %res = tail call float @inline_rec_no_cmp(i1 false, float %scale)
+ %sum1 = fadd float %res, %scale
+ br label %common.ret
+if.end:
+ %sum2 = fadd float %scale, 5.000000e+00
+ br label %common.ret
+common.ret:
+ %common.ret.res = phi float [ %sum1, %if.then ], [ %sum2, %if.end ]
+ ret float %common.ret.res
+}
+
+define float @test_inline_rec_no_cmp(i1 %flag, float %scale) {
+entry:
+ %res = tail call float @inline_rec_no_cmp(i1 %flag, float %scale)
+ ret float %res
+}
+
+define float @no_inline_rec(float %x, float %scale) {
+; CHECK-LABEL: define float @no_inline_rec(
+; CHECK-SAME: float [[Z:%.*]], float [[SCALE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[Z]], 5.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[COMMON_RET18:.*]]:
+; CHECK-NEXT: [[COMMON_RET18_OP:%.*]] = phi float [ [[FNEG1:%.*]], %[[IF_THEN]] ], [ [[MUL:%.*]], %[[IF_END]] ]
+; CHECK-NEXT: ret float [[COMMON_RET18_OP]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[FADD:%.*]] = fadd float [[Z]], 5.000000e+00
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @no_inline_rec(float [[FADD]], float [[SCALE]])
+; CHECK-NEXT: [[FNEG1]] = fneg float [[CALL]]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[MUL]] = fmul float [[Z]], [[SCALE]]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+;
+entry:
+ %cmp = fcmp olt float %x, 5.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+common.ret18: ; preds = %if.then, %if.end
+ %common.ret18.op = phi float [ %fneg1, %if.then ], [ %mul, %if.end ]
+ ret float %common.ret18.op
+
+if.then: ; preds = %entry
+ %fadd = fadd float %x, 5.000000e+00
+ %call = tail call float @no_inline_rec(float %fadd, float %scale)
+ %fneg1 = fneg float %call
+ br label %common.ret18
+
+if.end: ; preds = %entry
+ %mul = fmul float %x, %scale
+ br label %common.ret18
+}
+
+define float @test_no_inline(float %x, float %scale) {
+entry:
+ %res = tail call float @no_inline_rec(float %x, float %scale)
+ ret float %res
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/Inline/inline-remark.ll b/llvm/test/Transforms/Inline/inline-remark.ll
index 166c7bb0659246..5af20e42dd31a1 100644
--- a/llvm/test/Transforms/Inline/inline-remark.ll
+++ b/llvm/test/Transforms/Inline/inline-remark.ll
@@ -56,6 +56,6 @@ define void @test3() {
}
; CHECK: attributes [[ATTR1]] = { "inline-remark"="(cost=25, threshold=0)" }
-; CHECK: attributes [[ATTR2]] = { "inline-remark"="(cost=never): recursive" }
+; CHECK: attributes [[ATTR2]] = { "inline-remark"="(cost=0, threshold=0)" }
; CHECK: attributes [[ATTR3]] = { "inline-remark"="unsupported operand bundle; (cost={{.*}}, threshold={{.*}})" }
; CHECK: attributes [[ATTR4]] = { alwaysinline "inline-remark"="(cost=never): recursive call" }
|
@@ -235,7 +235,7 @@ struct InlineParams { | |||
std::optional<bool> EnableDeferral; | |||
|
|||
/// Indicate whether we allow inlining for recursive call. | |||
std::optional<bool> AllowRecursiveCall = false; | |||
std::optional<bool> AllowRecursiveCall = true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this should probably become a flag.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like something of a hack to me. The way this is supposed to work is that InlineCost constant folds branches, and as a result the recursive call is rendered dead and never visited, such that the IsRecursiveCall flag is never set.
What you are doing here is to essentially add a new heuristic for the branch folding, which is based on a dominating condition in the caller instead of plain constant propagation. If you want to do that, I think the correct place to do it would be as part of the normal visitCmpInst, not as a separate, after the fact check.
Hi @nikic |
I don't think that visitCallBase is the right place. The instruction that actually gets simplified is the icmp (which then results in a dead block -- which means that we will not cost any of the instructions in that block, not just the call itself). You do need to use the information from the call, but I think visitCmpInst is the right place to use it. |
There will be another patch for folding a Br instruction when the condition is a Value, not an instruction — like this example:
I found out that case is already covered by the current code in the InlineCost pass, so no future patches. |
Hi @nikic |
@nikic |
please add a description describing what this patch is doing and why |
Done. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
doesn't this miscompile a non-recursive call to a recursive function? we're assuming the recursion has happened, which isn't true if the caller was another function
llvm/lib/Analysis/InlineCost.cpp
Outdated
DomConditionCache DC; | ||
DC.registerBranch(Br); | ||
SQ.DC = &DC; | ||
DominatorTree DT(*F); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
creating a domtree for potentially every cmp inst is not going to work compile time wise, we need some way of reusing a domtree for an entire inline cost invocation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will not be used for each cmp inst, but only for each recursive call.
But also I will look at your suggestion.
Thanks @aeubanks for the review. |
f2fa9a2
to
2e327f8
Compare
@aeubanks Does the latest commit make sense ? I create single instance of DT, and I build the tree only if I got new function. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Converting recursive functions to non-recursive functions certainly sounds like a nice idea. I guess this is getting stuck because it requires a dom-tree, and it needs to search in some way for the call from the icmp? After looking into it I think the searching is probably OK - looking for the Call that uses the Function and looking up to the ICmp. The alternative of searching down to the recursive call sounds like it might need to search more on average. And the DomTree we already calculate elsewhere during inlining and AFAIU should be fine for compile time so long as it is rare enough.
…an simplify the Cmp decision.
Change-Id: I296a55dba127c1ae74cca9e0edcb10f2f44640e9
2e327f8
to
d5ff993
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the updates.
@nikic do you mind running compile-time checks on this patch? I could run them here but it would probably be better if they were run somewhere everyone can see them. Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - if you can update the last point about this returning true on simplification then this looks OK to me if there are no other comments.
…implifying icmp guarding recursive function
Add a test for that case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - LGTM if there are no other comments.
// of the icmp. In other words, make sure that the recursion depth is 1. | ||
if ((ConstVal->isOne() && !IsTrueSuccessor) || | ||
(ConstVal->isZero() && IsTrueSuccessor)) { | ||
SimplifiedValues[&Cmp] = ConstVal; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We were talking about this - I think it would be valid for this to always happen if we know that the condition simplifies, but then the recursive call would always be taken, leading to an infinite recursion. It would just mean that we mark the function as IsRecursive=true and do not inline, so the end results should be the same and this is just being more careful (IIUC). Either way seems OK to me.
Function *F = Cmp.getFunction(); | ||
// Iterate over the users of the function to check if it's a recursive | ||
// function: | ||
for (auto *U : F->users()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this iterating over the uses of the function? Shouldn't this be inspecting just the CandidateCall in particular?
It looks like this checks if there is any recursive call of the right form, even if it's not the call-site being analyzed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I want to get the recursive call that is guarded by the cmp instr that I'm analyzing.
I can do that either with a top-down approach — finding the branch of the icmp, getting its successors, and iterating over all the instructions in those successors to find the recursive call — or with a bottom-up approach — finding any recursive call of the function and walking up through its predecessors until I reach the icmp.
So I think the bottom-up approach is better here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My concern here is that there may be multiple calls of the function, only one of which is the recursive call you are interested in. We will separately compute the cost for each of these call-sites -- but we will treat each of them as if they were the recursive call, and thus incorrectly assign them a lower cost. Instead, we should only do this when trying to inline the actual recursive call, as given by CandidateCall.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I got your point. Yes, I agree with you.
I think there should be a patch for ValueTracking changes, and another following patch for the 2 points you mentioned about the Inliner.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Today, I will create a patch for the ValueTracking, and another patch for resolving your comments on InlineCost.
DT.recalculate(*F); | ||
} else if (DT.getRoot()->getParent() != F) { | ||
// Dominator tree was constructed for a different function, recalculate | ||
// it for the current function. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks fishy. Isn't the CallAnalyzer instantiated per call-site? How can we end up with different functions here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I agree. The else-if statement should be removed.
// it for the current function. | ||
DT.recalculate(*F); | ||
} | ||
SQ.DT = &DT; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be possible to inject the condition via CondContext instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given the current logic at ValueTracking.cpp, then injecting the condition will not be useful.
But if the logic of ValueTracking.cpp::computeKnownFPClassFromContext
got changed to check the CondContext
and use directly computeKnownFPClassFromCond
, similarly to the logic at computeKnownBitsFromContext
, then injection will be useful.
Maybe I create a patch for the ValueTracking, and then a follow-up patch for applying your suggestion.
[InlineCost]: Add a new heuristic to branch folding.
This patch checks whether a branch condition would be resolved by the next iteration of the recursive call that the branch is guarding, so that we can determine whether the recursion depth is 1.
Inlining recursive functions is disabled by default, but by folding the branch the recursive call becomes dead, which opens the possibility of inlining the function for better performance.
Here is an example of a function that has recursion depth of 1.
int rec(int flag) {
  if (flag < 0) {
    // In the recursive invocation, flag is positive, so the branch
    // condition will be false and the recursion stops at depth 1.
    return rec(flag * -1);
  }
  return flag * 5;
}