-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Analysis]: Allow inlining recursive call IF recursion depth is 1. #119677
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Hassnaa Hamdi (hassnaaHamdi) ChangesFull diff: https://github.com/llvm/llvm-project/pull/119677.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index ed54b0c077b4a4..ac61ca064c28b3 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -235,7 +235,7 @@ struct InlineParams {
std::optional<bool> EnableDeferral;
/// Indicate whether we allow inlining for recursive call.
- std::optional<bool> AllowRecursiveCall = false;
+ std::optional<bool> AllowRecursiveCall = true;
};
std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind);
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 85287a39f2caad..c2ade73b2ddcb4 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -1160,6 +1161,10 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
+ bool shouldCheckRecursiveCall() {
+ return IsRecursiveCall && AllowRecursiveCall;
+ }
+ bool shouldInlineRecursiveCall(CallBase &Call);
};
// Return true if CB is the sole call to local function Callee.
@@ -2880,6 +2885,68 @@ InlineResult CallAnalyzer::analyze() {
return finalizeAnalysis();
}
+bool InlineCostCallAnalyzer::shouldInlineRecursiveCall(CallBase &Call) {
+ CallInst *CI = cast<CallInst>(&Call);
+ auto CIB = CI->getParent();
+ // Only handle the case when we have a single predecessor
+ if (auto Predecessor = CIB->getSinglePredecessor()) {
+ BranchInst *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
+ if (!Br || Br->isUnconditional()) {
+ return false;
+ }
+ Value *Var = Br->getCondition();
+ CmpInst *CmpInstr = dyn_cast<CmpInst>(Var);
+ if (CmpInstr && !isa<Constant>(CmpInstr->getOperand(1))) {
+ // Current logic of ValueTracking/DomConditionCache works only if RHS is
+ // constant.
+ return false;
+ }
+ unsigned ArgNum = 0;
+ Value *FuncArg = nullptr, *CallArg = nullptr;
+ // Check which func argument the cmp instr is using:
+ for (; ArgNum < CI->getFunction()->arg_size(); ArgNum++) {
+ FuncArg = CI->getFunction()->getArg(ArgNum);
+ CallArg = CI->getArgOperand(ArgNum);
+ if (CmpInstr) {
+ if ((FuncArg == CmpInstr->getOperand(0)) &&
+ (CallArg != CmpInstr->getOperand(0)))
+ break;
+ } else if (FuncArg == Var && (CallArg != Var))
+ break;
+ }
+ // Only handle the case when a func argument controls the cmp instruction:
+ if (ArgNum < CI->getFunction()->arg_size()) {
+ bool isTrueSuccessor = CIB == Br->getSuccessor(0);
+ if (CmpInstr) {
+ SimplifyQuery SQ(CI->getFunction()->getDataLayout(),
+ dyn_cast<Instruction>(CallArg));
+ DomConditionCache DC;
+ DC.registerBranch(Br);
+ SQ.DC = &DC;
+ DominatorTree DT(*CI->getFunction());
+ SQ.DT = &DT;
+ Value *simplifiedInstruction = llvm::simplifyInstructionWithOperands(
+ CmpInstr, {CallArg, CmpInstr->getOperand(1)}, SQ);
+ if (!simplifiedInstruction)
+ return false;
+ if (auto *ConstVal =
+ dyn_cast<llvm::ConstantInt>(simplifiedInstruction)) {
+ if (ConstVal->isOne())
+ return !isTrueSuccessor;
+ return isTrueSuccessor;
+ }
+ } else {
+ if (auto *ConstVal = dyn_cast<llvm::ConstantInt>(CallArg)) {
+ if (ConstVal->isOne())
+ return !isTrueSuccessor;
+ return isTrueSuccessor;
+ }
+ }
+ }
+ }
+ return false;
+}
+
void InlineCostCallAnalyzer::print(raw_ostream &OS) {
#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
if (PrintInstructionComments)
@@ -3106,6 +3173,12 @@ InlineCost llvm::getInlineCost(
LLVM_DEBUG(CA.dump());
+ // Check if callee function is recursive:
+ if (ShouldInline.isSuccess()) {
+ if (CA.shouldCheckRecursiveCall() && !CA.shouldInlineRecursiveCall(Call))
+ return InlineCost::getNever("deep recursion");
+ }
+
// Always make cost benefit based decision explicit.
// We use always/never here since threshold is not meaningful,
// as it's not what drives cost-benefit analysis.
@@ -3148,6 +3221,13 @@ InlineResult llvm::isInlineViable(Function &F) {
// Disallow recursive calls.
Function *Callee = Call->getCalledFunction();
+  // This function is called when the callee has the "alwaysinline"
+  // attribute. If we allowed the inlining here (given that recursive
+  // inlining is enabled), there would be a problem on the second inlining
+  // attempt: the Inliner pass allows only one round of inlining and then
+  // inserts the "noinline" attribute, which conflicts with
+  // "alwaysinline". So "alwaysinline" on a recursive function is
+  // disallowed to avoid conflicting attributes.
if (&F == Callee)
return InlineResult::failure("recursive call");
diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn.ll b/llvm/test/Transforms/Inline/inline-recursive-fn.ll
new file mode 100644
index 00000000000000..db90050d009a04
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-recursive-fn.ll
@@ -0,0 +1,179 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='inline,instcombine' < %s | FileCheck %s
+
+define float @inline_rec_true_successor(float %x, float %scale) {
+; CHECK-LABEL: define float @inline_rec_true_successor(
+; CHECK-SAME: float [[X:%.*]], float [[SCALE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[X]], 0.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[COMMON_RET18:.*]]:
+; CHECK-NEXT: [[COMMON_RET18_OP:%.*]] = phi float [ [[COMMON_RET18_OP_I:%.*]], %[[INLINE_REC_TRUE_SUCCESSOR_EXIT:.*]] ], [ [[MUL:%.*]], %[[IF_END]] ]
+; CHECK-NEXT: ret float [[COMMON_RET18_OP]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: br i1 false, label %[[IF_THEN_I:.*]], label %[[IF_END_I:.*]]
+; CHECK: [[IF_THEN_I]]:
+; CHECK-NEXT: br label %[[INLINE_REC_TRUE_SUCCESSOR_EXIT]]
+; CHECK: [[IF_END_I]]:
+; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[SCALE]], [[FNEG]]
+; CHECK-NEXT: br label %[[INLINE_REC_TRUE_SUCCESSOR_EXIT]]
+; CHECK: [[INLINE_REC_TRUE_SUCCESSOR_EXIT]]:
+; CHECK-NEXT: [[COMMON_RET18_OP_I]] = phi float [ poison, %[[IF_THEN_I]] ], [ [[MUL_I]], %[[IF_END_I]] ]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[MUL]] = fmul float [[X]], [[SCALE]]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+;
+entry:
+ %cmp = fcmp olt float %x, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+common.ret18: ; preds = %if.then, %if.end
+ %common.ret18.op = phi float [ %call, %if.then ], [ %mul, %if.end ]
+ ret float %common.ret18.op
+
+if.then: ; preds = %entry
+ %fneg = fneg float %x
+ %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
+ br label %common.ret18
+
+if.end: ; preds = %entry
+ %mul = fmul float %x, %scale
+ br label %common.ret18
+}
+
+define float @test_inline_rec_true_successor(float %x, float %scale) {
+entry:
+ %res = tail call float @inline_rec_true_successor(float %x, float %scale)
+ ret float %res
+}
+
+; Same as previous test except that the recursive callsite is in the false successor
+define float @inline_rec_false_successor(float %x, float %scale) {
+; CHECK-LABEL: define float @inline_rec_false_successor(
+; CHECK-SAME: float [[Y:%.*]], float [[SCALE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[Y]], 0.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[COMMON_RET18:.*]]:
+; CHECK-NEXT: [[COMMON_RET18_OP:%.*]] = phi float [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[COMMON_RET18_OP_I:%.*]], %[[INLINE_REC_FALSE_SUCCESSOR_EXIT:.*]] ]
+; CHECK-NEXT: ret float [[COMMON_RET18_OP]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[MUL]] = fmul float [[Y]], [[SCALE]]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: br i1 true, label %[[IF_THEN_I:.*]], label %[[IF_END_I:.*]]
+; CHECK: [[IF_THEN_I]]:
+; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[Y]]
+; CHECK-NEXT: [[MUL_I:%.*]] = fmul float [[SCALE]], [[FNEG]]
+; CHECK-NEXT: br label %[[INLINE_REC_FALSE_SUCCESSOR_EXIT]]
+; CHECK: [[IF_END_I]]:
+; CHECK-NEXT: br label %[[INLINE_REC_FALSE_SUCCESSOR_EXIT]]
+; CHECK: [[INLINE_REC_FALSE_SUCCESSOR_EXIT]]:
+; CHECK-NEXT: [[COMMON_RET18_OP_I]] = phi float [ [[MUL_I]], %[[IF_THEN_I]] ], [ poison, %[[IF_END_I]] ]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+;
+entry:
+ %cmp = fcmp uge float %x, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+common.ret18: ; preds = %if.then, %if.end
+ %common.ret18.op = phi float [ %mul, %if.then ], [ %call, %if.end ]
+ ret float %common.ret18.op
+
+if.then: ; preds = %entry
+ %mul = fmul float %x, %scale
+ br label %common.ret18
+
+if.end: ; preds = %entry
+ %fneg = fneg float %x
+ %call = tail call float @inline_rec_false_successor(float %fneg, float %scale)
+ br label %common.ret18
+}
+
+define float @test_inline_rec_false_successor(float %x, float %scale) {
+entry:
+ %res = tail call float @inline_rec_false_successor(float %x, float %scale)
+ ret float %res
+}
+
+; Test when the BR has Value not cmp instruction
+define float @inline_rec_no_cmp(i1 %flag, float %scale) {
+; CHECK-LABEL: define float @inline_rec_no_cmp(
+; CHECK-SAME: i1 [[FLAG:%.*]], float [[SCALE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[FLAG]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[SUM:%.*]] = fadd float [[SCALE]], 5.000000e+00
+; CHECK-NEXT: [[SUM1:%.*]] = fadd float [[SUM]], [[SCALE]]
+; CHECK-NEXT: br label %[[COMMON_RET:.*]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[SUM2:%.*]] = fadd float [[SCALE]], 5.000000e+00
+; CHECK-NEXT: br label %[[COMMON_RET]]
+; CHECK: [[COMMON_RET]]:
+; CHECK-NEXT: [[COMMON_RET_RES:%.*]] = phi float [ [[SUM1]], %[[IF_THEN]] ], [ [[SUM2]], %[[IF_END]] ]
+; CHECK-NEXT: ret float [[COMMON_RET_RES]]
+;
+entry:
+ br i1 %flag, label %if.then, label %if.end
+if.then:
+ %res = tail call float @inline_rec_no_cmp(i1 false, float %scale)
+ %sum1 = fadd float %res, %scale
+ br label %common.ret
+if.end:
+ %sum2 = fadd float %scale, 5.000000e+00
+ br label %common.ret
+common.ret:
+ %common.ret.res = phi float [ %sum1, %if.then ], [ %sum2, %if.end ]
+ ret float %common.ret.res
+}
+
+define float @test_inline_rec_no_cmp(i1 %flag, float %scale) {
+entry:
+ %res = tail call float @inline_rec_no_cmp(i1 %flag, float %scale)
+ ret float %res
+}
+
+define float @no_inline_rec(float %x, float %scale) {
+; CHECK-LABEL: define float @no_inline_rec(
+; CHECK-SAME: float [[Z:%.*]], float [[SCALE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[Z]], 5.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[COMMON_RET18:.*]]:
+; CHECK-NEXT: [[COMMON_RET18_OP:%.*]] = phi float [ [[FNEG1:%.*]], %[[IF_THEN]] ], [ [[MUL:%.*]], %[[IF_END]] ]
+; CHECK-NEXT: ret float [[COMMON_RET18_OP]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[FADD:%.*]] = fadd float [[Z]], 5.000000e+00
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @no_inline_rec(float [[FADD]], float [[SCALE]])
+; CHECK-NEXT: [[FNEG1]] = fneg float [[CALL]]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[MUL]] = fmul float [[Z]], [[SCALE]]
+; CHECK-NEXT: br label %[[COMMON_RET18]]
+;
+entry:
+ %cmp = fcmp olt float %x, 5.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+common.ret18: ; preds = %if.then, %if.end
+ %common.ret18.op = phi float [ %fneg1, %if.then ], [ %mul, %if.end ]
+ ret float %common.ret18.op
+
+if.then: ; preds = %entry
+ %fadd = fadd float %x, 5.000000e+00
+ %call = tail call float @no_inline_rec(float %fadd, float %scale)
+ %fneg1 = fneg float %call
+ br label %common.ret18
+
+if.end: ; preds = %entry
+ %mul = fmul float %x, %scale
+ br label %common.ret18
+}
+
+define float @test_no_inline(float %x, float %scale) {
+entry:
+ %res = tail call float @no_inline_rec(float %x, float %scale)
+ ret float %res
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/Inline/inline-remark.ll b/llvm/test/Transforms/Inline/inline-remark.ll
index 166c7bb0659246..5af20e42dd31a1 100644
--- a/llvm/test/Transforms/Inline/inline-remark.ll
+++ b/llvm/test/Transforms/Inline/inline-remark.ll
@@ -56,6 +56,6 @@ define void @test3() {
}
; CHECK: attributes [[ATTR1]] = { "inline-remark"="(cost=25, threshold=0)" }
-; CHECK: attributes [[ATTR2]] = { "inline-remark"="(cost=never): recursive" }
+; CHECK: attributes [[ATTR2]] = { "inline-remark"="(cost=0, threshold=0)" }
; CHECK: attributes [[ATTR3]] = { "inline-remark"="unsupported operand bundle; (cost={{.*}}, threshold={{.*}})" }
; CHECK: attributes [[ATTR4]] = { alwaysinline "inline-remark"="(cost=never): recursive call" }
|
@@ -235,7 +235,7 @@ struct InlineParams { | |||
std::optional<bool> EnableDeferral; | |||
|
|||
/// Indicate whether we allow inlining for recursive call. | |||
std::optional<bool> AllowRecursiveCall = false; | |||
std::optional<bool> AllowRecursiveCall = true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this should probably become a flag.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like something of a hack to me. The way this is supposed to work is that InlineCost constant folds branches, and as a result the recursive call is rendered dead and never visited, such that the IsRecursiveCall flag is never set.
What you are doing here is to essentially add a new heuristic for the branch folding, which is based on a dominating condition in the caller instead of plain constant propagation. If you want to do that, I think the correct place to do it would be as part of the normal visitCmpInst, not as a separate, after the fact check.
Hi @nikic |
I don't think that visitCallBase is the right place. The instruction that actually gets simplified is the icmp (which then results in a dead block -- which means that we will not cost any of the instructions in that block, not just the call itself). You do need to use the information from the call, but I think visitCmpInst is the right place to use it. |
There will be another patch for folding a Br instruction when the condition is a Value, not an instruction — like this example:
I found out that case is already covered by the current code in the InlineCost pass, so no future patches. |
Hi @nikic |
@nikic |
please add a description describing what this patch is doing and why |
Done. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
doesn't this miscompile a non-recursive call to a recursive function? we're assuming the recursion has happened, which isn't true if the caller was another function
llvm/lib/Analysis/InlineCost.cpp
Outdated
DomConditionCache DC; | ||
DC.registerBranch(Br); | ||
SQ.DC = &DC; | ||
DominatorTree DT(*F); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
creating a domtree for potentially every cmp inst is not going to work compile time wise, we need some way of reusing a domtree for an entire inline cost invocation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will not be used for each cmp inst, but only for each recursive call.
But also I will look at your suggestion.
Thanks @aeubanks for the review. |
f2fa9a2
to
2e327f8
Compare
@aeubanks Does the latest commit make sense ? I create single instance of DT, and I build the tree only if I got new function. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Converting recursive functions to non-recursive functions certainly sounds like a nice idea. I guess this is getting stuck because it requires a dom-tree, and it needs to search in some way for the call from the icmp? After looking into it I think the searching is probably OK - looking for the Call that uses the Function and looking up to the ICmp. The alternative of searching down to the recursive call sounds like it might need to search more on average. And the DomTree we already calculate elsewhere during inlining and AFAIU should be fine for compile time so long as it is rare enough.
…an simplify the Cmp decision.
Change-Id: I296a55dba127c1ae74cca9e0edcb10f2f44640e9
2e327f8
to
d5ff993
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the updates.
@nikic do you mind running compile-time checks on this patch? I could run them here but it would probably be better if they were run somewhere everyone can see them. Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - if you can update the last point about this returning true on simplification then this looks OK to me if there are no other comments.
…implifying icmp guarding recursive function
Add a test for that case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - LGTM if there are no other comments.
// of the icmp. In other words, make sure that the recursion depth is 1. | ||
if ((ConstVal->isOne() && !IsTrueSuccessor) || | ||
(ConstVal->isZero() && IsTrueSuccessor)) { | ||
SimplifiedValues[&Cmp] = ConstVal; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We were talking about this - I think it would be valid for this to always happen if we know that the condition simplifies, but then the recursive call would always be taken, leading to an infinite recursion. It would just mean that we mark the function as IsRecursive=true and do not inline, so the end results should be the same and this is just being more careful (IIUC). Either way seems OK to me.
Function *F = Cmp.getFunction(); | ||
// Iterate over the users of the function to check if it's a recursive | ||
// function: | ||
for (auto *U : F->users()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this iterating over the uses of the function? Shouldn't this be inspecting just the CandidateCall in particular?
It looks like this checks if there is any recursive call of the right form, even if it's not the call-site being analyzed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I want to get the recursive call that is guarded by the cmp instr that I'm analyzing.
I can do that either with a top-down approach — finding the branch of the icmp, getting its successors, and iterating over all the instructions in those successors to find the recursive call — or with a bottom-up approach — finding any recursive call of the function and walking up through its predecessors until I reach the icmp.
So I think the bottom-up approach is better here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My concern here is that there may be multiple calls of the function, only one of which is the recursive call you are interested in. We will separately compute the cost for each of these call-sites -- but we will treat each of them as if they were the recursive call, and thus incorrectly assign them a lower cost. Instead, we should only do this when trying to inline the actual recursive call, as given by CandidateCall.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I got your point. Yes, I agree with you.
I think there should be a patch for ValueTracking changes, and another following patch for the 2 points you mentioned about the Inliner.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Today, I will create a patch for the ValueTracking, and another patch for resolving your comments on InlineCost.
DT.recalculate(*F); | ||
} else if (DT.getRoot()->getParent() != F) { | ||
// Dominator tree was constructed for a different function, recalculate | ||
// it for the current function. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks fishy. Isn't the CallAnalyzer instantiated per call-site? How can we end up with different functions here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I agree. The else-if statement should be removed.
// it for the current function. | ||
DT.recalculate(*F); | ||
} | ||
SQ.DT = &DT; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be possible to inject the condition via CondContext instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given the current logic at ValueTracking.cpp, then injecting the condition will not be useful.
But if the logic of ValueTracking.cpp::computeKnownFPClassFromContext
got changed to check the CondContext
and use directly computeKnownFPClassFromCond
, similarly to the logic at computeKnownBitsFromContext
, then injection will be useful.
Maybe I create a patch for the ValueTracking, and then a follow-up patch for applying your suggestion.
[InlineCost]: Add a new heuristic to branch folding.
This patch checks whether a branch condition would be resolved by the next iteration of the recursive call that the branch is guarding, so that we can determine whether the recursion depth is 1.
Inlining recursive functions is disabled by default, but by folding the branch the recursive call becomes dead, which opens the possibility of inlining the function for better performance.
Here is an example of a function that has recursion depth of 1.
int rec(int flag) {
  if (flag < 0) {
    // In the recursive invocation, flag is positive, so the branch
    // condition will be false and the recursion stops at depth 1.
    return rec(flag * -1);
  }
  return flag * 5;
}