From 30cdf1e959d2e4dee1c871ff37470dcdb7e8d099 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 19 Sep 2024 10:19:15 +0200 Subject: [PATCH] [SimplifyCFG] Pass context instruction to isSafeToSpeculativelyExecute() (#109132) Pass speculation target and assumption cache to isSafeToSpeculativelyExecute() calls. This allows speculating based on dereferenceable/align assumptions, but the primary motivation here is to avoid regressions from planned changes to fix https://github.com/llvm/llvm-project/issues/108854. --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 32 +++++++++---------- .../SimplifyCFG/speculate-derefable-load.ll | 19 +++-------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 09461e65e2dc21..69c4475a494cbe 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -398,9 +398,6 @@ static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, /// expensive. static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI) { - assert((!isa<Instruction>(I) || - isSafeToSpeculativelyExecute(cast<Instruction>(I))) && - "Instruction is not safe to speculatively execute!"); return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency); } @@ -421,12 +418,11 @@ static InstructionCost computeSpeculationCost(const User *I, /// After this function returns, Cost is increased by the cost of /// V plus its non-dominating operands. If that cost is greater than /// Budget, false is returned and Cost is undefined. 
-static bool dominatesMergePoint(Value *V, BasicBlock *BB, +static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl<Instruction *> &AggressiveInsts, - InstructionCost &Cost, - InstructionCost Budget, + InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, - unsigned Depth = 0) { + AssumptionCache *AC, unsigned Depth = 0) { // It is possible to hit a zero-cost cycle (phi/gep instructions for example), // so limit the recursion depth. // TODO: While this recursion limit does prevent pathological behavior, it @@ -461,7 +457,7 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. - if (!isSafeToSpeculativelyExecute(I)) + if (!isSafeToSpeculativelyExecute(I, InsertPt, AC)) return false; Cost += computeSpeculationCost(I, TTI); @@ -480,8 +476,8 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (Use &Op : I->operands()) - if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI, - Depth + 1)) + if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget, + TTI, AC, Depth + 1)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts.insert(I); @@ -3140,7 +3136,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI, return false; // Don't hoist the instruction if it's unsafe or expensive. 
- if (!IsSafeCheapLoadStore && !isSafeToSpeculativelyExecute(&I) && + if (!IsSafeCheapLoadStore && + !isSafeToSpeculativelyExecute(&I, BI, Options.AC) && !(HoistCondStores && !SpeculatedStoreValue && (SpeculatedStoreValue = isSafeToSpeculateStore(&I, BB, ThenBB, EndBB)))) @@ -3651,7 +3648,8 @@ static bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI, /// Given a BB that starts with the specified two-entry PHI node, /// see if we can eliminate it. static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, - DomTreeUpdater *DTU, const DataLayout &DL, + DomTreeUpdater *DTU, AssumptionCache *AC, + const DataLayout &DL, bool SpeculateUnpredictables) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we @@ -3741,10 +3739,10 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, continue; } - if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts, - Cost, Budget, TTI) || - !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, - Cost, Budget, TTI)) + if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI, + AggressiveInsts, Cost, Budget, TTI, AC) || + !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI, + AggressiveInsts, Cost, Budget, TTI, AC)) return Changed; } @@ -8116,7 +8114,7 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { // eliminate it, do so now. 
if (auto *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) - if (foldTwoEntryPHINode(PN, TTI, DTU, DL, + if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL, Options.SpeculateUnpredictables)) return true; } diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll b/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll index 9e3f333018e680..8c7afa4598bd4b 100644 --- a/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll +++ b/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll @@ -4,14 +4,10 @@ define i64 @align_deref_align(i1 %c, ptr %p) { ; CHECK-LABEL: define i64 @align_deref_align( ; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 8), "align"(ptr [[P]], i64 8) ] -; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[EXIT:.*]] -; CHECK: [[IF]]: ; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8 -; CHECK-NEXT: br label %[[EXIT]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 [[V]], i64 0 ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -30,17 +26,12 @@ exit: define i64 @assume_deref_align2(i1 %c1, i32 %x, ptr %p) { ; CHECK-LABEL: define i64 @assume_deref_align2( ; CHECK-SAME: i1 [[C1:%.*]], i32 [[X:%.*]], ptr [[P:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 8), "align"(ptr [[P]], i64 8) ] -; CHECK-NEXT: br i1 [[C1]], label %[[IF1:.*]], label %[[EXIT:.*]] -; CHECK: [[IF1]]: ; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[X]], 10 -; CHECK-NEXT: br i1 [[C2]], label %[[IF2:.*]], label %[[EXIT]] -; CHECK: [[IF2]]: ; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8 -; CHECK-NEXT: br label %[[EXIT]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF2]] ], [ 1, %[[IF1]] 
], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C2]], i64 [[V]], i64 1 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C1]], i64 [[SPEC_SELECT]], i64 0 ; CHECK-NEXT: ret i64 [[RES]] ; entry: