Skip to content

Commit 8249669

Browse files
committed
[SimplifyCFG] More accurate use legality check for sinking
When sinking instructions, we have to make sure that the uses of that instruction are consistent: If used in a phi node in the sink target, then the phi operands have to match the sink candidates. This allows the phi to be removed when the instruction is sunk. This case is already handled accurately. However, what the current code doesn't handle are uses in the same block. These are just unconditionally accepted, even though this needs the same consistency check for the phi node that sinking the using instruction would introduce. Instead, the code has another check when actually performing the sinking, which repeats the phi check (just at a later time, where all the later instructions have already been sunk and any new phis introduced). This is problematic, because it messes up the profitability heuristic. The code will think that certain instructions will get sunk, but they actually won't. This may result in more phi nodes being created than is considered profitable. See the changed test for a case where we no longer do this after this patch. The new approach makes sure that the uses are consistent during the initial legality check. This is based on PhiOperands, which we already collect. The primary motivation for this is to generalize sinking to support more than one use, and doing that generalization is hard with the current split checking approach.
1 parent 851710d commit 8249669

File tree

2 files changed

+41
-54
lines changed

2 files changed

+41
-54
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 37 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1930,7 +1930,7 @@ static bool replacingOperandWithVariableIsCheap(const Instruction *I,
19301930
// PHI node (because an operand varies in each input block), add to PHIOperands.
19311931
static bool canSinkInstructions(
19321932
ArrayRef<Instruction *> Insts,
1933-
DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
1933+
DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
19341934
// Prune out obviously bad instructions to move. Each instruction must have
19351935
// exactly zero or one use, and we check later that use is by a single, common
19361936
// PHI instruction in the successor.
@@ -1981,21 +1981,19 @@ static bool canSinkInstructions(
19811981
return false;
19821982
}
19831983

1984-
// All instructions in Insts are known to be the same opcode. If they have a
1985-
// use, check that the only user is a PHI or in the same block as the
1986-
// instruction, because if a user is in the same block as an instruction we're
1987-
// contemplating sinking, it must already be determined to be sinkable.
1984+
// Uses must be consistent: If I0 is used in a phi node in the sink target,
1985+
// then the other phi operands must match the instructions from Insts. This
1986+
// also has to hold true for any phi nodes that would be created as a result
1987+
// of sinking. Both of these cases are represented by PhiOperands.
19881988
if (HasUse) {
1989-
auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1990-
auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
1991-
if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
1992-
auto *U = cast<Instruction>(*I->user_begin());
1993-
return (PNUse &&
1994-
PNUse->getParent() == Succ &&
1995-
PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
1996-
U->getParent() == I->getParent();
1997-
}))
1989+
const Use &U = *I0->use_begin();
1990+
auto It = PHIOperands.find(&U);
1991+
if (It == PHIOperands.end())
1992+
// There may be uses in other blocks when sinking into a loop header.
19981993
return false;
1994+
for (auto [I1, I2] : zip(Insts, It->second))
1995+
if (I1 != I2)
1996+
return false;
19991997
}
20001998

20011999
// Because SROA can't handle speculating stores of selects, try not to sink
@@ -2061,8 +2059,9 @@ static bool canSinkInstructions(
20612059
!canReplaceOperandWithVariable(I0, OI))
20622060
// We can't create a PHI from this GEP.
20632061
return false;
2062+
auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
20642063
for (auto *I : Insts)
2065-
PHIOperands[I].push_back(I->getOperand(OI));
2064+
Ops.push_back(I->getOperand(OI));
20662065
}
20672066
}
20682067
return true;
@@ -2071,7 +2070,7 @@ static bool canSinkInstructions(
20712070
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
20722071
// instruction of every block in Blocks to their common successor, commoning
20732072
// into one instruction.
2074-
static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
2073+
static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
20752074
auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
20762075

20772076
// canSinkInstructions returning true guarantees that every block has at
@@ -2086,23 +2085,10 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
20862085
Insts.push_back(I);
20872086
}
20882087

2089-
// The only checking we need to do now is that all users of all instructions
2090-
// are the same PHI node. canSinkInstructions should have checked this but
2091-
// it is slightly over-aggressive - it gets confused by commutative
2092-
// instructions so double-check it here.
2093-
Instruction *I0 = Insts.front();
2094-
if (!I0->user_empty()) {
2095-
auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
2096-
if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
2097-
auto *U = cast<Instruction>(*I->user_begin());
2098-
return U == PNUse;
2099-
}))
2100-
return false;
2101-
}
2102-
21032088
// We don't need to do any more checking here; canSinkInstructions should
21042089
// have done it all for us.
21052090
SmallVector<Value*, 4> NewOperands;
2091+
Instruction *I0 = Insts.front();
21062092
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
21072093
// This check is different to that in canSinkInstructions. There, we
21082094
// cared about the global view once simplifycfg (and instcombine) have
@@ -2170,8 +2156,6 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
21702156
I->replaceAllUsesWith(I0);
21712157
I->eraseFromParent();
21722158
}
2173-
2174-
return true;
21752159
}
21762160

21772161
namespace {
@@ -2312,9 +2296,19 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
23122296
// carry on. If we can sink an instruction but need to PHI-merge some operands
23132297
// (because they're not identical in each instruction) we add these to
23142298
// PHIOperands.
2299+
// We prepopulate PHIOperands with the phis that already exist in BB.
2300+
DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
2301+
for (PHINode &PN : BB->phis()) {
2302+
SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
2303+
for (const Use &U : PN.incoming_values())
2304+
IncomingVals.insert({PN.getIncomingBlock(U), &U});
2305+
auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2306+
for (BasicBlock *Pred : UnconditionalPreds)
2307+
Ops.push_back(*IncomingVals[Pred]);
2308+
}
2309+
23152310
int ScanIdx = 0;
23162311
SmallPtrSet<Value*,4> InstructionsToSink;
2317-
DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
23182312
LockstepReverseIterator LRI(UnconditionalPreds);
23192313
while (LRI.isValid() &&
23202314
canSinkInstructions(*LRI, PHIOperands)) {
@@ -2336,20 +2330,19 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
23362330
// actually sink before encountering instruction that is unprofitable to
23372331
// sink?
23382332
auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2339-
unsigned NumPHIdValues = 0;
2340-
for (auto *I : *LRI)
2341-
for (auto *V : PHIOperands[I]) {
2342-
if (!InstructionsToSink.contains(V))
2343-
++NumPHIdValues;
2333+
unsigned NumPHIInsts = 0;
2334+
for (Use &U : (*LRI)[0]->operands()) {
2335+
auto It = PHIOperands.find(&U);
2336+
if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2337+
return InstructionsToSink.contains(V);
2338+
})) {
2339+
++NumPHIInsts;
23442340
// FIXME: this check is overly optimistic. We may end up not sinking
23452341
// said instruction, due to the very same profitability check.
23462342
// See @creating_too_many_phis in sink-common-code.ll.
23472343
}
2348-
LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
2349-
unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
2350-
if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
2351-
NumPHIInsts++;
2352-
2344+
}
2345+
LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
23532346
return NumPHIInsts <= 1;
23542347
};
23552348

@@ -2474,13 +2467,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
24742467
// sink is always at index 0.
24752468
LRI.reset();
24762469

2477-
if (!sinkLastInstruction(UnconditionalPreds)) {
2478-
LLVM_DEBUG(
2479-
dbgs()
2480-
<< "SINK: stopping here, failed to actually sink instruction!\n");
2481-
break;
2482-
}
2483-
2470+
sinkLastInstruction(UnconditionalPreds);
24842471
NumSinkCommonInstrs++;
24852472
Changed = true;
24862473
}

llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -568,16 +568,16 @@ define zeroext i1 @test_crash(i1 zeroext %flag, ptr %i4, ptr %m, ptr %n) {
568568
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
569569
; CHECK: if.then:
570570
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I4:%.*]], align 4
571+
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -1
571572
; CHECK-NEXT: br label [[IF_END:%.*]]
572573
; CHECK: if.else:
573574
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[M:%.*]], align 4
574575
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N:%.*]], align 4
576+
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], [[TMP4]]
575577
; CHECK-NEXT: br label [[IF_END]]
576578
; CHECK: if.end:
577-
; CHECK-NEXT: [[TMP4_SINK:%.*]] = phi i32 [ [[TMP4]], [[IF_ELSE]] ], [ -1, [[IF_THEN]] ]
578-
; CHECK-NEXT: [[TMP3_SINK:%.*]] = phi i32 [ [[TMP3]], [[IF_ELSE]] ], [ [[TMP1]], [[IF_THEN]] ]
579-
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP3_SINK]], [[TMP4_SINK]]
580-
; CHECK-NEXT: store i32 [[TMP5]], ptr [[I4]], align 4
579+
; CHECK-NEXT: [[TMP5_SINK:%.*]] = phi i32 [ [[TMP5]], [[IF_ELSE]] ], [ [[TMP2]], [[IF_THEN]] ]
580+
; CHECK-NEXT: store i32 [[TMP5_SINK]], ptr [[I4]], align 4
581581
; CHECK-NEXT: ret i1 true
582582
;
583583
entry:

0 commit comments

Comments
 (0)