Skip to content

Commit 203b67e

Browse files
authored
Ignore fallthrough blocks in optRecognizeAndOptimizeSwitchJumps (#118614)
1 parent 30700fb commit 203b67e

File tree

2 files changed

+67
-20
lines changed

2 files changed

+67
-20
lines changed

src/coreclr/jit/compiler.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4912,6 +4912,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
49124912

49134913
if (opts.OptimizationEnabled())
49144914
{
4915+
// Conditional to switch conversion, and switch peeling
4916+
//
4917+
DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optRecognizeAndOptimizeSwitchJumps);
4918+
49154919
// Optimize boolean conditions
49164920
//
49174921
DoPhase(this, PHASE_OPTIMIZE_BOOLS, &Compiler::optOptimizeBools);
@@ -4920,10 +4924,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
49204924
//
49214925
DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion);
49224926

4923-
// Conditional to switch conversion, and switch peeling
4924-
//
4925-
DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optRecognizeAndOptimizeSwitchJumps);
4926-
49274927
// Run flow optimizations before reordering blocks
49284928
//
49294929
DoPhase(this, PHASE_OPTIMIZE_PRE_LAYOUT, &Compiler::optOptimizePreLayout);

src/coreclr/jit/switchrecognition.cpp

Lines changed: 63 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@
99
// We mainly rely on TryLowerSwitchToBitTest in these heuristics, but jump tables can be useful
1010
// even without conversion to a bitmap test.
1111
#define SWITCH_MAX_DISTANCE ((TARGET_POINTER_SIZE * BITS_PER_BYTE) - 1)
12-
#define SWITCH_MIN_TESTS 3
12+
13+
#if TARGET_ARM64
14+
// ARM64 has conditional instructions which can be more efficient than a switch->bittest (optOptimizeBools)
15+
#define SWITCH_MIN_TESTS 5
16+
#else
17+
#define SWITCH_MIN_TESTS 3
18+
#endif
1319

1420
// This is a heuristics based value tuned for optimal performance
1521
#define CONVERT_SWITCH_TO_CCMP_MIN_TEST 5
@@ -35,9 +41,6 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps()
3541
continue;
3642
}
3743

38-
// Limit to XARCH, ARM is already doing a great job with such comparisons using
39-
// a series of ccmp instruction (see ifConvert phase).
40-
#ifdef TARGET_XARCH
4144
// block->KindIs(BBJ_COND) check is for better throughput.
4245
if (block->KindIs(BBJ_COND) && optSwitchDetectAndConvert(block))
4346
{
@@ -47,10 +50,7 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps()
4750
// Converted switches won't have dominant cases, so we can skip the switch peeling check.
4851
assert(!block->GetSwitchTargets()->HasDominantCase());
4952
}
50-
else
51-
#endif
52-
53-
if (block->KindIs(BBJ_SWITCH) && block->GetSwitchTargets()->HasDominantCase())
53+
else if (block->KindIs(BBJ_SWITCH) && block->GetSwitchTargets()->HasDominantCase())
5454
{
5555
fgPeelSwitch(block);
5656
modified = true;
@@ -66,11 +66,49 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps()
6666
return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
6767
}
6868

69+
//------------------------------------------------------------------------------
70+
// SkipFallthroughBlocks : Skip all fallthrough blocks (empty BBJ_ALWAYS blocks)
71+
//
72+
// Arguments:
73+
// compiler - The compiler instance.
74+
// block - the block to process
75+
//
76+
// Return Value:
77+
// block that is not an empty fallthrough block
78+
//
79+
static BasicBlock* SkipFallthroughBlocks(Compiler* comp, BasicBlock* block)
80+
{
81+
BasicBlock* origBlock = block;
82+
if (!block->KindIs(BBJ_ALWAYS) || (block->firstStmt() != nullptr))
83+
{
84+
// Fast path
85+
return block;
86+
}
87+
88+
// We might consider ignoring statements with only NOPs if that is profitable.
89+
90+
BitVecTraits traits(comp->compBasicBlockID, comp);
91+
BitVec visited = BitVecOps::MakeEmpty(&traits);
92+
BitVecOps::AddElemD(&traits, visited, block->bbID);
93+
while (block->KindIs(BBJ_ALWAYS) && (block->firstStmt() == nullptr) &&
94+
BasicBlock::sameEHRegion(block, block->GetTarget()))
95+
{
96+
block = block->GetTarget();
97+
if (!BitVecOps::TryAddElemD(&traits, visited, block->bbID))
98+
{
99+
// A cycle detected, bail out.
100+
return origBlock;
101+
}
102+
}
103+
return block;
104+
}
105+
69106
//------------------------------------------------------------------------------
70107
// IsConstantTestCondBlock : Does the given block represent a simple BBJ_COND
71108
// constant test? e.g. JTRUE(EQ/NE(X, CNS)).
72109
//
73110
// Arguments:
111+
// compiler - The compiler instance.
74112
// block - The block to check
75113
// allowSideEffects - is variableNode allowed to have side-effects (COMMA)?
76114
// trueTarget - [out] The successor visited if X == CNS
@@ -82,7 +120,8 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps()
82120
// Return Value:
83121
// True if the block represents a constant test, false otherwise
84122
//
85-
bool IsConstantTestCondBlock(const BasicBlock* block,
123+
bool IsConstantTestCondBlock(Compiler* comp,
124+
const BasicBlock* block,
86125
bool allowSideEffects,
87126
BasicBlock** trueTarget,
88127
BasicBlock** falseTarget,
@@ -123,9 +162,11 @@ bool IsConstantTestCondBlock(const BasicBlock* block,
123162
return false;
124163
}
125164

126-
*isReversed = rootNode->gtGetOp1()->OperIs(GT_NE);
127-
*trueTarget = *isReversed ? block->GetFalseTarget() : block->GetTrueTarget();
128-
*falseTarget = *isReversed ? block->GetTrueTarget() : block->GetFalseTarget();
165+
*isReversed = rootNode->gtGetOp1()->OperIs(GT_NE);
166+
*trueTarget =
167+
SkipFallthroughBlocks(comp, *isReversed ? block->GetFalseTarget() : block->GetTrueTarget());
168+
*falseTarget =
169+
SkipFallthroughBlocks(comp, *isReversed ? block->GetTrueTarget() : block->GetFalseTarget());
129170

130171
if (block->FalseTargetIs(block) || block->TrueTargetIs(block))
131172
{
@@ -172,6 +213,12 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingFor
172213
{
173214
assert(firstBlock->KindIs(BBJ_COND));
174215

216+
#ifdef TARGET_ARM
217+
// Bitmap test (TryLowerSwitchToBitTest) is quite verbose on ARM32 (~6 instructions), it results in massive size
218+
// regressions.
219+
return false;
220+
#endif
221+
175222
GenTree* variableNode = nullptr;
176223
ssize_t cns = 0;
177224
BasicBlock* trueTarget = nullptr;
@@ -184,7 +231,7 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingFor
184231
// and then try to accumulate as many constant test blocks as possible. Once we hit
185232
// a block that doesn't match the pattern, we start processing the accumulated blocks.
186233
bool isReversed = false;
187-
if (IsConstantTestCondBlock(firstBlock, true, &trueTarget, &falseTarget, &isReversed, &variableNode, &cns))
234+
if (IsConstantTestCondBlock(this, firstBlock, true, &trueTarget, &falseTarget, &isReversed, &variableNode, &cns))
188235
{
189236
if (isReversed)
190237
{
@@ -234,7 +281,7 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingFor
234281
}
235282

236283
// Inspect secondary blocks
237-
if (IsConstantTestCondBlock(currBb, false, &currTrueTarget, &currFalseTarget, &isReversed,
284+
if (IsConstantTestCondBlock(this, currBb, false, &currTrueTarget, &currFalseTarget, &isReversed,
238285
&currVariableNode, &currCns))
239286
{
240287
if (currTrueTarget != trueTarget)
@@ -399,10 +446,10 @@ bool Compiler::optSwitchConvert(BasicBlock* firstBlock,
399446
BasicBlock* blockIfTrue = nullptr;
400447
BasicBlock* blockIfFalse = nullptr;
401448
bool isReversed = false;
402-
const bool isTest = IsConstantTestCondBlock(lastBlock, false, &blockIfTrue, &blockIfFalse, &isReversed);
449+
const bool isTest = IsConstantTestCondBlock(this, lastBlock, false, &blockIfTrue, &blockIfFalse, &isReversed);
403450
assert(isTest);
404451

405-
assert(firstBlock->TrueTargetIs(blockIfTrue));
452+
assert(SkipFallthroughBlocks(this, firstBlock->GetTrueTarget()) == SkipFallthroughBlocks(this, blockIfTrue));
406453
FlowEdge* const trueEdge = firstBlock->GetTrueEdge();
407454
FlowEdge* const falseEdge = firstBlock->GetFalseEdge();
408455

0 commit comments

Comments
 (0)