Skip to content

Commit 70fd5dc

Browse files
amanasifkhalid and Aman Khalid authored
Implement fake hot/cold splitting and corresponding stress mode (#69763)
* Implemented fake code splitting in JIT for testing without VM
* Implement stress mode for hot/cold splitting

Implementation splits after first basic block in method, assuming there is more than one block. Accompanying this implementation are the following fixes:
- Loop alignment is disabled for cold blocks, as moving blocks into the cold section may invalidate the initial decision to align.
- Long jumps are no longer reduced to short jumps if crossing hot/cold sections.

Co-authored-by: Aman Khalid <t-amankhalid@microsoft.com>
1 parent d7b9fce commit 70fd5dc

File tree

7 files changed

+124
-41
lines changed

7 files changed

+124
-41
lines changed

src/coreclr/jit/compiler.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3186,14 +3186,18 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
31863186

31873187
opts.compReloc = jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC);
31883188

3189+
bool enableFakeSplitting = false;
3190+
31893191
#ifdef DEBUG
3192+
enableFakeSplitting = JitConfig.JitFakeProcedureSplitting();
3193+
31903194
#if defined(TARGET_XARCH)
31913195
// Whether encoding of absolute addr as PC-rel offset is enabled
31923196
opts.compEnablePCRelAddr = (JitConfig.EnablePCRelAddr() != 0);
31933197
#endif
31943198
#endif // DEBUG
31953199

3196-
opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT);
3200+
opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT) || enableFakeSplitting;
31973201

31983202
#ifdef TARGET_ARM64
31993203
// TODO-ARM64-NYI: enable hot/cold splitting
@@ -3207,7 +3211,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
32073211
if (opts.compProcedureSplitting)
32083212
{
32093213
// Note that opts.compDbgCode is true under ngen for checked assemblies!
3210-
opts.compProcedureSplitting = !opts.compDbgCode;
3214+
opts.compProcedureSplitting = !opts.compDbgCode || enableFakeSplitting;
32113215

32123216
#ifdef DEBUG
32133217
// JitForceProcedureSplitting is used to force procedure splitting on checked assemblies.
@@ -3236,6 +3240,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
32363240
}
32373241

32383242
#ifdef DEBUG
3243+
32393244
// Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK
32403245
if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30))
32413246
{
@@ -5185,6 +5190,9 @@ void Compiler::placeLoopAlignInstructions()
51855190

51865191
if ((block->bbNext != nullptr) && (block->bbNext->isLoopAlign()))
51875192
{
5193+
// Loop alignment is disabled for cold blocks
5194+
assert((block->bbFlags & BBF_COLD) == 0);
5195+
51885196
// If jmp was not found, then block before the loop start is where align instruction will be added.
51895197
if (bbHavingAlign == nullptr)
51905198
{

src/coreclr/jit/compiler.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7607,6 +7607,8 @@ class Compiler
76077607

76087608
// ICorJitInfo wrappers
76097609

7610+
void eeAllocMem(AllocMemArgs* args);
7611+
76107612
void eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize);
76117613

76127614
void eeAllocUnwindInfo(BYTE* pHotCode,

src/coreclr/jit/ee_il_dll.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,37 @@ void Compiler::eeDispLineInfos()
11221122
* (e.g., host AMD64, target ARM64), then VM will get confused anyway.
11231123
*/
11241124

1125+
void Compiler::eeAllocMem(AllocMemArgs* args)
1126+
{
1127+
#ifdef DEBUG
1128+
// Fake splitting implementation: hot section = hot code + 4K buffer + cold code
1129+
const UNATIVE_OFFSET hotSizeRequest = args->hotCodeSize;
1130+
const UNATIVE_OFFSET coldSizeRequest = args->coldCodeSize;
1131+
const UNATIVE_OFFSET fakeSplittingBuffer = 4096;
1132+
1133+
if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0))
1134+
{
1135+
args->hotCodeSize = hotSizeRequest + fakeSplittingBuffer + coldSizeRequest;
1136+
args->coldCodeSize = 0;
1137+
}
1138+
#endif
1139+
1140+
info.compCompHnd->allocMem(args);
1141+
1142+
#ifdef DEBUG
1143+
if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0))
1144+
{
1145+
// Fix up hot/cold code pointers
1146+
args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + hotSizeRequest + fakeSplittingBuffer;
1147+
args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + hotSizeRequest + fakeSplittingBuffer;
1148+
1149+
// Reset args' hot/cold code sizes in case caller reads them later
1150+
args->hotCodeSize = hotSizeRequest;
1151+
args->coldCodeSize = coldSizeRequest;
1152+
}
1153+
#endif
1154+
}
1155+
11251156
void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize)
11261157
{
11271158
#ifdef DEBUG
@@ -1130,6 +1161,13 @@ void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwind
11301161
printf("reserveUnwindInfo(isFunclet=%s, isColdCode=%s, unwindSize=0x%x)\n", isFunclet ? "true" : "false",
11311162
isColdCode ? "true" : "false", unwindSize);
11321163
}
1164+
1165+
// Fake splitting currently does not handle unwind info for cold code
1166+
if (isColdCode && JitConfig.JitFakeProcedureSplitting())
1167+
{
1168+
JITDUMP("reserveUnwindInfo for cold code with JitFakeProcedureSplitting enabled: ignoring cold unwind info\n");
1169+
return;
1170+
}
11331171
#endif // DEBUG
11341172

11351173
if (info.compMatchedVM)
@@ -1169,6 +1207,13 @@ void Compiler::eeAllocUnwindInfo(BYTE* pHotCode,
11691207
}
11701208
printf(")\n");
11711209
}
1210+
1211+
// Fake splitting currently does not handle unwind info for cold code
1212+
if (pColdCode && JitConfig.JitFakeProcedureSplitting())
1213+
{
1214+
JITDUMP("allocUnwindInfo for cold code with JitFakeProcedureSplitting enabled: ignoring cold unwind info\n");
1215+
return;
1216+
}
11721217
#endif // DEBUG
11731218

11741219
if (info.compMatchedVM)

src/coreclr/jit/emit.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6065,7 +6065,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
60656065
args.xcptnsCount = xcptnsCount;
60666066
args.flag = allocMemFlag;
60676067

6068-
emitCmpHandle->allocMem(&args);
6068+
emitComp->eeAllocMem(&args);
60696069

60706070
codeBlock = (BYTE*)args.hotCodeBlock;
60716071
codeBlockRW = (BYTE*)args.hotCodeBlockRW;
@@ -6083,7 +6083,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
60836083
args.xcptnsCount = xcptnsCount;
60846084
args.flag = allocMemFlag;
60856085

6086-
emitCmpHandle->allocMem(&args);
6086+
emitComp->eeAllocMem(&args);
60876087

60886088
codeBlock = (BYTE*)args.hotCodeBlock;
60896089
codeBlockRW = (BYTE*)args.hotCodeBlockRW;
@@ -6337,6 +6337,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
63376337
assert(coldCodeBlock);
63386338
cp = coldCodeBlock;
63396339
writeableOffset = coldCodeBlockRW - coldCodeBlock;
6340+
emitOffsAdj = 0;
63406341
#ifdef DEBUG
63416342
if (emitComp->opts.disAsm || emitComp->verbose)
63426343
{

src/coreclr/jit/fgopt.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5963,9 +5963,10 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication)
59635963
(bNext != nullptr) && // block is not the last block
59645964
(bNext->bbRefs == 1) && // No other block jumps to bNext
59655965
(bNext->bbJumpKind == BBJ_ALWAYS) && // The next block is a BBJ_ALWAYS block
5966-
bNext->isEmpty() && // and it is an an empty block
5966+
bNext->isEmpty() && // and it is an empty block
59675967
(bNext != bNext->bbJumpDest) && // special case for self jumps
5968-
(bDest != fgFirstColdBlock))
5968+
(bDest != fgFirstColdBlock) &&
5969+
(!fgInDifferentRegions(block, bDest))) // do not cross hot/cold sections
59695970
{
59705971
// case (a)
59715972
//

src/coreclr/jit/flowgraph.cpp

Lines changed: 52 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3410,49 +3410,64 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock()
34103410
BasicBlock* block;
34113411
BasicBlock* lblk;
34123412

3413-
for (lblk = nullptr, block = fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
3414-
{
3415-
bool blockMustBeInHotSection = false;
3413+
bool forceSplit = false;
34163414

3417-
#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
3418-
if (bbIsHandlerBeg(block))
3419-
{
3420-
blockMustBeInHotSection = true;
3421-
}
3422-
#endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
3415+
#ifdef DEBUG
3416+
// If stress-splitting, split right after the first block; don't handle functions with EH
3417+
forceSplit = JitConfig.JitStressProcedureSplitting() && (compHndBBtabCount == 0);
3418+
#endif
34233419

3424-
// Do we have a candidate for the first cold block?
3425-
if (firstColdBlock != nullptr)
3420+
if (forceSplit)
3421+
{
3422+
firstColdBlock = fgFirstBB->bbNext;
3423+
prevToFirstColdBlock = fgFirstBB;
3424+
}
3425+
else
3426+
{
3427+
for (lblk = nullptr, block = fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
34263428
{
3427-
// We have a candidate for first cold block
3429+
bool blockMustBeInHotSection = false;
34283430

3429-
// Is this a hot block?
3430-
if (blockMustBeInHotSection || (block->isRunRarely() == false))
3431+
#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
3432+
if (bbIsHandlerBeg(block))
34313433
{
3432-
// We have to restart the search for the first cold block
3433-
firstColdBlock = nullptr;
3434-
prevToFirstColdBlock = nullptr;
3434+
blockMustBeInHotSection = true;
34353435
}
3436-
}
3437-
else // (firstColdBlock == NULL)
3438-
{
3439-
// We don't have a candidate for first cold block
3436+
#endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
34403437

3441-
// Is this a cold block?
3442-
if (!blockMustBeInHotSection && (block->isRunRarely() == true))
3438+
// Do we have a candidate for the first cold block?
3439+
if (firstColdBlock != nullptr)
34433440
{
3444-
//
3445-
// If the last block that was hot was a BBJ_COND
3446-
// then we will have to add an unconditional jump
3447-
// so the code size for block needs to be large
3448-
// enough to make it worth our while
3449-
//
3450-
if ((lblk == nullptr) || (lblk->bbJumpKind != BBJ_COND) || (fgGetCodeEstimate(block) >= 8))
3441+
// We have a candidate for first cold block
3442+
3443+
// Is this a hot block?
3444+
if (blockMustBeInHotSection || (block->isRunRarely() == false))
34513445
{
3452-
// This block is now a candidate for first cold block
3453-
// Also remember the predecessor to this block
3454-
firstColdBlock = block;
3455-
prevToFirstColdBlock = lblk;
3446+
// We have to restart the search for the first cold block
3447+
firstColdBlock = nullptr;
3448+
prevToFirstColdBlock = nullptr;
3449+
}
3450+
}
3451+
else // (firstColdBlock == NULL)
3452+
{
3453+
// We don't have a candidate for first cold block
3454+
3455+
// Is this a cold block?
3456+
if (!blockMustBeInHotSection && (block->isRunRarely() == true))
3457+
{
3458+
//
3459+
// If the last block that was hot was a BBJ_COND
3460+
// then we will have to add an unconditional jump
3461+
// so the code size for block needs to be large
3462+
// enough to make it worth our while
3463+
//
3464+
if ((lblk == nullptr) || (lblk->bbJumpKind != BBJ_COND) || (fgGetCodeEstimate(block) >= 8))
3465+
{
3466+
// This block is now a candidate for first cold block
3467+
// Also remember the predecessor to this block
3468+
firstColdBlock = block;
3469+
prevToFirstColdBlock = lblk;
3470+
}
34563471
}
34573472
}
34583473
}
@@ -3479,8 +3494,9 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock()
34793494
// then it may not be worth it to move it
34803495
// into the Cold section as a jump to the
34813496
// Cold section is 5 bytes in size.
3497+
// Ignore if stress-splitting.
34823498
//
3483-
if (firstColdBlock->bbNext == nullptr)
3499+
if (!forceSplit && firstColdBlock->bbNext == nullptr)
34843500
{
34853501
// If the size of the cold block is 7 or less
34863502
// then we will keep it in the Hot section.
@@ -3553,6 +3569,7 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock()
35533569
for (block = firstColdBlock; block != nullptr; block = block->bbNext)
35543570
{
35553571
block->bbFlags |= BBF_COLD;
3572+
block->unmarkLoopAlign(this DEBUG_ARG("Loop alignment disabled for cold blocks"));
35563573
}
35573574

35583575
EXIT:;

src/coreclr/jit/jitconfigvalues.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,9 @@ CONFIG_INTEGER(JitStressBiasedCSE, W("JitStressBiasedCSE"), 0x101) // Intern
164164
// stress.
165165
CONFIG_INTEGER(JitStressModeNamesOnly, W("JitStressModeNamesOnly"), 0) // Internal Jit stress: if nonzero, only enable
166166
// stress modes listed in JitStressModeNames
167+
CONFIG_INTEGER(JitStressProcedureSplitting, W("JitStressProcedureSplitting"), 0) // Always split after the first basic
168+
// block. Skips functions with EH
169+
// for simplicity.
167170
CONFIG_INTEGER(JitStressRegs, W("JitStressRegs"), 0)
168171
CONFIG_STRING(JitStressRegsRange, W("JitStressRegsRange")) // Only apply JitStressRegs to methods in this hash range
169172

@@ -192,6 +195,12 @@ CONFIG_INTEGER(JitDumpAtOSROffset, W("JitDumpAtOSROffset"), -1) // Only dump OSR
192195
CONFIG_INTEGER(JitDumpInlinePhases, W("JitDumpInlinePhases"), 1) // Dump inline compiler phases
193196
CONFIG_METHODSET(JitEHDump, W("JitEHDump")) // Dump the EH table for the method, as reported to the VM
194197
CONFIG_METHODSET(JitExclude, W("JitExclude"))
198+
CONFIG_INTEGER(JitFakeProcedureSplitting, W("JitFakeProcedureSplitting"), 0) // Do code splitting independent of VM.
199+
// For now, this disables unwind info for
200+
// cold sections, breaking stack walks.
201+
// Set COMPlus_GCgen0size=1000000 to avoid
202+
// running the GC, which requires
203+
// stack-walking.
195204
CONFIG_METHODSET(JitForceProcedureSplitting, W("JitForceProcedureSplitting"))
196205
CONFIG_METHODSET(JitGCDump, W("JitGCDump"))
197206
CONFIG_METHODSET(JitDebugDump, W("JitDebugDump"))

0 commit comments

Comments
 (0)