-
Notifications
You must be signed in to change notification settings - Fork 5k
[RISC-V] Remove unused stack probing methods #112347
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
b10c38d
remove redundant code
sirntar ef0892c
StackOverflow test
sirntar 971be9b
remove StackOverflow (for rv64) test
sirntar b98abe6
remove commented code
sirntar e520193
apply format patch
sirntar 3a43c85
reverse RH region mark and null check
sirntar 9de229b
revert emitJumpKindInstructions
sirntar f8034ce
Merge branch 'main' into remove-stack-probe
sirntar 32efb37
Merge branch 'main' into remove-stack-probe
sirntar 8b0271e
simplify stack probing
sirntar 8709e9d
Merge branch 'main' into remove-stack-probe
sirntar File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1635,7 +1635,7 @@ void CodeGen::genLclHeap(GenTree* tree) | |
// The SP might already be in the guard page, so we must touch it BEFORE | ||
// the alloc, not after. | ||
|
||
// ld_w r0, 0(SP) | ||
// tickle the page - this triggers a page fault when on the guard page | ||
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SP, 0); | ||
|
||
lastTouchDelta = amount; | ||
|
@@ -1680,8 +1680,7 @@ void CodeGen::genLclHeap(GenTree* tree) | |
// and localloc size is a multiple of STACK_ALIGN. | ||
|
||
// Loop: | ||
ssize_t imm = -16; | ||
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); | ||
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -16); | ||
|
||
emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 8); | ||
emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 0); | ||
|
@@ -1693,8 +1692,8 @@ void CodeGen::genLclHeap(GenTree* tree) | |
|
||
emit->emitIns_R_R_I(INS_addi, emitActualTypeSize(type), regCnt, regCnt, -16); | ||
|
||
assert(imm == (-4 << 2)); // goto loop. | ||
emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2)); | ||
// goto Loop | ||
emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, -4 << 2); | ||
|
||
lastTouchDelta = 0; | ||
} | ||
|
@@ -1708,7 +1707,6 @@ void CodeGen::genLclHeap(GenTree* tree) | |
// case SP is on the last byte of the guard page. Thus you must | ||
// touch SP-0 first not SP-0x1000. | ||
// | ||
// This is similar to the prolog code in CodeGen::genAllocLclFrame(). | ||
// | ||
// Note that we go through a few hoops so that SP never points to | ||
// illegal pages at any time during the tickling process. | ||
|
@@ -1719,23 +1717,20 @@ void CodeGen::genLclHeap(GenTree* tree) | |
// addi regCnt, REG_R0, 0 | ||
// | ||
// Skip: | ||
// lui regTmp, eeGetPageSize()>>12 | ||
// lui regPageSize, eeGetPageSize()>>12 | ||
// addi regTmp, SP, 0 | ||
// Loop: | ||
// lw r0, 0(SP) // tickle the page - read from the page | ||
// sub RA, SP, regTmp // decrement SP by eeGetPageSize() | ||
// bltu RA, regCnt, Done | ||
// sub SP, SP,regTmp | ||
// j Loop | ||
// lw r0, 0(regTmp) // tickle the page - read from the page | ||
// sub regTmp, regTmp, regPageSize | ||
// bgeu regTmp, regCnt, Loop | ||
// | ||
// Done: | ||
// mov SP, regCnt | ||
// addi SP, regCnt, 0 | ||
// | ||
|
||
if (tempReg == REG_NA) | ||
tempReg = internalRegisters.Extract(tree); | ||
|
||
regNumber rPageSize = internalRegisters.GetSingle(tree); | ||
|
||
assert(regCnt != tempReg); | ||
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); | ||
|
||
|
@@ -1746,35 +1741,24 @@ void CodeGen::genLclHeap(GenTree* tree) | |
emit->emitIns_R_R_I(INS_beq, EA_PTRSIZE, tempReg, REG_R0, 2 << 2); | ||
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regCnt, REG_R0, 0); | ||
|
||
emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); | ||
|
||
// genDefineTempLabel(loop); | ||
|
||
// tickle the page - Read from the updated SP - this triggers a page fault when on the guard page | ||
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SPBASE, 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to make sure it's not doing anything shady, what's preventing the |
||
|
||
// decrement SP by eeGetPageSize() | ||
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, tempReg, REG_SPBASE, rPageSize); | ||
regNumber rPageSize = internalRegisters.GetSingle(tree); | ||
|
||
assert(rPageSize != tempReg); | ||
noway_assert(rPageSize != tempReg); | ||
|
||
ssize_t imm = 3 << 2; // goto done. | ||
emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, tempReg, regCnt, imm); | ||
|
||
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rPageSize); | ||
|
||
imm = -4 << 2; | ||
// Jump to loop and tickle new stack address | ||
emit->emitIns_I(INS_j, EA_PTRSIZE, imm); | ||
emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); | ||
regSet.verifyRegUsed(rPageSize); | ||
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, REG_SPBASE, 0); | ||
|
||
// Done with stack tickle loop | ||
// genDefineTempLabel(done); | ||
// tickle the page - this triggers a page fault when on the guard page | ||
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, tempReg, 0); | ||
emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tempReg, tempReg, rPageSize); | ||
|
||
// Now just move the final value to SP | ||
emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0); | ||
emit->emitIns_R_R_I(INS_bgeu, EA_PTRSIZE, tempReg, regCnt, -2 << 2); | ||
|
||
// lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, | ||
// we're going to assume the worst and probe. | ||
// Move the final value to SP | ||
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, regCnt, 0); | ||
} | ||
|
||
ALLOC_DONE: | ||
|
@@ -6701,175 +6685,6 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) | |
}; | ||
} | ||
|
||
//------------------------------------------------------------------------ | ||
// genStackProbe: Probe the stack without changing it | ||
// | ||
// Notes: | ||
// This function is using loop to probe each memory page. | ||
// | ||
// Arguments: | ||
// frameSize - total frame size | ||
// rOffset - usually initial register number | ||
// rLimit - an extra register for comparison | ||
// rPageSize - register for storing page size | ||
// | ||
void CodeGen::genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize) | ||
{ | ||
// make sure frameSize safely fits within 4 bytes | ||
noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); | ||
|
||
const target_size_t pageSize = compiler->eeGetPageSize(); | ||
|
||
// According to RISC-V Privileged ISA page size should be equal 4KiB | ||
noway_assert(pageSize == 0x1000); | ||
|
||
emitter* emit = GetEmitter(); | ||
|
||
emit->emitLoadImmediate(EA_PTRSIZE, rLimit, -frameSize); | ||
regSet.verifyRegUsed(rLimit); | ||
|
||
emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, rLimit, rLimit, REG_SPBASE); | ||
|
||
emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); | ||
regSet.verifyRegUsed(rPageSize); | ||
|
||
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, REG_SPBASE, rPageSize); | ||
|
||
// Loop: | ||
// tickle the page - Read from the updated SP - this triggers a page fault when on the guard page | ||
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, rOffset, 0); | ||
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, rOffset, rPageSize); | ||
|
||
// each instr is 4 bytes | ||
// if (rOffset >= rLimit) goto Loop; | ||
emit->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, -2 << 2); | ||
} | ||
|
||
//------------------------------------------------------------------------ | ||
// genAllocLclFrame: Probe the stack. | ||
// | ||
// Notes: | ||
// This only does the probing; allocating the frame is done when callee-saved registers are saved. | ||
// This is done before anything has been pushed. The previous frame might have a large outgoing argument | ||
// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might | ||
// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however, | ||
// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard | ||
// page by default, so we need to be more careful. We do an extra probe if we might not have probed | ||
// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this | ||
// on Windows as well just to be consistent, even though it should not be necessary. | ||
// | ||
// Arguments: | ||
// frameSize - the size of the stack frame being allocated. | ||
// initReg - register to use as a scratch register. | ||
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if | ||
// this call sets 'initReg' to a non-zero value. Otherwise, it is unchanged. | ||
// maskArgRegsLiveIn - incoming argument registers that are currently live. | ||
// | ||
// Return value: | ||
// None | ||
// | ||
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) | ||
{ | ||
assert(compiler->compGeneratingProlog); | ||
|
||
if (frameSize == 0) | ||
{ | ||
return; | ||
} | ||
|
||
// According to RISC-V Privileged ISA page size should be equal 4KiB | ||
const target_size_t pageSize = compiler->eeGetPageSize(); | ||
|
||
assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg)); | ||
|
||
target_size_t lastTouchDelta = 0; | ||
|
||
emitter* emit = GetEmitter(); | ||
|
||
// Emit the following sequence to 'tickle' the pages. | ||
// Note it is important that stack pointer not change until this is complete since the tickles | ||
// could cause a stack overflow, and we need to be able to crawl the stack afterward | ||
// (which means the stack pointer needs to be known). | ||
|
||
if (frameSize < pageSize) | ||
{ | ||
// no probe needed | ||
lastTouchDelta = frameSize; | ||
} | ||
else if (frameSize < 3 * pageSize) | ||
{ | ||
// between 1 and 3 pages we will probe each page without a loop, | ||
// because it is faster that way and doesn't cost us much | ||
lastTouchDelta = frameSize; | ||
|
||
for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize) | ||
{ | ||
emit->emitIns_R_I(INS_lui, EA_PTRSIZE, initReg, probeOffset >> 12); | ||
regSet.verifyRegUsed(initReg); | ||
|
||
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg); | ||
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0); | ||
|
||
lastTouchDelta -= pageSize; | ||
} | ||
|
||
assert(pInitRegZeroed != nullptr); | ||
*pInitRegZeroed = false; // The initReg does not contain zero | ||
|
||
assert(lastTouchDelta == frameSize % pageSize); | ||
compiler->unwindPadding(); | ||
} | ||
else | ||
{ | ||
// probe each page, that we need to allocate large stack frame | ||
assert(frameSize >= 3 * pageSize); | ||
|
||
regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED); | ||
availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers | ||
// as they are currently live | ||
availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg | ||
|
||
noway_assert(availMask != RBM_NONE); | ||
|
||
regMaskTP regMask = genFindLowestBit(availMask); | ||
regNumber rLimit = genRegNumFromMask(regMask); | ||
|
||
availMask &= ~regMask; // Remove rLimit register | ||
|
||
noway_assert(availMask != RBM_NONE); | ||
|
||
regMask = genFindLowestBit(availMask); | ||
regNumber rPageSize = genRegNumFromMask(regMask); | ||
|
||
genStackProbe((ssize_t)frameSize, initReg, rLimit, rPageSize); | ||
|
||
assert(pInitRegZeroed != nullptr); | ||
*pInitRegZeroed = false; // The initReg does not contain zero | ||
|
||
lastTouchDelta = frameSize % pageSize; | ||
compiler->unwindPadding(); | ||
} | ||
|
||
#if STACK_PROBE_BOUNDARY_THRESHOLD_BYTES != 0 | ||
// if the last page was too far, we will make an extra probe at the bottom | ||
if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize) | ||
{ | ||
assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < pageSize << 1); | ||
|
||
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, initReg, REG_R0, frameSize); | ||
regSet.verifyRegUsed(initReg); | ||
|
||
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg); | ||
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0); | ||
|
||
assert(pInitRegZeroed != nullptr); | ||
*pInitRegZeroed = false; // The initReg does not contain zero | ||
|
||
compiler->unwindPadding(); | ||
} | ||
#endif | ||
} | ||
|
||
void CodeGen::genJumpToThrowHlpBlk_la( | ||
SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2) | ||
{ | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a reason not to decrement
sp
directly?