Skip to content

Commit

Permalink
x86 zero initialization improvements for arrays
Browse files Browse the repository at this point in the history
Avoid REP STOS zero initialization for arrays whose length, checked at runtime,
is below a prescribed threshold; use faster GPR stores instead. Move the REP STOS
initialization out of line.

Signed-off-by: Daryl Maier <maier@ca.ibm.com>
  • Loading branch information
0xdaryl committed May 17, 2024
1 parent ab74432 commit 42f5414
Showing 1 changed file with 79 additions and 12 deletions.
91 changes: 79 additions & 12 deletions runtime/compiler/x/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6918,27 +6918,94 @@ static bool genZeroInitForEntireObjectOrHybridArraylet(
//
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, segmentReg, generateX86MemoryReference(newObjectAddressReg, headerSizeInBytes, cg), cg);

TR::Register *scratchReg = NULL;
TR::Register *zeroInitScratchReg = NULL;
if (comp->target().is64Bit())
{
scratchReg = srm->findOrCreateScratchRegister();
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, scratchReg, newObjectAddressReg, cg);
zeroInitScratchReg = srm->findOrCreateScratchRegister();
}
else

// If the compile-time size is unknown, generate a runtime length check to
// determine if REP STOS initialization is more appropriate.
//
// On 32-bit, always do REP STOS initialization inline.
//

static const char *p = feGetEnv("TR_repStosZeroInitThresholdBytes");
static int32_t repStosZeroInitThresholdBytes = p ? atoi(p) : 64;
static bool doInlineRepStosZeroInit = feGetEnv("TR_dontInlineRepStosZeroInit") ? false : true;

#ifdef TR_TARGET_64BIT
if (sizeReg && doInlineRepStosZeroInit)
{
generateRegInstruction(TR::InstOpCode::PUSHReg, node, newObjectAddressReg, cg);
TR::LabelSymbol *repStosInitLabelSym = generateLabelSymbol(cg);
TR::LabelSymbol *mergeInitLabelSym = generateLabelSymbol(cg);

generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, numBytesToZeroInitReg, repStosZeroInitThresholdBytes, cg);
generateLabelInstruction(TR::InstOpCode::JG4, node, repStosInitLabelSym, cg);

generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, zeroInitScratchReg, zeroInitScratchReg, cg);

// Generate mainline zero initialization with stores
//
TR::LabelSymbol *zeroInitLoopLabelSym = generateLabelSymbol(cg);
generateLabelInstruction(TR::InstOpCode::label, node, zeroInitLoopLabelSym, cg);
generateMemRegInstruction(TR::InstOpCode::S8MemReg, node,
generateX86MemoryReference(segmentReg, 0, cg),
zeroInitScratchReg, cg);
generateRegImmInstruction(TR::InstOpCode::ADD8RegImms, node, segmentReg, 8, cg);
generateRegImmInstruction(TR::InstOpCode::SUB8RegImms, node, numBytesToZeroInitReg, 8, cg);
generateRegImmInstruction(TR::InstOpCode::CMP8RegImms, node, numBytesToZeroInitReg, 0, cg);
generateLabelInstruction(TR::InstOpCode::JG4, node, zeroInitLoopLabelSym, cg);

{
// Generate out-of-line REP STOS initialization
//
TR_OutlinedInstructionsGenerator og(repStosInitLabelSym, node, cg);

// newObjectAddressReg must be in rax
// segmentReg must be in rdi
// numBytesToZeroInitReg must be in rcx
//
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, zeroInitScratchReg, newObjectAddressReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, newObjectAddressReg, newObjectAddressReg, cg);
generateInstruction(TR::InstOpCode::REPSTOSB, node, cg);
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, newObjectAddressReg, zeroInitScratchReg, cg);
generateLabelInstruction(TR::InstOpCode::JMP4, node, mergeInitLabelSym, cg);
og.endOutlinedInstructionSequence();
}
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, newObjectAddressReg, newObjectAddressReg, cg);
generateInstruction(TR::InstOpCode::REPSTOSB, node, cg);
if (comp->target().is64Bit())
{
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, newObjectAddressReg, scratchReg, cg);
srm->reclaimScratchRegister(scratchReg);

srm->reclaimScratchRegister(zeroInitScratchReg);

// Merge
//
generateLabelInstruction(TR::InstOpCode::label, node, mergeInitLabelSym, cg);
}
else
{
generateRegInstruction(TR::InstOpCode::POPReg, node, newObjectAddressReg, cg);
#endif

if (comp->target().is64Bit())
{
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, zeroInitScratchReg, newObjectAddressReg, cg);
}
else
{
generateRegInstruction(TR::InstOpCode::PUSHReg, node, newObjectAddressReg, cg);
}
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, newObjectAddressReg, newObjectAddressReg, cg);
generateInstruction(TR::InstOpCode::REPSTOSB, node, cg);
if (comp->target().is64Bit())
{
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, newObjectAddressReg, zeroInitScratchReg, cg);
srm->reclaimScratchRegister(zeroInitScratchReg);
}
else
{
generateRegInstruction(TR::InstOpCode::POPReg, node, newObjectAddressReg, cg);
}
#ifdef TR_TARGET_64BIT
}
#endif

return true;
}
Expand Down

0 comments on commit 42f5414

Please sign in to comment.