Skip to content

Commit

Permalink
Merge pull request #19320 from R2steven/master
Browse files — browse the repository at this point in the history
Use 32-bit XOR to zero registers in x64
  • Loading branch information
0xdaryl authored May 8, 2024
2 parents c58b9f8 + 37f4869 commit 9eb718e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 17 deletions.
4 changes: 2 additions & 2 deletions runtime/compiler/x/amd64/codegen/AMD64JNILinkage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ TR::Register *J9::X86::AMD64::JNILinkage::processJNIReferenceArg(TR::Node *child
if (child->pointsToNull())
{
refReg = cg()->allocateRegister();
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), child, refReg, refReg, cg());
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, child, refReg, refReg, cg());
// TODO (81564): We need to kill the scratch register to prevent an
// assertion error, but is this the right place to do so?
cg()->stopUsingRegister(refReg);
Expand Down Expand Up @@ -938,7 +938,7 @@ void J9::X86::AMD64::JNILinkage::acquireVMAccess(TR::Node *callNode)
TR::Register *scratchReg1 = cg()->allocateRegister();
TR::Register *scratchReg2 = cg()->allocateRegister();

generateRegRegInstruction(TR::InstOpCode::XORRegReg(), callNode, scratchReg1, scratchReg1, cg());
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, callNode, scratchReg1, scratchReg1, cg());

TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
uintptr_t mask = fej9->constAcquireVMAccessOutOfLineMask();
Expand Down
28 changes: 14 additions & 14 deletions runtime/compiler/x/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1694,7 +1694,7 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
if (isIndexableDataAddrPresent)
{
// No offset is needed since 1st dimension array is contiguous.
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, temp3Reg, temp3Reg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, temp3Reg, temp3Reg, cg);
generateLabelInstruction(TR::InstOpCode::JMP4, node, populateFirstDimDataAddrSlot, cg);
}
else
Expand Down Expand Up @@ -3629,7 +3629,7 @@ TR::Register * highestOneBit(TR::Node *node, TR::CodeGenerator *cg, TR::Register
// shl r1, r2
TR::Register *scratchReg = cg->allocateRegister();
TR::Register *bsrReg = cg->allocateRegister();
generateRegRegInstruction(TR::InstOpCode::XORRegReg(is64Bit), node, scratchReg, scratchReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, scratchReg, scratchReg, cg);
generateRegRegInstruction(TR::InstOpCode::BSRRegReg(is64Bit), node, bsrReg, reg, cg);
generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, scratchReg, cg);
TR::RegisterDependencyConditions *shiftDependencies = generateRegisterDependencyConditions((uint8_t)1, 1, cg);
Expand Down Expand Up @@ -3762,7 +3762,7 @@ TR::Register *numberOfLeadingZeros(TR::Node *node, TR::CodeGenerator *cg, TR::Re
// ret r1
TR::Register *maskReg = cg->allocateRegister();
TR::Register *bsrReg = cg->allocateRegister();
generateRegRegInstruction(TR::InstOpCode::XORRegReg(is64Bit), node, maskReg, maskReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, maskReg, maskReg, cg);
generateRegRegInstruction(TR::InstOpCode::BSRRegReg(is64Bit), node, bsrReg, reg, cg);
generateRegInstruction(TR::InstOpCode::SETE1Reg, node, maskReg, cg);
generateRegInstruction(TR::InstOpCode::DECReg(is64Bit), node, maskReg, cg);
Expand Down Expand Up @@ -3841,7 +3841,7 @@ TR::Register * numberOfTrailingZeros(TR::Node *node, TR::CodeGenerator *cg, TR::
TR::Register *bsfReg = cg->allocateRegister();
TR::Register *tempReg = cg->allocateRegister();
TR::Register *maskReg = cg->allocateRegister();
generateRegRegInstruction(TR::InstOpCode::XORRegReg(is64Bit), node, tempReg, tempReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, tempReg, tempReg, cg);
generateRegRegInstruction(TR::InstOpCode::BSFRegReg(is64Bit), node, bsfReg, reg, cg);
generateRegInstruction(TR::InstOpCode::SETE1Reg, node, tempReg, cg);
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(is64Bit), node, maskReg, tempReg, cg);
Expand Down Expand Up @@ -5582,7 +5582,7 @@ J9::X86::TreeEvaluator::VMmonexitEvaluator(
{
unlockedReg = cg->allocateRegister();
eaxReal = cg->allocateRegister();
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, unlockedReg, unlockedReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, unlockedReg, unlockedReg, cg);
generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, eaxReal, INC_DEC_VALUE, cg);

TR::InstOpCode::Mnemonic op = cg->comp()->target().isSMP() ? TR::InstOpCode::LCMPXCHGMemReg(gen64BitInstr) : TR::InstOpCode::CMPXCHGMemReg(gen64BitInstr);
Expand Down Expand Up @@ -5840,7 +5840,7 @@ static void genHeapAlloc(
// heap allocation, so proceed
if (sizeReg)
{
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, eaxReal, eaxReal, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, eaxReal, eaxReal, cg);

// make sure size isn't too big
// convert max object size to num elements because computing an object size from num elements may overflow
Expand Down Expand Up @@ -6214,7 +6214,7 @@ static void genHeapAlloc(
#endif
{
#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, tempReg, tempReg, cg);
#endif


Expand Down Expand Up @@ -7211,7 +7211,7 @@ static bool genZeroInitObject2(
{
generateRegInstruction(TR::InstOpCode::PUSHReg, node, targetReg, cg);
}
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, targetReg, targetReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, targetReg, targetReg, cg);
generateInstruction(TR::InstOpCode::REPSTOSB, node, cg);
if (comp->target().is64Bit())
{
Expand Down Expand Up @@ -7467,7 +7467,7 @@ static bool genZeroInitObject(
generateRegInstruction(TR::InstOpCode::PUSHReg, node, targetReg, cg);
}

generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, targetReg, targetReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, targetReg, targetReg, cg);

// We just pushed targetReg on the stack and zeroed it out. targetReg contained the address of the
// beginning of the header. We want to use the 0-reg to initialize the monitor slot, so we use
Expand Down Expand Up @@ -7500,7 +7500,7 @@ static bool genZeroInitObject(

if (numSlots > 0)
{
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, tempReg, tempReg, cg);

bool initLw = (node->getOpCodeValue() != TR::New);
int lwOffset = fej9->getByteOffsetToLockword(clazz);
Expand Down Expand Up @@ -7907,7 +7907,7 @@ J9::X86::TreeEvaluator::VMnewEvaluator(
static bool UseOldBVI = feGetEnv("TR_UseOldBVI");
if (UseOldBVI)
{
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, tempReg, tempReg, cg);
while (bvi.hasMoreElements())
{
generateMemRegInstruction(TR::InstOpCode::S4MemReg, node,
Expand Down Expand Up @@ -8106,7 +8106,7 @@ J9::X86::TreeEvaluator::VMnewEvaluator(
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());

discontiguousDataAddrOffsetReg = cg->allocateRegister();
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, discontiguousDataAddrOffsetReg, discontiguousDataAddrOffsetReg, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, discontiguousDataAddrOffsetReg, discontiguousDataAddrOffsetReg, cg);
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, sizeReg, 1, cg);
generateRegImmInstruction(TR::InstOpCode::ADCRegImm4(), node, discontiguousDataAddrOffsetReg, 0, cg);
dataAddrMR = generateX86MemoryReference(targetReg, discontiguousDataAddrOffsetReg, 3, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
Expand Down Expand Up @@ -12359,7 +12359,7 @@ J9::X86::TreeEvaluator::stringCaseConversionHelper(TR::Node *node, TR::CodeGener
generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, result, 1, cg);

// initialize the loop counter
cursor = generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, counter, counter, cg); iComment("initialize loop counter");
cursor = generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, counter, counter, cg); iComment("initialize loop counter");

// Calculate residueStartLength. Later instructions compare the counter with this length to decide when to jump to the residue-handling sequence.
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, residueStartLength, length, cg);
Expand Down Expand Up @@ -12452,7 +12452,7 @@ J9::X86::TreeEvaluator::stringCaseConversionHelper(TR::Node *node, TR::CodeGener

// 4. handle invalid case
generateLabelInstruction(TR::InstOpCode::label, node, failLabel, cg);
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, result, result, cg);
generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, result, result, cg);

generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);
node->setRegister(result);
Expand Down
2 changes: 1 addition & 1 deletion runtime/compiler/x/codegen/X86PrivateLinkage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,7 @@ void J9::X86::PrivateLinkage::createPrologue(TR::Instruction *cursor)

if (numReferenceLocalSlotsToInitialize > 0 || numInternalPointerSlotsToInitialize > 0)
{
cursor = new (trHeapMemory()) TR::X86RegRegInstruction(cursor, TR::InstOpCode::XORRegReg(), scratchReg, scratchReg, cg());
cursor = new (trHeapMemory()) TR::X86RegRegInstruction(cursor, TR::InstOpCode::XOR4RegReg, scratchReg, scratchReg, cg());

// Initialize locals that are live on entry
//
Expand Down

0 comments on commit 9eb718e

Please sign in to comment.