Skip to content

[release/9.0-preview3] Revert #99140 #100147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 16 additions & 50 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
@@ -3626,7 +3626,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
unsigned slots = layout->GetSlotCount();

// Temp register(s) used to perform the sequence of loads and stores.
regNumber tmpReg = cpObjNode->ExtractTempReg(RBM_ALLINT);
regNumber tmpReg = cpObjNode->ExtractTempReg();
regNumber tmpReg2 = REG_NA;

assert(genIsValidIntReg(tmpReg));
@@ -3635,7 +3635,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)

if (slots > 1)
{
tmpReg2 = cpObjNode->ExtractTempReg(RBM_ALLINT);
tmpReg2 = cpObjNode->GetSingleTempReg();
assert(tmpReg2 != tmpReg);
assert(genIsValidIntReg(tmpReg2));
assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
@@ -3682,69 +3682,35 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

// We might also need SIMD regs if we have 4 or more continuous non-gc slots
// On ARM64, SIMD loads/stores provide 8-byte atomicity guarantees when aligned to 8 bytes.
regNumber tmpSimdReg1 = REG_NA;
regNumber tmpSimdReg2 = REG_NA;
if ((slots >= 4) && compiler->IsBaselineSimdIsaSupported())
{
tmpSimdReg1 = cpObjNode->ExtractTempReg(RBM_ALLFLOAT);
tmpSimdReg2 = cpObjNode->ExtractTempReg(RBM_ALLFLOAT);
}

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
{
// How many continuous non-gc slots do we have?
unsigned nonGcSlots = 0;
do
// Check if the next slot's type is also TYP_GC_NONE and use ldp/stp
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
{
nonGcSlots++;
i++;
} while ((i < slots) && !layout->IsGCPtr(i));

const regNumber srcReg = REG_WRITE_BARRIER_SRC_BYREF;
const regNumber dstReg = REG_WRITE_BARRIER_DST_BYREF;
while (nonGcSlots > 0)
emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF,
2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
++i; // extra increment of i, since we are copying two items
}
else
{
regNumber tmp1 = tmpReg;
regNumber tmp2 = tmpReg2;
emitAttr size = EA_8BYTE;
insOpts opts = INS_OPTS_POST_INDEX;

// Copy at least two slots at a time
if (nonGcSlots >= 2)
{
// Do 4 slots at a time if SIMD is supported
if ((nonGcSlots >= 4) && compiler->IsBaselineSimdIsaSupported())
{
// We need SIMD temp regs now
tmp1 = tmpSimdReg1;
tmp2 = tmpSimdReg2;
size = EA_16BYTE;
nonGcSlots -= 2;
}
nonGcSlots -= 2;
emit->emitIns_R_R_R_I(INS_ldp, size, tmp1, tmp2, srcReg, EA_SIZE(size) * 2, opts);
emit->emitIns_R_R_R_I(INS_stp, size, tmp1, tmp2, dstReg, EA_SIZE(size) * 2, opts);
}
else
{
nonGcSlots--;
emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmp1, srcReg, EA_SIZE(size), opts);
emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmp1, dstReg, EA_SIZE(size), opts);
}
emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
INS_OPTS_POST_INDEX);
emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
INS_OPTS_POST_INDEX);
}
}
else
{
// In the case of a GC-Pointer we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
i++;
}
++i;
}
assert(gcPtrCount == 0);
}
Expand Down
7 changes: 0 additions & 7 deletions src/coreclr/jit/lsraarmarch.cpp
Original file line number Diff line number Diff line change
@@ -697,13 +697,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
}

if (size >= 4 * REGSIZE_BYTES && compiler->IsBaselineSimdIsaSupported())
{
// We can use 128-bit SIMD ldp/stp for larger block sizes
buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
}

// If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;

Expand Down