Skip to content

Commit 0c9568a

Browse files
authored
Improve RA for LowerBlockStore (#83627)
1 parent 0b03ca6 commit 0c9568a

File tree

1 file changed

+15
-3
lines changed

1 file changed

+15
-3
lines changed

src/coreclr/jit/lowerxarch.cpp

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -348,15 +348,27 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
348348
{
349349
const bool canUse16BytesSimdMov = !blkNode->IsOnHeapAndContainsReferences();
350350
#ifdef TARGET_AMD64
351-
const bool willUseOnlySimdMov = canUse16BytesSimdMov && (size % XMM_REGSIZE_BYTES == 0);
351+
352+
bool willUseOnlySimdMov = size % XMM_REGSIZE_BYTES == 0;
353+
if (!willUseOnlySimdMov)
354+
{
355+
// If we have a remainder we still might only use SIMD to process it (via overlapping)
356+
// unless it's more efficient to handle it with a single scalar op (for remainders of 1, 2, 4 or 8 bytes)
357+
const unsigned remainder = size % XMM_REGSIZE_BYTES;
358+
if (!isPow2(remainder) || (remainder > REGSIZE_BYTES))
359+
{
360+
willUseOnlySimdMov = true;
361+
}
362+
}
352363
#else
353364
const bool willUseOnlySimdMov = (size % 8 == 0);
354365
#endif
355-
if (willUseOnlySimdMov)
366+
if (willUseOnlySimdMov && canUse16BytesSimdMov)
356367
{
357368
src->SetContained();
358369
}
359-
else if (size > comp->getUnrollThreshold(Compiler::UnrollKind::Memset, /*canUseSimd*/ false))
370+
else if (size > comp->getUnrollThreshold(Compiler::UnrollKind::Memset,
371+
/*canUseSimd*/ canUse16BytesSimdMov))
360372
{
361373
// It turns out we can't use SIMD so the default threshold is too big
362374
goto TOO_BIG_TO_UNROLL;

0 commit comments

Comments
 (0)