File tree 1 file changed +15
-3
lines changed 1 file changed +15
-3
lines changed Original file line number Diff line number Diff line change @@ -348,15 +348,27 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
348
348
{
349
349
const bool canUse16BytesSimdMov = !blkNode->IsOnHeapAndContainsReferences ();
350
350
#ifdef TARGET_AMD64
351
- const bool willUseOnlySimdMov = canUse16BytesSimdMov && (size % XMM_REGSIZE_BYTES == 0 );
351
+
352
+ bool willUseOnlySimdMov = size % XMM_REGSIZE_BYTES == 0 ;
353
+ if (!willUseOnlySimdMov)
354
+ {
355
+ // If there is a remainder, we may still be able to process it using only SIMD (via overlapping),
356
+ // unless it is more efficient to handle it with a scalar op (for remainders of 1, 2, 4 or 8 bytes).
357
+ const unsigned remainder = size % XMM_REGSIZE_BYTES;
358
+ if (!isPow2 (remainder ) || (remainder > REGSIZE_BYTES))
359
+ {
360
+ willUseOnlySimdMov = true ;
361
+ }
362
+ }
352
363
#else
353
364
const bool willUseOnlySimdMov = (size % 8 == 0 );
354
365
#endif
355
- if (willUseOnlySimdMov)
366
+ if (willUseOnlySimdMov && canUse16BytesSimdMov )
356
367
{
357
368
src->SetContained ();
358
369
}
359
- else if (size > comp->getUnrollThreshold (Compiler::UnrollKind::Memset, /* canUseSimd*/ false ))
370
+ else if (size > comp->getUnrollThreshold (Compiler::UnrollKind::Memset,
371
+ /* canUseSimd*/ canUse16BytesSimdMov))
360
372
{
361
373
// It turns out we can't use SIMD so the default threshold is too big
362
374
goto TOO_BIG_TO_UNROLL;
You can’t perform that action at this time.
0 commit comments