Skip to content

Commit 519c93d

Browse files
[release/7.0] Ensure that the SSE fallback for Vector3.Dot masks off the unused element of op1 and op2 (#74980)
* Ensure that the SSE fallback for Vector3.Dot masks off the unused element of op1 and op2 * Applying formatting patch * Ensure we use TYP_SIMD16 for the simdType when generating the fallback Dot nodes Co-authored-by: Tanner Gooding <tagoo@outlook.com>
1 parent 4a03353 commit 519c93d

File tree

1 file changed

+49
-17
lines changed

1 file changed

+49
-17
lines changed

src/coreclr/jit/lowerxarch.cpp

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3419,45 +3419,77 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
34193419
{
34203420
assert(simdBaseType == TYP_FLOAT);
34213421

3422+
// We need to mask off the most significant element to avoid the shuffle + add
3423+
// from including it in the computed result. We need to do this for both op1 and
3424+
// op2 in case one of them is `NaN` (because Zero * NaN == NaN)
3425+
3426+
simd16_t simd16Val = {};
3427+
3428+
simd16Val.i32[0] = -1;
3429+
simd16Val.i32[1] = -1;
3430+
simd16Val.i32[2] = -1;
3431+
simd16Val.i32[3] = +0;
3432+
3433+
simdType = TYP_SIMD16;
3434+
simdSize = 16;
3435+
34223436
// We will be constructing the following parts:
34233437
// ...
34243438
// +--* CNS_INT int -1
34253439
// +--* CNS_INT int -1
34263440
// +--* CNS_INT int -1
34273441
// +--* CNS_INT int 0
34283442
// tmp1 = * HWINTRINSIC simd16 T Create
3429-
// /--* op2 simd16
3443+
// /--* op1 simd16
34303444
// +--* tmp1 simd16
34313445
// op1 = * HWINTRINSIC simd16 T And
34323446
// ...
34333447

34343448
// This is roughly the following managed code:
34353449
// ...
34363450
// tmp1 = Vector128.Create(-1, -1, -1, 0);
3437-
// op1 = Sse.And(op1, tmp2);
3451+
// op1 = Sse.And(op1, tmp1);
34383452
// ...
34393453

3440-
GenTree* cns0 = comp->gtNewIconNode(-1, TYP_INT);
3441-
BlockRange().InsertAfter(op1, cns0);
3454+
GenTreeVecCon* vecCon1 = comp->gtNewVconNode(simdType, simdBaseJitType);
3455+
vecCon1->gtSimd16Val = simd16Val;
34423456

3443-
GenTree* cns1 = comp->gtNewIconNode(-1, TYP_INT);
3444-
BlockRange().InsertAfter(cns0, cns1);
3457+
BlockRange().InsertAfter(op1, vecCon1);
34453458

3446-
GenTree* cns2 = comp->gtNewIconNode(-1, TYP_INT);
3447-
BlockRange().InsertAfter(cns1, cns2);
3459+
op1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, vecCon1, NI_SSE_And, simdBaseJitType, simdSize);
3460+
BlockRange().InsertAfter(vecCon1, op1);
34483461

3449-
GenTree* cns3 = comp->gtNewIconNode(0, TYP_INT);
3450-
BlockRange().InsertAfter(cns2, cns3);
3462+
LowerNode(vecCon1);
3463+
LowerNode(op1);
34513464

3452-
tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, cns0, cns1, cns2, cns3, NI_Vector128_Create,
3453-
CORINFO_TYPE_INT, 16);
3454-
BlockRange().InsertAfter(cns3, tmp1);
3465+
// We will be constructing the following parts:
3466+
// ...
3467+
// +--* CNS_INT int -1
3468+
// +--* CNS_INT int -1
3469+
// +--* CNS_INT int -1
3470+
// +--* CNS_INT int 0
3471+
// tmp2 = * HWINTRINSIC simd16 T Create
3472+
// /--* op2 simd16
3473+
// +--* tmp2 simd16
3474+
// op2 = * HWINTRINSIC simd16 T And
3475+
// ...
34553476

3456-
op1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, tmp1, NI_SSE_And, simdBaseJitType, simdSize);
3457-
BlockRange().InsertAfter(tmp1, op1);
3477+
// This is roughly the following managed code:
3478+
// ...
3479+
// tmp2 = Vector128.Create(-1, -1, -1, 0);
3480+
// op2 = Sse.And(op2, tmp2);
3481+
// ...
34583482

3459-
LowerNode(tmp1);
3460-
LowerNode(op1);
3483+
GenTreeVecCon* vecCon2 = comp->gtNewVconNode(simdType, simdBaseJitType);
3484+
vecCon2->gtSimd16Val = simd16Val;
3485+
3486+
BlockRange().InsertAfter(op2, vecCon2);
3487+
3488+
op2 = comp->gtNewSimdHWIntrinsicNode(simdType, op2, vecCon2, NI_SSE_And, simdBaseJitType, simdSize);
3489+
BlockRange().InsertAfter(vecCon2, op2);
3490+
3491+
LowerNode(vecCon2);
3492+
LowerNode(op2);
34613493
}
34623494
}
34633495

0 commit comments

Comments
 (0)