Skip to content

Commit 00e6482

Browse files
Updating CreateScalar to be intrinsic for Vector64/128/256 (#77798)
* Updating CreateScalar to be intrinsic for Vector64/128/256
* Applying formatting patch
* Fixing CreateScalar VecCon nodes created on import and find use before insert
* Applying formatting patch
* Ensure we zero extend TYP_BYTE and TYP_SHORT
* Ensure TYP_UBYTE and TYP_USHORT are also explicitly zero extended
* Fix the cast and add a comment explaining "why"
* Applying formatting patch
1 parent 4820105 commit 00e6482

File tree

11 files changed

+407
-294
lines changed

11 files changed

+407
-294
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 30 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17362,32 +17362,48 @@ bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp)
1736217362
// true if node represents a constant; otherwise, false
1736317363
bool GenTreeVecCon::IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val)
1736417364
{
17365-
var_types simdType = node->TypeGet();
17366-
var_types simdBaseType = node->GetSimdBaseType();
17367-
unsigned simdSize = node->GetSimdSize();
17365+
NamedIntrinsic intrinsic = node->GetHWIntrinsicId();
17366+
var_types simdType = node->TypeGet();
17367+
var_types simdBaseType = node->GetSimdBaseType();
17368+
unsigned simdSize = node->GetSimdSize();
1736817369

1736917370
size_t argCnt = node->GetOperandCount();
1737017371
size_t cnsArgCnt = 0;
1737117372

17372-
switch (node->GetHWIntrinsicId())
17373+
switch (intrinsic)
1737317374
{
1737417375
case NI_Vector128_Create:
17376+
case NI_Vector128_CreateScalar:
1737517377
case NI_Vector128_CreateScalarUnsafe:
1737617378
#if defined(TARGET_XARCH)
1737717379
case NI_Vector256_Create:
17380+
case NI_Vector256_CreateScalar:
1737817381
case NI_Vector256_CreateScalarUnsafe:
1737917382
#elif defined(TARGET_ARM64)
1738017383
case NI_Vector64_Create:
17384+
case NI_Vector64_CreateScalar:
1738117385
case NI_Vector64_CreateScalarUnsafe:
1738217386
#endif
1738317387
{
17388+
// Zero out the simd32Val
17389+
simd32Val = {};
17390+
1738417391
// These intrinsics are meant to set the same value to every element.
1738517392
if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, simd32Val, simdBaseType))
1738617393
{
17387-
// Now assign the rest of the arguments.
17388-
for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
17394+
// CreateScalar leaves the upper bits as zero
17395+
17396+
#if defined(TARGET_XARCH)
17397+
if ((intrinsic != NI_Vector128_CreateScalar) && (intrinsic != NI_Vector256_CreateScalar))
17398+
#elif defined(TARGET_ARM64)
17399+
if ((intrinsic != NI_Vector64_CreateScalar) && (intrinsic != NI_Vector128_CreateScalar))
17400+
#endif
1738917401
{
17390-
HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
17402+
// Now assign the rest of the arguments.
17403+
for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
17404+
{
17405+
HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
17406+
}
1739117407
}
1739217408

1739317409
cnsArgCnt = 1;
@@ -18974,6 +18990,13 @@ bool GenTree::isContainableHWIntrinsic() const
1897418990
return true;
1897518991
}
1897618992

18993+
case NI_Vector128_get_Zero:
18994+
case NI_Vector256_get_Zero:
18995+
{
18996+
// These HWIntrinsic operations are contained as part of Sse41.Insert
18997+
return true;
18998+
}
18999+
1897719000
default:
1897819001
{
1897919002
return false;

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 40 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -545,6 +545,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
545545
break;
546546
}
547547

548+
case NI_Vector64_CreateScalar:
548549
case NI_Vector64_CreateScalarUnsafe:
549550
{
550551
if (genTypeSize(simdBaseType) == 8)
@@ -556,12 +557,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
556557

557558
case NI_Vector64_Create:
558559
case NI_Vector128_Create:
560+
case NI_Vector128_CreateScalar:
559561
case NI_Vector128_CreateScalarUnsafe:
560562
{
561563
uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
562564
assert((sig->numArgs == 1) || (sig->numArgs == simdLength));
563565

564-
bool isConstant = true;
566+
bool isConstant = true;
567+
bool isCreateScalar = (intrinsic == NI_Vector64_CreateScalar) || (intrinsic == NI_Vector128_CreateScalar);
565568

566569
if (varTypeIsFloating(simdBaseType))
567570
{
@@ -620,7 +623,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
620623
vecCon->gtSimd16Val.u8[simdLength - 1 - index] = cnsVal;
621624
}
622625

623-
if (sig->numArgs == 1)
626+
if (isCreateScalar)
627+
{
628+
vecCon->gtSimd32Val = {};
629+
vecCon->gtSimd32Val.u8[0] = cnsVal;
630+
}
631+
else if (sig->numArgs == 1)
624632
{
625633
for (uint32_t index = 0; index < simdLength - 1; index++)
626634
{
@@ -641,7 +649,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
641649
vecCon->gtSimd16Val.u16[simdLength - 1 - index] = cnsVal;
642650
}
643651

644-
if (sig->numArgs == 1)
652+
if (isCreateScalar)
653+
{
654+
vecCon->gtSimd32Val = {};
655+
vecCon->gtSimd32Val.u16[0] = cnsVal;
656+
}
657+
else if (sig->numArgs == 1)
645658
{
646659
for (uint32_t index = 0; index < (simdLength - 1); index++)
647660
{
@@ -662,7 +675,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
662675
vecCon->gtSimd16Val.u32[simdLength - 1 - index] = cnsVal;
663676
}
664677

665-
if (sig->numArgs == 1)
678+
if (isCreateScalar)
679+
{
680+
vecCon->gtSimd32Val = {};
681+
vecCon->gtSimd32Val.u32[0] = cnsVal;
682+
}
683+
else if (sig->numArgs == 1)
666684
{
667685
for (uint32_t index = 0; index < (simdLength - 1); index++)
668686
{
@@ -683,7 +701,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
683701
vecCon->gtSimd16Val.u64[simdLength - 1 - index] = cnsVal;
684702
}
685703

686-
if (sig->numArgs == 1)
704+
if (isCreateScalar)
705+
{
706+
vecCon->gtSimd32Val = {};
707+
vecCon->gtSimd32Val.u64[0] = cnsVal;
708+
}
709+
else if (sig->numArgs == 1)
687710
{
688711
for (uint32_t index = 0; index < (simdLength - 1); index++)
689712
{
@@ -703,7 +726,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
703726
vecCon->gtSimd16Val.f32[simdLength - 1 - index] = cnsVal;
704727
}
705728

706-
if (sig->numArgs == 1)
729+
if (isCreateScalar)
730+
{
731+
vecCon->gtSimd32Val = {};
732+
vecCon->gtSimd32Val.f32[0] = cnsVal;
733+
}
734+
else if (sig->numArgs == 1)
707735
{
708736
for (uint32_t index = 0; index < (simdLength - 1); index++)
709737
{
@@ -723,7 +751,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
723751
vecCon->gtSimd16Val.f64[simdLength - 1 - index] = cnsVal;
724752
}
725753

726-
if (sig->numArgs == 1)
754+
if (isCreateScalar)
755+
{
756+
vecCon->gtSimd32Val = {};
757+
vecCon->gtSimd32Val.f64[0] = cnsVal;
758+
}
759+
else if (sig->numArgs == 1)
727760
{
728761
for (uint32_t index = 0; index < (simdLength - 1); index++)
729762
{

src/coreclr/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -596,6 +596,13 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins,
596596

597597
regNumber op1Reg = op1->GetRegNum();
598598

599+
if ((ins == INS_insertps) && (op1Reg == REG_NA))
600+
{
601+
// insertps is special and can contain op1 when it is zero
602+
assert(op1->isContained() && op1->IsVectorZero());
603+
op1Reg = targetReg;
604+
}
605+
599606
assert(targetReg != REG_NA);
600607
assert(op1Reg != REG_NA);
601608

src/coreclr/jit/hwintrinsiclistarm64.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,8 @@ HARDWARE_INTRINSIC(Vector64, ConvertToInt64,
3939
HARDWARE_INTRINSIC(Vector64, ConvertToSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
4040
HARDWARE_INTRINSIC(Vector64, ConvertToUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
4141
HARDWARE_INTRINSIC(Vector64, ConvertToUInt64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
42-
HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
42+
HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
43+
HARDWARE_INTRINSIC(Vector64, CreateScalar, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
4344
HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
4445
HARDWARE_INTRINSIC(Vector64, Divide, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
4546
HARDWARE_INTRINSIC(Vector64, Dot, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
@@ -143,6 +144,7 @@ HARDWARE_INTRINSIC(Vector128, ConvertToSingle,
143144
HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
144145
HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
145146
HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
147+
HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
146148
HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
147149
HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
148150
HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)

0 commit comments

Comments
 (0)