Skip to content

Commit 31cc133

Browse files
authored
JIT: Added SVE APIs - Test*, ExtractVector (#103739)
* Initial work for Test* and Extract* SVE APIs
* Properly handling Test APIs
* Added Sve.ExtractVector tests and they pass
* Added more extract tests. Still getting asserts.
* Fixed tests. Still working on assertion failures for LastScalar.
* Removed ExtractLast and ExtractAfterLast APIs for now. Added more test coverage for TestAnyTrue, TestFirstTrue, TestLastTrue.
* Fixing test
* Fixed test again. Formatting.
* Feedback
* Fix ordering
1 parent 64f7f95 commit 31cc133

File tree

14 files changed

+1197
-15
lines changed

14 files changed

+1197
-15
lines changed

src/coreclr/jit/emitarm64sve.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2727,6 +2727,7 @@ void emitter::emitInsSve_R_R_I(instruction ins,
27272727
if (sopt == INS_SCALABLE_OPTS_WITH_VECTOR_PAIR)
27282728
{
27292729
fmt = IF_SVE_BQ_2A;
2730+
unreached(); // Not supported yet.
27302731
}
27312732
else
27322733
{

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1900,27 +1900,40 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
19001900
{
19011901
assert(numArgs > 0);
19021902
GenTree* op1 = retNode->AsHWIntrinsic()->Op(1);
1903-
if (intrinsic == NI_Sve_ConditionalSelect)
1903+
1904+
switch (intrinsic)
19041905
{
1905-
if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
1906-
{
1907-
return retNode->AsHWIntrinsic()->Op(2);
1908-
}
1909-
else if (op1->IsVectorZero())
1906+
case NI_Sve_ConditionalSelect:
19101907
{
1911-
return retNode->AsHWIntrinsic()->Op(3);
1908+
if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
1909+
{
1910+
return retNode->AsHWIntrinsic()->Op(2);
1911+
}
1912+
else if (op1->IsVectorZero())
1913+
{
1914+
return retNode->AsHWIntrinsic()->Op(3);
1915+
}
1916+
break;
19121917
}
1913-
}
1914-
else if (intrinsic == NI_Sve_GetActiveElementCount)
1915-
{
1916-
GenTree* op2 = retNode->AsHWIntrinsic()->Op(2);
19171918

1918-
// HWIntrinsic requires a mask for op2
1919-
if (!varTypeIsMask(op2))
1919+
case NI_Sve_GetActiveElementCount:
1920+
case NI_Sve_TestAnyTrue:
1921+
case NI_Sve_TestFirstTrue:
1922+
case NI_Sve_TestLastTrue:
19201923
{
1921-
retNode->AsHWIntrinsic()->Op(2) =
1922-
gtNewSimdCvtVectorToMaskNode(TYP_MASK, op2, simdBaseJitType, simdSize);
1924+
GenTree* op2 = retNode->AsHWIntrinsic()->Op(2);
1925+
1926+
// HWIntrinsic requires a mask for op2
1927+
if (!varTypeIsMask(op2))
1928+
{
1929+
retNode->AsHWIntrinsic()->Op(2) =
1930+
gtNewSimdCvtVectorToMaskNode(TYP_MASK, op2, simdBaseJitType, simdSize);
1931+
}
1932+
break;
19231933
}
1934+
1935+
default:
1936+
break;
19241937
}
19251938

19261939
if (!varTypeIsMask(op1))

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,7 @@ void HWIntrinsicInfo::lookupImmBounds(
403403
case NI_AdvSimd_Arm64_InsertSelectedScalar:
404404
case NI_Sve_FusedMultiplyAddBySelectedScalar:
405405
case NI_Sve_FusedMultiplySubtractBySelectedScalar:
406+
case NI_Sve_ExtractVector:
406407
immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1;
407408
break;
408409

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2111,6 +2111,36 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
21112111
break;
21122112
}
21132113

2114+
case NI_Sve_TestAnyTrue:
2115+
case NI_Sve_TestFirstTrue:
2116+
case NI_Sve_TestLastTrue:
2117+
assert(targetReg == REG_NA);
2118+
GetEmitter()->emitIns_R_R(ins, EA_SCALABLE, op1Reg, op2Reg, INS_OPTS_SCALABLE_B);
2119+
break;
2120+
2121+
case NI_Sve_ExtractVector:
2122+
{
2123+
assert(isRMW);
2124+
2125+
if (targetReg != op1Reg)
2126+
{
2127+
assert(targetReg != op2Reg);
2128+
2129+
GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2130+
}
2131+
2132+
HWIntrinsicImmOpHelper helper(this, intrin.op3, node);
2133+
2134+
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
2135+
{
2136+
const int elementIndex = helper.ImmValue();
2137+
const int byteIndex = genTypeSize(intrin.baseType) * elementIndex;
2138+
2139+
GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op2Reg, byteIndex, INS_OPTS_SCALABLE_B);
2140+
}
2141+
break;
2142+
}
2143+
21142144
case NI_Sve_InsertIntoShiftedVector:
21152145
{
21162146
assert(isRMW);

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit,
6868
HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
6969
HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics)
7070
HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_LowVectorOperation)
71+
HARDWARE_INTRINSIC(Sve, ExtractVector, -1, 3, true, {INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
7172
HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
7273
HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation)
7374
HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
@@ -204,6 +205,9 @@ HARDWARE_INTRINSIC(Sve, StoreNarrowing,
204205
HARDWARE_INTRINSIC(Sve, StoreNonTemporal, -1, 3, true, {INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
205206
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
206207
HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, true, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
208+
HARDWARE_INTRINSIC(Sve, TestAnyTrue, -1, 2, true, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
209+
HARDWARE_INTRINSIC(Sve, TestFirstTrue, -1, 2, true, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
210+
HARDWARE_INTRINSIC(Sve, TestLastTrue, -1, 2, true, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
207211
HARDWARE_INTRINSIC(Sve, TransposeEven, -1, 2, true, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
208212
HARDWARE_INTRINSIC(Sve, TransposeOdd, -1, 2, true, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
209213
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1266,6 +1266,27 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
12661266
return LowerHWIntrinsicCmpOp(node, GT_NE);
12671267
}
12681268

1269+
case NI_Sve_TestAnyTrue:
1270+
{
1271+
LowerNodeCC(node, GenCondition::NE);
1272+
node->gtType = TYP_VOID;
1273+
return node->gtNext;
1274+
}
1275+
1276+
case NI_Sve_TestFirstTrue:
1277+
{
1278+
LowerNodeCC(node, GenCondition::SLT);
1279+
node->gtType = TYP_VOID;
1280+
return node->gtNext;
1281+
}
1282+
1283+
case NI_Sve_TestLastTrue:
1284+
{
1285+
LowerNodeCC(node, GenCondition::ULT);
1286+
node->gtType = TYP_VOID;
1287+
return node->gtNext;
1288+
}
1289+
12691290
case NI_Vector128_WithLower:
12701291
case NI_Vector128_WithUpper:
12711292
{
@@ -3192,6 +3213,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
31923213
case NI_Sve_PrefetchInt16:
31933214
case NI_Sve_PrefetchInt32:
31943215
case NI_Sve_PrefetchInt64:
3216+
case NI_Sve_ExtractVector:
31953217
assert(hasImmediateOperand);
31963218
assert(varTypeIsIntegral(intrin.op3));
31973219
if (intrin.op3->IsCnsIntOrI())

src/coreclr/jit/lsraarm64.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1445,6 +1445,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
14451445
case NI_Sve_PrefetchInt16:
14461446
case NI_Sve_PrefetchInt32:
14471447
case NI_Sve_PrefetchInt64:
1448+
case NI_Sve_ExtractVector:
14481449
needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
14491450
break;
14501451

0 commit comments

Comments
 (0)