Skip to content

Commit 67e1983

Browse files
JIT ARM64-SVE: Add Sve.ConditionalExtract* APIs (#104150)
* JIT ARM64-SVE: Add Sve.ConditionalExtract* APIs * cleanup: fix formatting * Update Helpers.cs * cleanup: group and place APIs in alphabedical order and align the field * Remove redundant HasScalarInputVariant flags from *Replicate intrinsics * cleanup: place special intrinsics in alphabetical order * cleanup: correctly specify emitted instructions in the comments * fixup! cleanup: place special intrinsics in alphabetical order * fixup! cleanup: group and place APIs in alphabedical order and align the field * fixup! ARM64-SVE: GatherPrefetch (#103826) * Revert "fixup! ARM64-SVE: GatherPrefetch (#103826)" This reverts commit b0212ca. * cleanup: align the lines for the newly added tests. --------- Co-authored-by: Kunal Pathak <Kunal.Pathak@microsoft.com>
1 parent e224ffb commit 67e1983

File tree

10 files changed

+2179
-5
lines changed

10 files changed

+2179
-5
lines changed

src/coreclr/jit/hwintrinsic.h

+6
Original file line numberDiff line numberDiff line change
@@ -957,6 +957,12 @@ struct HWIntrinsicInfo
957957

958958
switch (id)
959959
{
960+
case NI_Sve_ConditionalExtractAfterLastActiveElement:
961+
return NI_Sve_ConditionalExtractAfterLastActiveElementScalar;
962+
963+
case NI_Sve_ConditionalExtractLastActiveElement:
964+
return NI_Sve_ConditionalExtractLastActiveElementScalar;
965+
960966
case NI_Sve_SaturatingDecrementBy16BitElementCount:
961967
return NI_Sve_SaturatingDecrementBy16BitElementCountScalar;
962968

src/coreclr/jit/hwintrinsicarm64.cpp

+29
Original file line numberDiff line numberDiff line change
@@ -2801,6 +2801,35 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
28012801

28022802
break;
28032803
}
2804+
case NI_Sve_ConditionalExtractAfterLastActiveElementScalar:
2805+
case NI_Sve_ConditionalExtractLastActiveElementScalar:
2806+
{
2807+
assert(sig->numArgs == 3);
2808+
2809+
#ifdef DEBUG
2810+
isValidScalarIntrinsic = true;
2811+
#endif
2812+
2813+
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
2814+
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
2815+
CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
2816+
var_types argType = TYP_UNKNOWN;
2817+
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
2818+
2819+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
2820+
op3 = getArgForHWIntrinsic(argType, argClass);
2821+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
2822+
op2 = getArgForHWIntrinsic(argType, argClass);
2823+
CorInfoType op2BaseJitType = getBaseJitTypeOfSIMDType(argClass);
2824+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
2825+
op1 = getArgForHWIntrinsic(argType, argClass);
2826+
2827+
retNode = gtNewScalarHWIntrinsicNode(retType, op1, op2, op3, intrinsic);
2828+
2829+
retNode->AsHWIntrinsic()->SetSimdBaseJitType(simdBaseJitType);
2830+
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(op2BaseJitType);
2831+
break;
2832+
}
28042833

28052834
default:
28062835
{

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

+45
Original file line numberDiff line numberDiff line change
@@ -2252,6 +2252,51 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
22522252
break;
22532253
}
22542254

2255+
case NI_Sve_ConditionalExtractAfterLastActiveElementScalar:
2256+
case NI_Sve_ConditionalExtractLastActiveElementScalar:
2257+
{
2258+
opt = emitter::optGetSveInsOpt(emitTypeSize(node->GetSimdBaseType()));
2259+
2260+
if (emitter::isGeneralRegisterOrZR(targetReg))
2261+
{
2262+
assert(varTypeIsIntegralOrI(intrin.baseType));
2263+
2264+
emitSize = emitTypeSize(node);
2265+
2266+
if (targetReg != op2Reg)
2267+
{
2268+
assert(targetReg != op1Reg);
2269+
assert(targetReg != op3Reg);
2270+
GetEmitter()->emitIns_Mov(INS_mov, emitSize, targetReg, op2Reg,
2271+
/* canSkip */ true);
2272+
}
2273+
2274+
GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, opt,
2275+
INS_SCALABLE_OPTS_NONE);
2276+
break;
2277+
}
2278+
2279+
// FP scalars are processed by the INS_SCALABLE_OPTS_WITH_SIMD_SCALAR variant of the instructions
2280+
FALLTHROUGH;
2281+
}
2282+
case NI_Sve_ConditionalExtractAfterLastActiveElement:
2283+
case NI_Sve_ConditionalExtractLastActiveElement:
2284+
{
2285+
assert(emitter::isFloatReg(targetReg));
2286+
assert(varTypeIsFloating(node->gtType) || varTypeIsSIMD(node->gtType));
2287+
2288+
if (targetReg != op2Reg)
2289+
{
2290+
assert(targetReg != op1Reg);
2291+
assert(targetReg != op3Reg);
2292+
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op2Reg,
2293+
/* canSkip */ true);
2294+
}
2295+
GetEmitter()->emitInsSve_R_R_R(ins, EA_SCALABLE, targetReg, op1Reg, op3Reg, opt,
2296+
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR);
2297+
break;
2298+
}
2299+
22552300
default:
22562301
unreached();
22572302
}

src/coreclr/jit/hwintrinsiclistarm64sve.h

+10-5
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ HARDWARE_INTRINSIC(Sve, Compute16BitAddresses,
4141
HARDWARE_INTRINSIC(Sve, Compute32BitAddresses, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
4242
HARDWARE_INTRINSIC(Sve, Compute64BitAddresses, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
4343
HARDWARE_INTRINSIC(Sve, Compute8BitAddresses, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
44+
HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElement, -1, 3, true, {INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialCodeGen)
45+
HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementAndReplicate, -1, 3, true, {INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
46+
HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElement, -1, 3, true, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasScalarInputVariant|HW_Flag_SpecialCodeGen)
47+
HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementAndReplicate, -1, 3, true, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
4448
HARDWARE_INTRINSIC(Sve, ConditionalSelect, -1, 3, true, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment)
4549
HARDWARE_INTRINSIC(Sve, ConvertToInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
4650
HARDWARE_INTRINSIC(Sve, ConvertToInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
@@ -259,21 +263,22 @@ HARDWARE_INTRINSIC(Sve, ZipLow,
259263
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
260264
// Special intrinsics that are generated during importing or lowering
261265

262-
#define SPECIAL_NI_Sve NI_Sve_ConvertMaskToVector
266+
#define SPECIAL_NI_Sve NI_Sve_ConditionalExtractAfterLastActiveElementScalar
267+
HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementScalar, 0, 3, false, {INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta, INS_sve_clasta}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
268+
HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementScalar, 0, 3, false, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
263269
HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, true, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation)
264270
HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, true, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation)
265271
HARDWARE_INTRINSIC(Sve, CreateTrueMaskAll, -1, -1, false, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask)
266-
HARDWARE_INTRINSIC(Sve, StoreAndZipx2, -1, 3, true, {INS_sve_st2b, INS_sve_st2b, INS_sve_st2h, INS_sve_st2h, INS_sve_st2w, INS_sve_st2w, INS_sve_st2d, INS_sve_st2d, INS_sve_st2w, INS_sve_st2d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
267-
HARDWARE_INTRINSIC(Sve, StoreAndZipx3, -1, 3, true, {INS_sve_st3b, INS_sve_st3b, INS_sve_st3h, INS_sve_st3h, INS_sve_st3w, INS_sve_st3w, INS_sve_st3d, INS_sve_st3d, INS_sve_st3w, INS_sve_st3d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
268-
HARDWARE_INTRINSIC(Sve, StoreAndZipx4, -1, 3, true, {INS_sve_st4b, INS_sve_st4b, INS_sve_st4h, INS_sve_st4h, INS_sve_st4w, INS_sve_st4w, INS_sve_st4d, INS_sve_st4d, INS_sve_st4w, INS_sve_st4d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
269-
270272
// Scalar variants of Saturating*By*BitElementCount. There is 8bit versions as the generic version is scalar only.
271273
HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy16BitElementCountScalar, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdech, INS_sve_uqdech, INS_sve_sqdech, INS_sve_uqdech, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)
272274
HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy32BitElementCountScalar, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecw, INS_sve_uqdecw, INS_sve_sqdecw, INS_sve_uqdecw, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)
273275
HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy64BitElementCountScalar, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecd, INS_sve_uqdecd, INS_sve_sqdecd, INS_sve_uqdecd, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)
274276
HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy16BitElementCountScalar, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqinch, INS_sve_uqinch, INS_sve_sqinch, INS_sve_uqinch, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)
275277
HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy32BitElementCountScalar, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincw, INS_sve_uqincw, INS_sve_sqincw, INS_sve_uqincw, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)
276278
HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy64BitElementCountScalar, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincd, INS_sve_uqincd, INS_sve_sqincd, INS_sve_uqincd, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)
279+
HARDWARE_INTRINSIC(Sve, StoreAndZipx2, -1, 3, true, {INS_sve_st2b, INS_sve_st2b, INS_sve_st2h, INS_sve_st2h, INS_sve_st2w, INS_sve_st2w, INS_sve_st2d, INS_sve_st2d, INS_sve_st2w, INS_sve_st2d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
280+
HARDWARE_INTRINSIC(Sve, StoreAndZipx3, -1, 3, true, {INS_sve_st3b, INS_sve_st3b, INS_sve_st3h, INS_sve_st3h, INS_sve_st3w, INS_sve_st3w, INS_sve_st3d, INS_sve_st3d, INS_sve_st3w, INS_sve_st3d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
281+
HARDWARE_INTRINSIC(Sve, StoreAndZipx4, -1, 3, true, {INS_sve_st4b, INS_sve_st4b, INS_sve_st4h, INS_sve_st4h, INS_sve_st4w, INS_sve_st4w, INS_sve_st4d, INS_sve_st4d, INS_sve_st4w, INS_sve_st4d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_NeedsConsecutiveRegisters)
277282

278283

279284
#endif // FEATURE_HW_INTRINSIC

0 commit comments

Comments
 (0)