Skip to content

Commit d13eb76

Browse files
a74nhSwapnilGaikwad
authored andcommitted
JIT ARM64-SVE: Add Count*BitElements (dotnet#101188)
* JIT ARM64-SVE: Add Count*BitElements * Generic ValidateResult testing * Fix formatting * Add option to change SVE vector length for current and children processes. TEST_LABEL: ent-arch-aarch64 TEST_IMG: ubuntu/dotnet-build TEST_CMD: safe ./projects/dotnet/test-runtime.sh --scope coreclr,libs Jira: ENTLLT-7328 Change-Id: I727edf8652a5c8648e7008d4ca47e7a4f36d5a1e * Revert "Add option to change SVE vector length for current and children processes." This reverts commit 2937557. * fix CreateTrueMask --------- Co-authored-by: Swapnil Gaikwad <swapnil.gaikwad@arm.com>
1 parent ce60472 commit d13eb76

File tree

11 files changed

+297
-174
lines changed

11 files changed

+297
-174
lines changed

src/coreclr/jit/hwintrinsic.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ enum HWIntrinsicCategory : uint8_t
5858
HW_Category_ShiftLeftByImmediate,
5959
HW_Category_ShiftRightByImmediate,
6060
HW_Category_SIMDByIndexedElement,
61-
HW_Category_EnumPattern,
6261

6362
// Helper intrinsics
6463
// - do not directly correspond to a instruction, such as Vector64.AllBitsSet
@@ -232,6 +231,11 @@ enum HWIntrinsicFlag : unsigned int
232231

233232
// The intrinsic is an embedded masking incompatible intrinsic
234233
HW_Flag_EmbMaskingIncompatible = 0x20000000,
234+
#elif defined(TARGET_ARM64)
235+
236+
// The intrinsic has an enum operand. Using this implies HW_Flag_HasImmediateOperand.
237+
HW_Flag_HasEnumOperand = 0x1000000,
238+
235239
#endif // TARGET_XARCH
236240

237241
HW_Flag_CanBenefitFromConstantProp = 0x80000000,
@@ -867,7 +871,7 @@ struct HWIntrinsicInfo
867871
static bool HasImmediateOperand(NamedIntrinsic id)
868872
{
869873
const HWIntrinsicFlag flags = lookupFlags(id);
870-
return (flags & HW_Flag_HasImmediateOperand) != 0;
874+
return ((flags & HW_Flag_HasImmediateOperand) != 0) || HasEnumOperand(id);
871875
}
872876

873877
static bool IsScalable(NamedIntrinsic id)
@@ -906,6 +910,12 @@ struct HWIntrinsicInfo
906910
return (flags & HW_Flag_ExplicitMaskedOperation) != 0;
907911
}
908912

913+
static bool HasEnumOperand(NamedIntrinsic id)
914+
{
915+
const HWIntrinsicFlag flags = lookupFlags(id);
916+
return (flags & HW_Flag_HasEnumOperand) != 0;
917+
}
918+
909919
#endif // TARGET_ARM64
910920

911921
static bool HasSpecialSideEffect(NamedIntrinsic id)

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,10 @@ void HWIntrinsicInfo::lookupImmBounds(
290290
case NI_Sve_CreateTrueMaskUInt16:
291291
case NI_Sve_CreateTrueMaskUInt32:
292292
case NI_Sve_CreateTrueMaskUInt64:
293+
case NI_Sve_Count16BitElements:
294+
case NI_Sve_Count32BitElements:
295+
case NI_Sve_Count64BitElements:
296+
case NI_Sve_Count8BitElements:
293297
immLowerBound = (int)SVE_PATTERN_POW2;
294298
immUpperBound = (int)SVE_PATTERN_ALL;
295299
break;

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
380380
emitShift(intrin.op2, op1Reg);
381381
}
382382
}
383-
else if (intrin.category == HW_Category_EnumPattern)
383+
else if (HWIntrinsicInfo::HasEnumOperand(intrin.id))
384384
{
385385
assert(hasImmediateOperand);
386386

@@ -1404,6 +1404,22 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
14041404
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt);
14051405
break;
14061406

1407+
case NI_Sve_Count16BitElements:
1408+
case NI_Sve_Count32BitElements:
1409+
case NI_Sve_Count64BitElements:
1410+
case NI_Sve_Count8BitElements:
1411+
{
1412+
// Instruction has an additional immediate to multiply the result by. Use 1.
1413+
assert(hasImmediateOperand);
1414+
HWIntrinsicImmOpHelper helper(this, intrin.op1, node);
1415+
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
1416+
{
1417+
const insSvePattern pattern = (insSvePattern)helper.ImmValue();
1418+
GetEmitter()->emitIns_R_PATTERN_I(ins, emitSize, targetReg, pattern, 1, opt);
1419+
}
1420+
break;
1421+
}
1422+
14071423
case NI_Sve_CreateTrueMaskAll:
14081424
// Must use the pattern variant, as the non-pattern varient is SVE2.1.
14091425
GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, SVE_PATTERN_ALL);

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,20 @@
2020
HARDWARE_INTRINSIC(Sve, Abs, -1, -1, false, {INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_fabs, INS_sve_fabs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation)
2121
HARDWARE_INTRINSIC(Sve, Add, -1, -1, false, {INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_fadd, INS_sve_fadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
2222
HARDWARE_INTRINSIC(Sve, ConditionalSelect, -1, 3, true, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment)
23-
HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
24-
HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
25-
HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
26-
HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
27-
HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
28-
HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, false, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
29-
HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
30-
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
31-
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
32-
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
23+
HARDWARE_INTRINSIC(Sve, Count16BitElements, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cnth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed)
24+
HARDWARE_INTRINSIC(Sve, Count32BitElements, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed)
25+
HARDWARE_INTRINSIC(Sve, Count64BitElements, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed)
26+
HARDWARE_INTRINSIC(Sve, Count8BitElements, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed)
27+
HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, false, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_ReturnsPerElementMask)
28+
HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_ReturnsPerElementMask)
29+
HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_ReturnsPerElementMask)
30+
HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_ReturnsPerElementMask)
31+
HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_ReturnsPerElementMask)
32+
HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, false, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_ReturnsPerElementMask)
33+
HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_ReturnsPerElementMask)
34+
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_ReturnsPerElementMask)
35+
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_ReturnsPerElementMask)
36+
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_ReturnsPerElementMask)
3337
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
3438
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
3539
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3295,6 +3295,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
32953295
case NI_Sve_CreateTrueMaskUInt16:
32963296
case NI_Sve_CreateTrueMaskUInt32:
32973297
case NI_Sve_CreateTrueMaskUInt64:
3298+
case NI_Sve_Count16BitElements:
3299+
case NI_Sve_Count32BitElements:
3300+
case NI_Sve_Count64BitElements:
3301+
case NI_Sve_Count8BitElements:
32983302
assert(hasImmediateOperand);
32993303
assert(varTypeIsIntegral(intrin.op1));
33003304
if (intrin.op1->IsCnsIntOrI())

src/coreclr/jit/lsraarm64.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1464,6 +1464,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
14641464
case NI_Sve_CreateTrueMaskUInt16:
14651465
case NI_Sve_CreateTrueMaskUInt32:
14661466
case NI_Sve_CreateTrueMaskUInt64:
1467+
case NI_Sve_Count16BitElements:
1468+
case NI_Sve_Count32BitElements:
1469+
case NI_Sve_Count64BitElements:
1470+
case NI_Sve_Count8BitElements:
14671471
needBranchTargetReg = !intrin.op1->isContainedIntOrIImmed();
14681472
break;
14691473

0 commit comments

Comments
 (0)