Skip to content

Commit 4752d12

Browse files
committed
ARM64-SVE: Fix conditional select for Zeroing predicates
1 parent 28c4dce commit 4752d12

7 files changed

+100
-129
lines changed

src/coreclr/jit/hwintrinsic.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -236,16 +236,17 @@ enum HWIntrinsicFlag : unsigned int
236236
// then the intrinsic should be switched to a scalar only version.
237237
HW_Flag_HasScalarInputVariant = 0x2000000,
238238

239+
// The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low vector register.
240+
HW_Flag_LowVectorOperation = 0x4000000,
241+
242+
// The intrinsic uses a mask in arg1 to select elements present in the result, which zeros inactive elements (instead of merging).
243+
HW_Flag_ZeroingMaskedOperation = 0x8000000,
244+
239245
#endif // TARGET_XARCH
240246

241247
// The intrinsic is a FusedMultiplyAdd intrinsic
242248
HW_Flag_FmaIntrinsic = 0x40000000,
243249

244-
#if defined(TARGET_ARM64)
245-
// The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low vector register.
246-
HW_Flag_LowVectorOperation = 0x4000000,
247-
#endif
248-
249250
HW_Flag_CanBenefitFromConstantProp = 0x80000000,
250251
};
251252

@@ -955,6 +956,12 @@ struct HWIntrinsicInfo
955956
return (flags & HW_Flag_HasScalarInputVariant) != 0;
956957
}
957958

959+
static bool IsZeroingMaskedOperation(NamedIntrinsic id)
960+
{
961+
const HWIntrinsicFlag flags = lookupFlags(id);
962+
return (flags & HW_Flag_ZeroingMaskedOperation) != 0;
963+
}
964+
958965
static NamedIntrinsic GetScalarInputVariant(NamedIntrinsic id)
959966
{
960967
assert(HasScalarInputVariant(id));

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 79 additions & 79 deletions
Large diffs are not rendered by default.

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4028,9 +4028,13 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode)
40284028
// `trueValue`
40294029
GenTreeHWIntrinsic* nestedCndSel = op2->AsHWIntrinsic();
40304030
GenTree* nestedOp1 = nestedCndSel->Op(1);
4031+
GenTree* nestedOp2 = nestedCndSel->Op(2);
40314032
assert(varTypeIsMask(nestedOp1));
4033+
assert(nestedOp2->OperIsHWIntrinsic());
40324034

4033-
if (nestedOp1->IsMaskAllBitsSet())
4035+
NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic()->GetHWIntrinsicId();
4036+
4037+
if (nestedOp1->IsMaskAllBitsSet() && !HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id))
40344038
{
40354039
GenTree* nestedOp2 = nestedCndSel->Op(2);
40364040
GenTree* nestedOp3 = nestedCndSel->Op(3);

src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveGatherVectorByteOffsets.template

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm
5656
test.RunStructFldScenario();
5757

5858
// Validates using inside ConditionalSelect with value falseValue
59-
// Currently, using this operation in ConditionalSelect() gives incorrect result
60-
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
61-
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
62-
// instructions should be marked similar to RMW (a different flag name) to make sure that
63-
// we do not assign falseReg/targetReg same. Then, we would do something like this:
64-
//
65-
// ldnf1sh target, pg/z, [x0]
66-
// sel mask, target, target, falseReg
67-
//
68-
// This needs more careful thinking, so disabling it for now.
69-
// test.ConditionalSelect_FalseOp();
59+
test.ConditionalSelect_FalseOp();
7060

7161
// Validates using inside ConditionalSelect with zero falseValue
7262
test.ConditionalSelect_ZeroOp();

src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveGatherVectorIndices.template

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm
5656
test.RunStructFldScenario();
5757

5858
// Validates using inside ConditionalSelect with value falseValue
59-
// Currently, using this operation in ConditionalSelect() gives incorrect result
60-
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
61-
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
62-
// instructions should be marked similar to RMW (a different flag name) to make sure that
63-
// we do not assign falseReg/targetReg same. Then, we would do something like this:
64-
//
65-
// ldnf1sh target, pg/z, [x0]
66-
// sel mask, target, target, falseReg
67-
//
68-
// This needs more careful thinking, so disabling it for now.
69-
// test.ConditionalSelect_FalseOp();
59+
test.ConditionalSelect_FalseOp();
7060

7161
// Validates using inside ConditionalSelect with zero falseValue
7262
test.ConditionalSelect_ZeroOp();

src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveGatherVectorVectorBases.template

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm
5656
test.RunStructFldScenario();
5757

5858
// Validates using inside ConditionalSelect with value falseValue
59-
// Currently, using this operation in ConditionalSelect() gives incorrect result
60-
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
61-
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
62-
// instructions should be marked similar to RMW (a different flag name) to make sure that
63-
// we do not assign falseReg/targetReg same. Then, we would do something like this:
64-
//
65-
// ldnf1sh target, pg/z, [x0]
66-
// sel mask, target, target, falseReg
67-
//
68-
// This needs more careful thinking, so disabling it for now.
69-
// test.ConditionalSelect_FalseOp();
59+
test.ConditionalSelect_FalseOp();
7060

7161
// Validates using inside ConditionalSelect with zero falseValue
7262
test.ConditionalSelect_ZeroOp();

src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadNonFaultingUnOpTest.template

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,7 @@ namespace JIT.HardwareIntrinsics.Arm
4747
test.RunStructFldScenario();
4848

4949
// Validates using inside ConditionalSelect with value falseValue
50-
// Currently, using this operation in ConditionalSelect() gives incorrect result
51-
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
52-
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
53-
// instructions should be marked similar to RMW (a different flag name) to make sure that
54-
// we do not assign falseReg/targetReg same. Then, we would do something like this:
55-
//
56-
// ldnf1sh target, pg/z, [x0]
57-
// sel mask, target, target, falseReg
58-
//
59-
// This needs more careful thinking, so disabling it for now.
60-
// test.ConditionalSelect_FalseOp();
50+
test.ConditionalSelect_FalseOp();
6151

6252
// Validates using inside ConditionalSelect with zero falseValue
6353
test.ConditionalSelect_ZeroOp();

0 commit comments

Comments
 (0)