Skip to content

Commit 329bff4

Browse files
authored
ARM64-SVE: Fix conditional select for Zeroing predicates #102904 (#105737)
1 parent ae49f9e commit 329bff4

7 files changed

+113
-137
lines changed

src/coreclr/jit/hwintrinsic.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -236,16 +236,18 @@ enum HWIntrinsicFlag : unsigned int
236236
// then the intrinsic should be switched to a scalar only version.
237237
HW_Flag_HasScalarInputVariant = 0x2000000,
238238

239+
// The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low vector register.
240+
HW_Flag_LowVectorOperation = 0x4000000,
241+
242+
// The intrinsic uses a mask in arg1 to select elements present in the result, which zeros inactive elements
243+
// (instead of merging).
244+
HW_Flag_ZeroingMaskedOperation = 0x8000000,
245+
239246
#endif // TARGET_XARCH
240247

241248
// The intrinsic is a FusedMultiplyAdd intrinsic
242249
HW_Flag_FmaIntrinsic = 0x40000000,
243250

244-
#if defined(TARGET_ARM64)
245-
// The intrinsic uses a mask in arg1 to select elements present in the result, and must use a low vector register.
246-
HW_Flag_LowVectorOperation = 0x4000000,
247-
#endif
248-
249251
HW_Flag_CanBenefitFromConstantProp = 0x80000000,
250252
};
251253

@@ -984,6 +986,12 @@ struct HWIntrinsicInfo
984986
return (flags & HW_Flag_HasScalarInputVariant) != 0;
985987
}
986988

989+
static bool IsZeroingMaskedOperation(NamedIntrinsic id)
990+
{
991+
const HWIntrinsicFlag flags = lookupFlags(id);
992+
return (flags & HW_Flag_ZeroingMaskedOperation) != 0;
993+
}
994+
987995
static NamedIntrinsic GetScalarInputVariant(NamedIntrinsic id)
988996
{
989997
assert(HasScalarInputVariant(id));

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 87 additions & 87 deletions
Large diffs are not rendered by default.

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4016,9 +4016,17 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode)
40164016
// `trueValue`
40174017
GenTreeHWIntrinsic* nestedCndSel = op2->AsHWIntrinsic();
40184018
GenTree* nestedOp1 = nestedCndSel->Op(1);
4019+
GenTree* nestedOp2 = nestedCndSel->Op(2);
40194020
assert(varTypeIsMask(nestedOp1));
4021+
assert(nestedOp2->OperIsHWIntrinsic());
40204022

4021-
if (nestedOp1->IsMaskAllBitsSet())
4023+
NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic()->GetHWIntrinsicId();
4024+
4025+
// If the nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if
4026+
// op3 is all zeros.
4027+
4028+
if (nestedOp1->IsMaskAllBitsSet() &&
4029+
(!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero()))
40224030
{
40234031
GenTree* nestedOp2 = nestedCndSel->Op(2);
40244032
GenTree* nestedOp3 = nestedCndSel->Op(3);

src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveGatherVectorByteOffsets.template

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm
5656
test.RunStructFldScenario();
5757

5858
// Validates using inside ConditionalSelect with value falseValue
59-
// Currently, using this operation in ConditionalSelect() gives incorrect result
60-
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
61-
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
62-
// instructions should be marked similar to RMW (a different flag name) to make sure that
63-
// we do not assign falseReg/targetReg same. Then, we would do something like this:
64-
//
65-
// ldnf1sh target, pg/z, [x0]
66-
// sel mask, target, target, falseReg
67-
//
68-
// This needs more careful thinking, so disabling it for now.
69-
// test.ConditionalSelect_FalseOp();
59+
test.ConditionalSelect_FalseOp();
7060

7161
// Validates using inside ConditionalSelect with zero falseValue
7262
test.ConditionalSelect_ZeroOp();

src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveGatherVectorIndices.template

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm
5656
test.RunStructFldScenario();
5757

5858
// Validates using inside ConditionalSelect with value falseValue
59-
// Currently, using this operation in ConditionalSelect() gives incorrect result
60-
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
61-
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
62-
// instructions should be marked similar to RMW (a different flag name) to make sure that
63-
// we do not assign falseReg/targetReg same. Then, we would do something like this:
64-
//
65-
// ldnf1sh target, pg/z, [x0]
66-
// sel mask, target, target, falseReg
67-
//
68-
// This needs more careful thinking, so disabling it for now.
69-
// test.ConditionalSelect_FalseOp();
59+
test.ConditionalSelect_FalseOp();
7060

7161
// Validates using inside ConditionalSelect with zero falseValue
7262
test.ConditionalSelect_ZeroOp();

src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveGatherVectorVectorBases.template

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm
5656
test.RunStructFldScenario();
5757

5858
// Validates using inside ConditionalSelect with value falseValue
59-
// Currently, using this operation in ConditionalSelect() gives incorrect result
60-
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
61-
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
62-
// instructions should be marked similar to RMW (a different flag name) to make sure that
63-
// we do not assign falseReg/targetReg same. Then, we would do something like this:
64-
//
65-
// ldnf1sh target, pg/z, [x0]
66-
// sel mask, target, target, falseReg
67-
//
68-
// This needs more careful thinking, so disabling it for now.
69-
// test.ConditionalSelect_FalseOp();
59+
test.ConditionalSelect_FalseOp();
7060

7161
// Validates using inside ConditionalSelect with zero falseValue
7262
test.ConditionalSelect_ZeroOp();

src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadNonFaultingUnOpTest.template

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,7 @@ namespace JIT.HardwareIntrinsics.Arm
4747
test.RunStructFldScenario();
4848

4949
// Validates using inside ConditionalSelect with value falseValue
50-
// Currently, using this operation in ConditionalSelect() gives incorrect result
51-
// when falseReg == targetReg because this instruction uses Pg/Z to update the targetReg
52-
// instead of Pg/M to merge it. As such, the value of falseReg is lost. Ideally, such
53-
// instructions should be marked similar to RMW (a different flag name) to make sure that
54-
// we do not assign falseReg/targetReg same. Then, we would do something like this:
55-
//
56-
// ldnf1sh target, pg/z, [x0]
57-
// sel mask, target, target, falseReg
58-
//
59-
// This needs more careful thinking, so disabling it for now.
60-
// test.ConditionalSelect_FalseOp();
50+
test.ConditionalSelect_FalseOp();
6151

6252
// Validates using inside ConditionalSelect with zero falseValue
6353
test.ConditionalSelect_ZeroOp();

0 commit comments

Comments
 (0)