Skip to content

Commit 0e74147

Browse files
authored
ARM64-SVE: Ensure MOVPRFX is next to SVE instruction in imm tables (#106125)
* ARM64-SVE: Ensure MOVPRFX is next to SVE instruction in immediate jump tables
* Add emitInsMovPrfxHelper
* Fix formatting
* Restore a predicated movprfx use
* Fix use of predicated movprfx
1 parent 506e749 commit 0e74147

File tree

2 files changed

+99
-46
lines changed

2 files changed

+99
-46
lines changed

src/coreclr/jit/codegen.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -1010,7 +1010,7 @@ class CodeGen final : public CodeGenInterface
10101010
class HWIntrinsicImmOpHelper final
10111011
{
10121012
public:
1013-
HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTree* immOp, GenTreeHWIntrinsic* intrin);
1013+
HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTree* immOp, GenTreeHWIntrinsic* intrin, int numInstrs = 1);
10141014

10151015
HWIntrinsicImmOpHelper(
10161016
CodeGen* codeGen, regNumber immReg, int immLowerBound, int immUpperBound, GenTreeHWIntrinsic* intrin);
@@ -1058,6 +1058,7 @@ class CodeGen final : public CodeGenInterface
10581058
int immUpperBound;
10591059
regNumber nonConstImmReg;
10601060
regNumber branchTargetReg;
1061+
int numInstrs;
10611062
};
10621063

10631064
#endif // TARGET_ARM64

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

+97-45
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
// codeGen -- an instance of CodeGen class.
2424
// immOp -- an immediate operand of the intrinsic.
2525
// intrin -- a hardware intrinsic tree node.
26+
// numInstrs -- number of instructions that will be in each switch entry. Default 1.
2627
//
2728
// Note: This class is designed to be used in the following way
2829
// HWIntrinsicImmOpHelper helper(this, immOp, intrin);
@@ -35,11 +36,15 @@
3536
// This allows combining the logic for the cases where immOp->isContainedIntOrIImmed() is either true or false in the form
3637
// of a for-loop.
3738
//
38-
CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTree* immOp, GenTreeHWIntrinsic* intrin)
39+
CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(CodeGen* codeGen,
40+
GenTree* immOp,
41+
GenTreeHWIntrinsic* intrin,
42+
int numInstrs)
3943
: codeGen(codeGen)
4044
, endLabel(nullptr)
4145
, nonZeroLabel(nullptr)
4246
, branchTargetReg(REG_NA)
47+
, numInstrs(numInstrs)
4348
{
4449
assert(codeGen != nullptr);
4550
assert(varTypeIsIntegral(immOp));
@@ -132,6 +137,7 @@ CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(
132137
, immUpperBound(immUpperBound)
133138
, nonConstImmReg(immReg)
134139
, branchTargetReg(REG_NA)
140+
, numInstrs(1)
135141
{
136142
assert(codeGen != nullptr);
137143

@@ -181,18 +187,32 @@ void CodeGen::HWIntrinsicImmOpHelper::EmitBegin()
181187
}
182188
else
183189
{
184-
// Here we assume that each case consists of one arm64 instruction followed by "b endLabel".
190+
assert(numInstrs == 1 || numInstrs == 2);
191+
192+
// Here we assume that each case consists of numInstrs arm64 instructions followed by "b endLabel".
185193
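// Since an arm64 instruction is 4 bytes, each case occupies (numInstrs + 1) * 4 bytes, so we branch to
// AddressOf(beginLabel) + (nonConstImmReg << 3), plus a further (nonConstImmReg << 2) when numInstrs == 2.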
186194
GetEmitter()->emitIns_R_L(INS_adr, EA_8BYTE, beginLabel, branchTargetReg);
187195
GetEmitter()->emitIns_R_R_R_I(INS_add, EA_8BYTE, branchTargetReg, branchTargetReg, nonConstImmReg, 3,
188196
INS_OPTS_LSL);
189197

198+
// For two instructions, add the extra one.
199+
if (numInstrs == 2)
200+
{
201+
GetEmitter()->emitIns_R_R_R_I(INS_add, EA_8BYTE, branchTargetReg, branchTargetReg, nonConstImmReg, 2,
202+
INS_OPTS_LSL);
203+
}
204+
190205
// If the lower bound is non zero we need to adjust the branch target value by subtracting
191-
// (immLowerBound << 3).
206+
// the lower bound scaled by the size in bytes of one case (8, or 12 when numInstrs == 2).
192207
if (immLowerBound != 0)
193208
{
194-
GetEmitter()->emitIns_R_R_I(INS_sub, EA_8BYTE, branchTargetReg, branchTargetReg,
195-
((ssize_t)immLowerBound << 3));
209+
ssize_t lowerReduce = ((ssize_t)immLowerBound << 3);
210+
if (numInstrs == 2)
211+
{
212+
lowerReduce += ((ssize_t)immLowerBound << 2);
213+
}
214+
215+
GetEmitter()->emitIns_R_R_I(INS_sub, EA_8BYTE, branchTargetReg, branchTargetReg, lowerReduce);
196216
}
197217

198218
GetEmitter()->emitIns_R(INS_br, EA_8BYTE, branchTargetReg);
@@ -516,6 +536,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
516536
}
517537

518538
// Shared code for setting up embedded mask arg for intrinsics with 3+ operands
539+
540+
auto emitEmbeddedMaskSetupInstrs = [&] {
541+
if (intrin.op3->IsVectorZero() || (targetReg != falseReg) || (targetReg != embMaskOp1Reg))
542+
{
543+
return 1;
544+
}
545+
return 0;
546+
};
547+
519548
auto emitEmbeddedMaskSetup = [&] {
520549
if (intrin.op3->IsVectorZero())
521550
{
@@ -721,6 +750,24 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
721750
}
722751
};
723752

753+
auto emitInsMovPrfxHelper = [&](regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4) {
754+
if (hasShift)
755+
{
756+
HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2);
757+
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
758+
{
759+
GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, EA_SCALABLE, reg1, reg2, reg3, opt);
760+
GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, reg1, reg2, helper.ImmValue(),
761+
embOpt, sopt);
762+
}
763+
}
764+
else
765+
{
766+
GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, EA_SCALABLE, reg1, reg2, reg3, opt);
767+
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, reg1, reg2, reg4, embOpt, sopt);
768+
}
769+
};
770+
724771
if (intrin.op3->IsVectorZero())
725772
{
726773
// If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the
@@ -739,12 +786,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
739786

740787
default:
741788
assert(targetReg != embMaskOp2Reg);
742-
GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg,
743-
embMaskOp1Reg, opt);
744789

745790
// Finally, perform the actual "predicated" operation so that `targetReg` is the first
746791
// operand and `embMaskOp2Reg` is the second operand.
747-
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
792+
793+
emitInsMovPrfxHelper(targetReg, maskReg, embMaskOp1Reg, embMaskOp2Reg);
748794
break;
749795
}
750796
}
@@ -768,30 +814,28 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
768814
// into targetReg. Next, do the predicated operation on the targetReg and last,
769815
// use "sel" to select the active lanes based on mask, and set inactive lanes
770816
// to falseReg.
771-
772817
assert(targetReg != embMaskOp2Reg);
773818
assert(HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinEmbMask.id));
774819

775-
GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg);
776-
777-
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
820+
emitInsMovPrfxHelper(targetReg, maskReg, embMaskOp1Reg, embMaskOp2Reg);
778821
}
779822

780823
GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
781824
falseReg, opt);
782-
break;
783825
}
784826
else if (targetReg != embMaskOp1Reg)
785827
{
786828
// embMaskOp1Reg is same as `falseReg`, but not same as `targetReg`. Move the
787829
// `embMaskOp1Reg` i.e. `falseReg` in `targetReg`, using a movprfx governed by `maskReg`, so the
788830
// subsequent `insEmbMask` operation can be merged on top of it.
789-
GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg);
831+
emitInsMovPrfxHelper(targetReg, maskReg, falseReg, embMaskOp2Reg);
832+
}
833+
else
834+
{
835+
// Finally, perform the actual "predicated" operation so that `targetReg` is the first
836+
// operand and `embMaskOp2Reg` is the second operand.
837+
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
790838
}
791-
792-
// Finally, perform the actual "predicated" operation so that `targetReg` is the first operand
793-
// and `embMaskOp2Reg` is the second operand.
794-
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
795839
}
796840
else
797841
{
@@ -907,21 +951,22 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
907951
}
908952
}
909953

910-
emitEmbeddedMaskSetup();
911-
912954
// Finally, perform the desired operation.
913955
if (HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id))
914956
{
915-
HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic());
957+
HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic(),
958+
emitEmbeddedMaskSetupInstrs() + 1);
916959
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
917960
{
961+
emitEmbeddedMaskSetup();
918962
GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
919963
helper.ImmValue(), opt);
920964
}
921965
}
922966
else
923967
{
924968
assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id));
969+
emitEmbeddedMaskSetup();
925970
GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
926971
embMaskOp3Reg, opt);
927972
}
@@ -935,11 +980,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
935980
assert(intrinEmbMask.op4->isContained() == (embMaskOp4Reg == REG_NA));
936981
assert(HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id));
937982

938-
emitEmbeddedMaskSetup();
939-
940-
HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic());
983+
HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic(),
984+
emitEmbeddedMaskSetupInstrs() + 1);
941985
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
942986
{
987+
emitEmbeddedMaskSetup();
943988
GetEmitter()->emitInsSve_R_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
944989
embMaskOp3Reg, helper.ImmValue(), opt);
945990
}
@@ -2333,17 +2378,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
23332378
{
23342379
assert(isRMW);
23352380

2336-
if (targetReg != op1Reg)
2337-
{
2338-
assert(targetReg != op2Reg);
2339-
2340-
GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2341-
}
2342-
23432381
HWIntrinsicImmOpHelper helper(this, intrin.op3, node);
23442382

23452383
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
23462384
{
2385+
if (targetReg != op1Reg)
2386+
{
2387+
assert(targetReg != op2Reg);
2388+
2389+
GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2390+
}
2391+
23472392
const int elementIndex = helper.ImmValue();
23482393
const int byteIndex = genTypeSize(intrin.baseType) * elementIndex;
23492394

@@ -2483,17 +2528,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
24832528
{
24842529
assert(isRMW);
24852530

2486-
if (targetReg != op1Reg)
2487-
{
2488-
assert(targetReg != op2Reg);
2489-
2490-
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2491-
}
2492-
24932531
HWIntrinsicImmOpHelper helper(this, intrin.op3, node);
24942532

24952533
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
24962534
{
2535+
if (targetReg != op1Reg)
2536+
{
2537+
assert(targetReg != op2Reg);
2538+
2539+
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2540+
}
2541+
24972542
GetEmitter()->emitInsSve_R_R_I(ins, emitSize, targetReg, op2Reg, helper.ImmValue(), opt);
24982543
}
24992544
break;
@@ -2504,16 +2549,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
25042549
assert(isRMW);
25052550
assert(hasImmediateOperand);
25062551

2507-
if (targetReg != op1Reg)
2508-
{
2509-
assert(targetReg != op2Reg);
2510-
assert(targetReg != op3Reg);
2511-
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2512-
}
2513-
25142552
// If both immediates are constant, we don't need a jump table
25152553
if (intrin.op4->IsCnsIntOrI() && intrin.op5->IsCnsIntOrI())
25162554
{
2555+
if (targetReg != op1Reg)
2556+
{
2557+
assert(targetReg != op2Reg);
2558+
assert(targetReg != op3Reg);
2559+
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2560+
}
2561+
25172562
assert(intrin.op4->isContainedIntOrIImmed() && intrin.op5->isContainedIntOrIImmed());
25182563
GetEmitter()->emitInsSve_R_R_R_I_I(ins, emitSize, targetReg, op2Reg, op3Reg,
25192564
intrin.op4->AsIntCon()->gtIconVal,
@@ -2537,6 +2582,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
25372582
HWIntrinsicImmOpHelper helper(this, op4Reg, 0, 7, node);
25382583
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
25392584
{
2585+
if (targetReg != op1Reg)
2586+
{
2587+
assert(targetReg != op2Reg);
2588+
assert(targetReg != op3Reg);
2589+
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2590+
}
2591+
25402592
// Extract index and rotation from the immediate
25412593
const int value = helper.ImmValue();
25422594
const ssize_t index = value & 1;

0 commit comments

Comments
 (0)