23
23
// codeGen -- an instance of CodeGen class.
24
24
// immOp -- an immediate operand of the intrinsic.
25
25
// intrin -- a hardware intrinsic tree node.
26
+ // numInstrs -- number of instructions that will be in each switch entry. Default 1.
26
27
//
27
28
// Note: This class is designed to be used in the following way
28
29
// HWIntrinsicImmOpHelper helper(this, immOp, intrin);
35
36
// This allows combining the logic for cases when immOp->isContainedIntOrIImmed() is either true or false in the form
36
37
// of a for-loop.
37
38
//
38
- CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper (CodeGen* codeGen, GenTree* immOp, GenTreeHWIntrinsic* intrin)
39
+ CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper (CodeGen* codeGen,
40
+ GenTree* immOp,
41
+ GenTreeHWIntrinsic* intrin,
42
+ int numInstrs)
39
43
: codeGen(codeGen)
40
44
, endLabel(nullptr )
41
45
, nonZeroLabel(nullptr )
42
46
, branchTargetReg(REG_NA)
47
+ , numInstrs(numInstrs)
43
48
{
44
49
assert (codeGen != nullptr );
45
50
assert (varTypeIsIntegral (immOp));
@@ -132,6 +137,7 @@ CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(
132
137
, immUpperBound(immUpperBound)
133
138
, nonConstImmReg(immReg)
134
139
, branchTargetReg(REG_NA)
140
+ , numInstrs(1 )
135
141
{
136
142
assert (codeGen != nullptr );
137
143
@@ -181,18 +187,32 @@ void CodeGen::HWIntrinsicImmOpHelper::EmitBegin()
181
187
}
182
188
else
183
189
{
184
- // Here we assume that each case consists of one arm64 instruction followed by "b endLabel".
190
+ assert (numInstrs == 1 || numInstrs == 2 );
191
+
192
+ // Here we assume that each case consists of numInstrs arm64 instructions followed by "b endLabel".
185
193
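// Since an arm64 instruction is 4 bytes, a case is (numInstrs + 1) * 4 bytes long: with one instruction we
// branch to AddressOf(beginLabel) + (nonConstImmReg << 3); with two, a further (nonConstImmReg << 2) is added.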
186
194
GetEmitter ()->emitIns_R_L (INS_adr, EA_8BYTE, beginLabel, branchTargetReg);
187
195
GetEmitter ()->emitIns_R_R_R_I (INS_add, EA_8BYTE, branchTargetReg, branchTargetReg, nonConstImmReg, 3 ,
188
196
INS_OPTS_LSL);
189
197
198
+ // For two instructions, each case is 12 bytes, so add a further (nonConstImmReg << 2).
199
+ if (numInstrs == 2 )
200
+ {
201
+ GetEmitter ()->emitIns_R_R_R_I (INS_add, EA_8BYTE, branchTargetReg, branchTargetReg, nonConstImmReg, 2 ,
202
+ INS_OPTS_LSL);
203
+ }
204
+
190
205
// If the lower bound is non zero we need to adjust the branch target value by subtracting
191
- // (immLowerBound << 3).
206
+ // the lower bound scaled by the size in bytes of each case.
192
207
if (immLowerBound != 0 )
193
208
{
194
- GetEmitter ()->emitIns_R_R_I (INS_sub, EA_8BYTE, branchTargetReg, branchTargetReg,
195
- ((ssize_t )immLowerBound << 3 ));
209
+ ssize_t lowerReduce = ((ssize_t )immLowerBound << 3 );
210
+ if (numInstrs == 2 )
211
+ {
212
+ lowerReduce += ((ssize_t )immLowerBound << 2 );
213
+ }
214
+
215
+ GetEmitter ()->emitIns_R_R_I (INS_sub, EA_8BYTE, branchTargetReg, branchTargetReg, lowerReduce);
196
216
}
197
217
198
218
GetEmitter ()->emitIns_R (INS_br, EA_8BYTE, branchTargetReg);
@@ -516,6 +536,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
516
536
}
517
537
518
538
// Shared code for setting up embedded mask arg for intrinsics with 3+ operands
539
+
540
// emitEmbeddedMaskSetupInstrs: returns 1 when intrin.op3 is a zero vector, or when targetReg differs
// from falseReg or from embMaskOp1Reg; returns 0 only when op3 is not a zero vector and targetReg,
// falseReg and embMaskOp1Reg are all the same register.
// The callers add 1 to this value and pass the sum as `numInstrs` to HWIntrinsicImmOpHelper.
// NOTE(review): presumably this is the number of instructions emitEmbeddedMaskSetup() emits; that
// lambda's body is not fully visible here, so confirm the two stay in sync.
+ auto emitEmbeddedMaskSetupInstrs = [&] {
541
+ if (intrin.op3 ->IsVectorZero () || (targetReg != falseReg) || (targetReg != embMaskOp1Reg))
542
+ {
543
+ return 1 ;
544
+ }
545
+ return 0 ;
546
+ };
547
+
519
548
auto emitEmbeddedMaskSetup = [&] {
520
549
if (intrin.op3 ->IsVectorZero ())
521
550
{
@@ -721,6 +750,24 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
721
750
}
722
751
};
723
752
753
// emitInsMovPrfxHelper: emits INS_sve_movprfx with operands (reg1, reg2, reg3), immediately followed by
// the `insEmbMask` instruction with reg1 and reg2 as its first two operands.
//
// When `hasShift` is set, the second instruction takes an immediate instead of a register: the pair is
// emitted through HWIntrinsicImmOpHelper, constructed with numInstrs == 2 because each case body here
// emits two instructions, and reg4 is not used. Otherwise reg4 is the third register operand of
// `insEmbMask`.
//
// The movprfx is emitted inside the helper loop, so it is repeated in every case rather than once
// before the loop.
// NOTE(review): visible callers pass (targetReg, maskReg, <source reg>, embMaskOp2Reg) -- confirm that
// any new caller follows the same operand roles.
+ auto emitInsMovPrfxHelper = [&](regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4) {
754
+ if (hasShift)
755
+ {
756
+ HWIntrinsicImmOpHelper helper (this , intrinEmbMask.op2 , op2->AsHWIntrinsic (), 2 );
757
+ for (helper.EmitBegin (); !helper.Done (); helper.EmitCaseEnd ())
758
+ {
759
+ GetEmitter ()->emitIns_R_R_R (INS_sve_movprfx, EA_SCALABLE, reg1, reg2, reg3, opt);
760
+ GetEmitter ()->emitInsSve_R_R_I (insEmbMask, emitSize, reg1, reg2, helper.ImmValue (),
761
+ embOpt, sopt);
762
+ }
763
+ }
764
+ else
765
+ {
766
+ GetEmitter ()->emitIns_R_R_R (INS_sve_movprfx, EA_SCALABLE, reg1, reg2, reg3, opt);
767
+ GetEmitter ()->emitIns_R_R_R (insEmbMask, emitSize, reg1, reg2, reg4, embOpt, sopt);
768
+ }
769
+ };
770
+
724
771
if (intrin.op3 ->IsVectorZero ())
725
772
{
726
773
// If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the
@@ -739,12 +786,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
739
786
740
787
default :
741
788
assert (targetReg != embMaskOp2Reg);
742
- GetEmitter ()->emitIns_R_R_R (INS_sve_movprfx, emitSize, targetReg, maskReg,
743
- embMaskOp1Reg, opt);
744
789
745
790
// Finally, perform the actual "predicated" operation so that `targetReg` is the first
746
791
// operand and `embMaskOp2Reg` is the second operand.
747
- emitInsHelper (targetReg, maskReg, embMaskOp2Reg);
792
+
793
+ emitInsMovPrfxHelper (targetReg, maskReg, embMaskOp1Reg, embMaskOp2Reg);
748
794
break ;
749
795
}
750
796
}
@@ -768,30 +814,28 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
768
814
// into targetReg. Next, do the predicated operation on the targetReg and last,
769
815
// use "sel" to select the active lanes based on mask, and set inactive lanes
770
816
// to falseReg.
771
-
772
817
assert (targetReg != embMaskOp2Reg);
773
818
assert (HWIntrinsicInfo::IsEmbeddedMaskedOperation (intrinEmbMask.id ));
774
819
775
- GetEmitter ()->emitIns_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg);
776
-
777
- emitInsHelper (targetReg, maskReg, embMaskOp2Reg);
820
+ emitInsMovPrfxHelper (targetReg, maskReg, embMaskOp1Reg, embMaskOp2Reg);
778
821
}
779
822
780
823
GetEmitter ()->emitIns_R_R_R_R (INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
781
824
falseReg, opt);
782
- break ;
783
825
}
784
826
else if (targetReg != embMaskOp1Reg)
785
827
{
786
828
// embMaskOp1Reg is same as `falseReg`, but not same as `targetReg`. Move the
787
829
// `embMaskOp1Reg` i.e. `falseReg` in `targetReg`, using "unpredicated movprfx", so the
788
830
// subsequent `insEmbMask` operation can be merged on top of it.
789
- GetEmitter ()->emitIns_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg);
831
+ emitInsMovPrfxHelper (targetReg, maskReg, falseReg, embMaskOp2Reg);
832
+ }
833
+ else
834
+ {
835
+ // Finally, perform the actual "predicated" operation so that `targetReg` is the first
836
+ // operand and `embMaskOp2Reg` is the second operand.
837
+ emitInsHelper (targetReg, maskReg, embMaskOp2Reg);
790
838
}
791
-
792
- // Finally, perform the actual "predicated" operation so that `targetReg` is the first operand
793
- // and `embMaskOp2Reg` is the second operand.
794
- emitInsHelper (targetReg, maskReg, embMaskOp2Reg);
795
839
}
796
840
else
797
841
{
@@ -907,21 +951,22 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
907
951
}
908
952
}
909
953
910
- emitEmbeddedMaskSetup ();
911
-
912
954
// Finally, perform the desired operation.
913
955
if (HWIntrinsicInfo::HasImmediateOperand (intrinEmbMask.id ))
914
956
{
915
- HWIntrinsicImmOpHelper helper (this , intrinEmbMask.op3 , op2->AsHWIntrinsic ());
957
+ HWIntrinsicImmOpHelper helper (this , intrinEmbMask.op3 , op2->AsHWIntrinsic (),
958
+ emitEmbeddedMaskSetupInstrs () + 1 );
916
959
for (helper.EmitBegin (); !helper.Done (); helper.EmitCaseEnd ())
917
960
{
961
+ emitEmbeddedMaskSetup ();
918
962
GetEmitter ()->emitInsSve_R_R_R_I (insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
919
963
helper.ImmValue (), opt);
920
964
}
921
965
}
922
966
else
923
967
{
924
968
assert (HWIntrinsicInfo::IsFmaIntrinsic (intrinEmbMask.id ));
969
+ emitEmbeddedMaskSetup ();
925
970
GetEmitter ()->emitInsSve_R_R_R_R (insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
926
971
embMaskOp3Reg, opt);
927
972
}
@@ -935,11 +980,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
935
980
assert (intrinEmbMask.op4 ->isContained () == (embMaskOp4Reg == REG_NA));
936
981
assert (HWIntrinsicInfo::HasImmediateOperand (intrinEmbMask.id ));
937
982
938
- emitEmbeddedMaskSetup ();
939
-
940
- HWIntrinsicImmOpHelper helper (this , intrinEmbMask.op4 , op2->AsHWIntrinsic ());
983
+ HWIntrinsicImmOpHelper helper (this , intrinEmbMask.op4 , op2->AsHWIntrinsic (),
984
+ emitEmbeddedMaskSetupInstrs () + 1 );
941
985
for (helper.EmitBegin (); !helper.Done (); helper.EmitCaseEnd ())
942
986
{
987
+ emitEmbeddedMaskSetup ();
943
988
GetEmitter ()->emitInsSve_R_R_R_R_I (insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
944
989
embMaskOp3Reg, helper.ImmValue (), opt);
945
990
}
@@ -2333,17 +2378,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
2333
2378
{
2334
2379
assert (isRMW);
2335
2380
2336
- if (targetReg != op1Reg)
2337
- {
2338
- assert (targetReg != op2Reg);
2339
-
2340
- GetEmitter ()->emitIns_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2341
- }
2342
-
2343
2381
HWIntrinsicImmOpHelper helper (this , intrin.op3 , node);
2344
2382
2345
2383
for (helper.EmitBegin (); !helper.Done (); helper.EmitCaseEnd ())
2346
2384
{
2385
+ if (targetReg != op1Reg)
2386
+ {
2387
+ assert (targetReg != op2Reg);
2388
+
2389
+ GetEmitter ()->emitIns_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2390
+ }
2391
+
2347
2392
const int elementIndex = helper.ImmValue ();
2348
2393
const int byteIndex = genTypeSize (intrin.baseType ) * elementIndex;
2349
2394
@@ -2483,17 +2528,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
2483
2528
{
2484
2529
assert (isRMW);
2485
2530
2486
- if (targetReg != op1Reg)
2487
- {
2488
- assert (targetReg != op2Reg);
2489
-
2490
- GetEmitter ()->emitInsSve_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2491
- }
2492
-
2493
2531
HWIntrinsicImmOpHelper helper (this , intrin.op3 , node);
2494
2532
2495
2533
for (helper.EmitBegin (); !helper.Done (); helper.EmitCaseEnd ())
2496
2534
{
2535
+ if (targetReg != op1Reg)
2536
+ {
2537
+ assert (targetReg != op2Reg);
2538
+
2539
+ GetEmitter ()->emitInsSve_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2540
+ }
2541
+
2497
2542
GetEmitter ()->emitInsSve_R_R_I (ins, emitSize, targetReg, op2Reg, helper.ImmValue (), opt);
2498
2543
}
2499
2544
break ;
@@ -2504,16 +2549,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
2504
2549
assert (isRMW);
2505
2550
assert (hasImmediateOperand);
2506
2551
2507
- if (targetReg != op1Reg)
2508
- {
2509
- assert (targetReg != op2Reg);
2510
- assert (targetReg != op3Reg);
2511
- GetEmitter ()->emitInsSve_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2512
- }
2513
-
2514
2552
// If both immediates are constant, we don't need a jump table
2515
2553
if (intrin.op4 ->IsCnsIntOrI () && intrin.op5 ->IsCnsIntOrI ())
2516
2554
{
2555
+ if (targetReg != op1Reg)
2556
+ {
2557
+ assert (targetReg != op2Reg);
2558
+ assert (targetReg != op3Reg);
2559
+ GetEmitter ()->emitInsSve_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2560
+ }
2561
+
2517
2562
assert (intrin.op4 ->isContainedIntOrIImmed () && intrin.op5 ->isContainedIntOrIImmed ());
2518
2563
GetEmitter ()->emitInsSve_R_R_R_I_I (ins, emitSize, targetReg, op2Reg, op3Reg,
2519
2564
intrin.op4 ->AsIntCon ()->gtIconVal ,
@@ -2537,6 +2582,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
2537
2582
HWIntrinsicImmOpHelper helper (this , op4Reg, 0 , 7 , node);
2538
2583
for (helper.EmitBegin (); !helper.Done (); helper.EmitCaseEnd ())
2539
2584
{
2585
+ if (targetReg != op1Reg)
2586
+ {
2587
+ assert (targetReg != op2Reg);
2588
+ assert (targetReg != op3Reg);
2589
+ GetEmitter ()->emitInsSve_R_R (INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2590
+ }
2591
+
2540
2592
// Extract index and rotation from the immediate
2541
2593
const int value = helper.ImmValue ();
2542
2594
const ssize_t index = value & 1 ;
0 commit comments