Skip to content

Commit c4ebf36

Browse files
Add support for Sve.Splice() (#103567)
* Add support for Sve.ReverseBits() and Sve.Splice() * Remove Sve.Reverse() * Fix uses of op1 in lsra * Fix formatting issues * Fix comments for constructive splice * Ensure only destructive version of splice is emitted * Disable constructive splice tests
1 parent c14e5dd commit c4ebf36

File tree

12 files changed

+927
-19
lines changed

12 files changed

+927
-19
lines changed

src/coreclr/jit/codegenarm64test.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6237,10 +6237,12 @@ void CodeGen::genArm64EmitterUnitTestsSve()
62376237
INS_OPTS_SCALABLE_D); // REVW <Zd>.D, <Pg>/M, <Zn>.D
62386238

62396239
// IF_SVE_CV_3A
6240-
theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V0, REG_P0, REG_V30, INS_OPTS_SCALABLE_B,
6241-
INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // SPLICE <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
6242-
theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V3, REG_P7, REG_V27, INS_OPTS_SCALABLE_D,
6243-
INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // SPLICE <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
6240+
// TODO-SVE: Currently not supporting the constructive version of splice. Uncomment the tests on closing
6241+
// https://github.com/dotnet/runtime/issues/103850.
6242+
// theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V0,REG_P0, REG_V30, INS_OPTS_SCALABLE_B,
6243+
// INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // SPLICE <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
6244+
// theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V3, REG_P7, REG_V27, INS_OPTS_SCALABLE_D,
6245+
// INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // SPLICE <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
62446246

62456247
// IF_SVE_CV_3B
62466248
theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V1, REG_P1, REG_V29,

src/coreclr/jit/emitarm64sve.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3859,6 +3859,9 @@ void emitter::emitInsSve_R_R_R(instruction ins,
38593859
assert(isLowPredicateRegister(reg2));
38603860
assert(isVectorRegister(reg3));
38613861
assert(insOptsScalableStandard(opt));
3862+
// TODO-SVE: We currently support only the destructive version of splice. Remove the following assert when
3863+
// the constructive version is added, as described in https://github.com/dotnet/runtime/issues/103850.
3864+
assert(sopt != INS_SCALABLE_OPTS_WITH_VECTOR_PAIR);
38623865
fmt = (sopt == INS_SCALABLE_OPTS_WITH_VECTOR_PAIR) ? IF_SVE_CV_3A : IF_SVE_CV_3B;
38633866
break;
38643867

@@ -10295,7 +10298,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
1029510298
dst += emitOutput_Instr(dst, code);
1029610299
break;
1029710300

10298-
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
10301+
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (constructive)
1029910302
case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
1030010303
code = emitInsCodeSve(ins, fmt);
1030110304
code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
@@ -13258,7 +13261,7 @@ void emitter::emitInsSveSanityCheck(instrDesc* id)
1325813261
assert(isScalableVectorSize(id->idOpSize()));
1325913262
break;
1326013263

13261-
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
13264+
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (constructive)
1326213265
case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
1326313266
assert(isScalableVectorSize(id->idOpSize())); // xx
1326413267
assert(insOptsScalableStandard(id->idInsOpt()));
@@ -14945,7 +14948,7 @@ void emitter::emitDispInsSveHelp(instrDesc* id)
1494514948
break;
1494614949

1494714950
// <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
14948-
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
14951+
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (constructive)
1494914952
emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
1495014953
emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // VVV
1495114954
emitDispSveConsecutiveRegList(id->idReg3(), insGetSveReg1ListSize(ins), id->idInsOpt(), false); // nnnnn
@@ -16806,7 +16809,7 @@ void emitter::getInsSveExecutionCharacteristics(instrDesc* id, insExecutionChara
1680616809
result.insLatency = PERFSCORE_LATENCY_140C;
1680716810
break;
1680816811

16809-
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
16812+
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (constructive)
1681016813
case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
1681116814
result.insLatency = PERFSCORE_LATENCY_3C;
1681216815
result.insThroughput = PERFSCORE_THROUGHPUT_1C;

src/coreclr/jit/emitfmtsarm64sve.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ IF_DEF(SVE_CR_3A, IS_NONE, NONE) // SVE_CR_3A ........xx...... ...gggnnnnnddd
221221
IF_DEF(SVE_CS_3A, IS_NONE, NONE) // SVE_CS_3A ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
222222
IF_DEF(SVE_CT_3A, IS_NONE, NONE) // SVE_CT_3A ................ ...gggnnnnnddddd -- SVE reverse doublewords
223223
IF_DEF(SVE_CU_3A, IS_NONE, NONE) // SVE_CU_3A ........xx...... ...gggnnnnnddddd -- SVE reverse within elements
224-
IF_DEF(SVE_CV_3A, IS_NONE, NONE) // SVE_CV_3A ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
224+
IF_DEF(SVE_CV_3A, IS_NONE, NONE) // SVE_CV_3A ........xx...... ...VVVnnnnnddddd -- SVE vector splice (constructive)
225225
IF_DEF(SVE_CV_3B, IS_NONE, NONE) // SVE_CV_3B ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
226226
IF_DEF(SVE_CW_4A, IS_NONE, NONE) // SVE_CW_4A ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated)
227227
IF_DEF(SVE_CX_4A, IS_NONE, NONE) // SVE_CX_4A ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -853,15 +853,31 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
853853
case 3:
854854
if (isRMW)
855855
{
856-
if (targetReg != op1Reg)
856+
if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
857857
{
858-
assert(targetReg != op2Reg);
859-
assert(targetReg != op3Reg);
858+
if (targetReg != op2Reg)
859+
{
860+
assert(targetReg != op1Reg);
861+
assert(targetReg != op3Reg);
860862

861-
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
862-
/* canSkip */ true);
863+
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op2Reg,
864+
/* canSkip */ true);
865+
}
866+
867+
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, opt);
868+
}
869+
else
870+
{
871+
if (targetReg != op1Reg)
872+
{
873+
assert(targetReg != op2Reg);
874+
assert(targetReg != op3Reg);
875+
876+
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
877+
/* canSkip */ true);
878+
}
879+
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
863880
}
864-
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
865881
}
866882
else
867883
{

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ HARDWARE_INTRINSIC(Sve, SignExtend32,
199199
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
200200
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
201201
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
202+
HARDWARE_INTRINSIC(Sve, Splice, -1, 3, true, {INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
202203
HARDWARE_INTRINSIC(Sve, Sqrt, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fsqrt, INS_sve_fsqrt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
203204
HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
204205
HARDWARE_INTRINSIC(Sve, StoreNarrowing, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)

src/coreclr/jit/lsraarm64.cpp

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,6 +1509,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
15091509
const bool isRMW = intrinsicTree->isRMWHWIntrinsic(compiler);
15101510

15111511
bool tgtPrefOp1 = false;
1512+
bool tgtPrefOp2 = false;
15121513
bool delayFreeMultiple = false;
15131514
if (intrin.op1 != nullptr)
15141515
{
@@ -1563,9 +1564,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
15631564

15641565
// If we have an RMW intrinsic or an intrinsic with simple move semantic between two SIMD registers,
15651566
// we want to preference op1Reg to the target if op1 is not contained.
1566-
if (isRMW || simdRegToSimdRegMove)
1567+
1568+
if ((isRMW || simdRegToSimdRegMove))
15671569
{
1568-
tgtPrefOp1 = !intrin.op1->isContained();
1570+
if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
1571+
{
1572+
assert(!simdRegToSimdRegMove);
1573+
// Prefer op2Reg for the masked operation as mask would be the op1Reg
1574+
tgtPrefOp2 = !intrin.op1->isContained();
1575+
}
1576+
else
1577+
{
1578+
tgtPrefOp1 = !intrin.op1->isContained();
1579+
}
15691580
}
15701581

15711582
if (delayFreeMultiple)
@@ -1948,6 +1959,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
19481959
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
19491960
}
19501961
}
1962+
else if (tgtPrefOp2)
1963+
{
1964+
if (!intrin.op2->isContained())
1965+
{
1966+
assert(tgtPrefUse == nullptr);
1967+
tgtPrefUse2 = BuildUse(intrin.op2);
1968+
srcCount++;
1969+
}
1970+
else
1971+
{
1972+
srcCount += BuildOperandUses(intrin.op2);
1973+
}
1974+
}
19511975
else
19521976
{
19531977
switch (intrin.id)
@@ -1991,12 +2015,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
19912015
{
19922016
SingleTypeRegSet candidates = lowVectorOperandNum == 3 ? lowVectorCandidates : RBM_NONE;
19932017

1994-
srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1, candidates)
1995-
: BuildOperandUses(intrin.op3, candidates);
2018+
if (isRMW)
2019+
{
2020+
srcCount += BuildDelayFreeUses(intrin.op3, (tgtPrefOp2 ? intrin.op2 : intrin.op1), candidates);
2021+
}
2022+
else
2023+
{
2024+
srcCount += BuildOperandUses(intrin.op3, candidates);
2025+
}
19962026

19972027
if (intrin.op4 != nullptr)
19982028
{
19992029
assert(lowVectorOperandNum != 4);
2030+
assert(!tgtPrefOp2);
20002031
srcCount += isRMW ? BuildDelayFreeUses(intrin.op4, intrin.op1) : BuildOperandUses(intrin.op4);
20012032
}
20022033
}

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5558,6 +5558,69 @@ internal Arm64() { }
55585558
public static unsafe Vector<long> SignExtend8(Vector<long> value) { throw new PlatformNotSupportedException(); }
55595559

55605560

5561+
/// Splice two vectors under predicate control
5562+
5563+
/// <summary>
5564+
/// svuint8_t svsplice[_u8](svbool_t pg, svuint8_t op1, svuint8_t op2)
5565+
/// SPLICE Ztied1.B, Pg, Ztied1.B, Zop2.B
5566+
/// </summary>
5567+
public static unsafe Vector<byte> Splice(Vector<byte> mask, Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }
5568+
5569+
/// <summary>
5570+
/// svfloat64_t svsplice[_f64](svbool_t pg, svfloat64_t op1, svfloat64_t op2)
5571+
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
5572+
/// </summary>
5573+
public static unsafe Vector<double> Splice(Vector<double> mask, Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }
5574+
5575+
/// <summary>
5576+
/// svint16_t svsplice[_s16](svbool_t pg, svint16_t op1, svint16_t op2)
5577+
/// SPLICE Ztied1.H, Pg, Ztied1.H, Zop2.H
5578+
/// </summary>
5579+
public static unsafe Vector<short> Splice(Vector<short> mask, Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }
5580+
5581+
/// <summary>
5582+
/// svint32_t svsplice[_s32](svbool_t pg, svint32_t op1, svint32_t op2)
5583+
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
5584+
/// </summary>
5585+
public static unsafe Vector<int> Splice(Vector<int> mask, Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }
5586+
5587+
/// <summary>
5588+
/// svint64_t svsplice[_s64](svbool_t pg, svint64_t op1, svint64_t op2)
5589+
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
5590+
/// </summary>
5591+
public static unsafe Vector<long> Splice(Vector<long> mask, Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }
5592+
5593+
/// <summary>
5594+
/// svint8_t svsplice[_s8](svbool_t pg, svint8_t op1, svint8_t op2)
5595+
/// SPLICE Ztied1.B, Pg, Ztied1.B, Zop2.B
5596+
/// </summary>
5597+
public static unsafe Vector<sbyte> Splice(Vector<sbyte> mask, Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }
5598+
5599+
/// <summary>
5600+
/// svfloat32_t svsplice[_f32](svbool_t pg, svfloat32_t op1, svfloat32_t op2)
5601+
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
5602+
/// </summary>
5603+
public static unsafe Vector<float> Splice(Vector<float> mask, Vector<float> left, Vector<float> right) { throw new PlatformNotSupportedException(); }
5604+
5605+
/// <summary>
5606+
/// svuint16_t svsplice[_u16](svbool_t pg, svuint16_t op1, svuint16_t op2)
5607+
/// SPLICE Ztied1.H, Pg, Ztied1.H, Zop2.H
5608+
/// </summary>
5609+
public static unsafe Vector<ushort> Splice(Vector<ushort> mask, Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }
5610+
5611+
/// <summary>
5612+
/// svuint32_t svsplice[_u32](svbool_t pg, svuint32_t op1, svuint32_t op2)
5613+
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
5614+
/// </summary>
5615+
public static unsafe Vector<uint> Splice(Vector<uint> mask, Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }
5616+
5617+
/// <summary>
5618+
/// svuint64_t svsplice[_u64](svbool_t pg, svuint64_t op1, svuint64_t op2)
5619+
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
5620+
/// </summary>
5621+
public static unsafe Vector<ulong> Splice(Vector<ulong> mask, Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }
5622+
5623+
55615624
/// Sqrt : Square root
55625625

55635626
/// <summary>

0 commit comments

Comments
 (0)