Skip to content

Commit d77b37b

Browse files
authored
Arm64/Sve: Add SignExtendWidening* and ZeroExtendWidening* math APIs (#101743)
* Add SignExtendWidening* and ZeroExtendWidening* APIs
* Map API to instruction
* Add missing SignExtendWidening* APIs
* Add test cases
* fix bugs
* Forgot to push the template
* jit format
* Fix the LargeVectorSize
1 parent 6cb4820 commit d77b37b

File tree

6 files changed: 229 additions and 29 deletions

src/coreclr/jit/emitarm64sve.cpp

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2185,7 +2185,7 @@ void emitter::emitInsSve_R_R(instruction ins,
21852185
case INS_sve_uunpkhi:
21862186
case INS_sve_uunpklo:
21872187
assert(insScalableOptsNone(sopt));
2188-
assert(insOptsScalableAtLeastHalf(opt));
2188+
assert(insOptsScalableWide(opt));
21892189
assert(isVectorRegister(reg1));
21902190
assert(isVectorRegister(reg2));
21912191
assert(isScalableVectorSize(size));
@@ -11805,7 +11805,6 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
1180511805

1180611806
case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator
1180711807
case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements
11808-
case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
1180911808
case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated)
1181011809
code = emitInsCodeSve(ins, fmt);
1181111810
code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
@@ -11814,6 +11813,13 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
1181411813
dst += emitOutput_Instr(dst, code);
1181511814
break;
1181611815

11816+
case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
11817+
code = emitInsCodeSve(ins, fmt);
11818+
code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
11819+
code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn
11820+
code |= insEncodeSveElemsize(optGetSveElemsize((insOpts)(id->idInsOpt() + 1))); // xx
11821+
dst += emitOutput_Instr(dst, code);
11822+
break;
1181711823
case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated)
1181811824
case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert
1181911825
case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate
@@ -14389,7 +14395,7 @@ void emitter::emitInsSveSanityCheck(instrDesc* id)
1438914395
case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator
1439014396
case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
1439114397
case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated)
14392-
assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
14398+
assert(insOptsScalableWide(id->idInsOpt()));
1439314399
assert(isVectorRegister(id->idReg1()));
1439414400
assert(isVectorRegister(id->idReg2()));
1439514401
break;
@@ -16231,8 +16237,8 @@ void emitter::emitDispInsSveHelp(instrDesc* id)
1623116237
case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts
1623216238
// <Zd>.<T>, <Zn>.<Tb>
1623316239
case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements
16234-
emitDispSveReg(id->idReg1(), id->idInsOpt(), true);
16235-
emitDispSveReg(id->idReg2(), (insOpts)((unsigned)id->idInsOpt() - 1), false);
16240+
emitDispSveReg(id->idReg1(), (insOpts)(id->idInsOpt() + 1), true);
16241+
emitDispSveReg(id->idReg2(), id->idInsOpt(), false);
1623616242
break;
1623716243

1623816244
// <Zd>.<T>, <Zn>.<T>

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,12 +73,16 @@ HARDWARE_INTRINSIC(Sve, Multiply,
7373
HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
7474
HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
7575
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
76+
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
77+
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
7678
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
7779
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
7880
HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, true, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
7981
HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
8082
HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
8183
HARDWARE_INTRINSIC(Sve, ZeroExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
84+
HARDWARE_INTRINSIC(Sve, ZeroExtendWideningLower, -1, 1, true, {INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_sve_uunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
85+
HARDWARE_INTRINSIC(Sve, ZeroExtendWideningUpper, -1, 1, true, {INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_sve_uunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
8286
HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, true, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
8387
HARDWARE_INTRINSIC(Sve, ZipLow, -1, 2, true, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
8488

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs

Lines changed: 83 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1276,6 +1276,48 @@ internal Arm64() { }
12761276
/// </summary>
12771277
public static unsafe Vector<double> Subtract(Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }
12781278

1279+
/// SignExtendWideningLower : Unpack and extend low half
1280+
1281+
/// <summary>
1282+
/// svint16_t svunpklo[_s16](svint8_t op)
1283+
/// SUNPKLO Zresult.H, Zop.B
1284+
/// </summary>
1285+
public static unsafe Vector<short> SignExtendWideningLower(Vector<sbyte> value) { throw new PlatformNotSupportedException(); }
1286+
1287+
/// <summary>
1288+
/// svint32_t svunpklo[_s32](svint16_t op)
1289+
/// SUNPKLO Zresult.S, Zop.H
1290+
/// </summary>
1291+
public static unsafe Vector<int> SignExtendWideningLower(Vector<short> value) { throw new PlatformNotSupportedException(); }
1292+
1293+
/// <summary>
1294+
/// svint64_t svunpklo[_s64](svint32_t op)
1295+
/// SUNPKLO Zresult.D, Zop.S
1296+
/// </summary>
1297+
public static unsafe Vector<long> SignExtendWideningLower(Vector<int> value) { throw new PlatformNotSupportedException(); }
1298+
1299+
1300+
/// SignExtendWideningUpper : Unpack and extend high half
1301+
1302+
/// <summary>
1303+
/// svint16_t svunpkhi[_s16](svint8_t op)
1304+
/// SUNPKHI Zresult.H, Zop.B
1305+
/// </summary>
1306+
public static unsafe Vector<short> SignExtendWideningUpper(Vector<sbyte> value) { throw new PlatformNotSupportedException(); }
1307+
1308+
/// <summary>
1309+
/// svint32_t svunpkhi[_s32](svint16_t op)
1310+
/// SUNPKHI Zresult.S, Zop.H
1311+
/// </summary>
1312+
public static unsafe Vector<int> SignExtendWideningUpper(Vector<short> value) { throw new PlatformNotSupportedException(); }
1313+
1314+
/// <summary>
1315+
/// svint64_t svunpkhi[_s64](svint32_t op)
1316+
/// SUNPKHI Zresult.D, Zop.S
1317+
/// </summary>
1318+
public static unsafe Vector<long> SignExtendWideningUpper(Vector<int> value) { throw new PlatformNotSupportedException(); }
1319+
1320+
12791321
/// UnzipEven : Concatenate even elements from two inputs
12801322

12811323
/// <summary>
@@ -1469,13 +1511,52 @@ internal Arm64() { }
14691511
/// </summary>
14701512
public static unsafe Vector<ulong> ZeroExtend8(Vector<ulong> value) { throw new PlatformNotSupportedException(); }
14711513

1514+
/// ZeroExtendWideningLower : Unpack and extend low half
1515+
1516+
/// <summary>
1517+
/// svuint16_t svunpklo[_u16](svuint8_t op)
1518+
/// UUNPKLO Zresult.H, Zop.B
1519+
/// </summary>
1520+
public static unsafe Vector<ushort> ZeroExtendWideningLower(Vector<byte> value) { throw new PlatformNotSupportedException(); }
1521+
1522+
/// <summary>
1523+
/// svuint32_t svunpklo[_u32](svuint16_t op)
1524+
/// UUNPKLO Zresult.S, Zop.H
1525+
/// </summary>
1526+
public static unsafe Vector<uint> ZeroExtendWideningLower(Vector<ushort> value) { throw new PlatformNotSupportedException(); }
1527+
1528+
/// <summary>
1529+
/// svuint64_t svunpklo[_u64](svuint32_t op)
1530+
/// UUNPKLO Zresult.D, Zop.S
1531+
/// </summary>
1532+
public static unsafe Vector<ulong> ZeroExtendWideningLower(Vector<uint> value) { throw new PlatformNotSupportedException(); }
1533+
1534+
1535+
/// ZeroExtendWideningUpper : Unpack and extend high half
1536+
1537+
/// <summary>
1538+
/// svuint16_t svunpkhi[_u16](svuint8_t op)
1539+
/// UUNPKHI Zresult.H, Zop.B
1540+
/// </summary>
1541+
public static unsafe Vector<ushort> ZeroExtendWideningUpper(Vector<byte> value) { throw new PlatformNotSupportedException(); }
1542+
1543+
/// <summary>
1544+
/// svuint32_t svunpkhi[_u32](svuint16_t op)
1545+
/// UUNPKHI Zresult.S, Zop.H
1546+
/// </summary>
1547+
public static unsafe Vector<uint> ZeroExtendWideningUpper(Vector<ushort> value) { throw new PlatformNotSupportedException(); }
1548+
1549+
/// <summary>
1550+
/// svuint64_t svunpkhi[_u64](svuint32_t op)
1551+
/// UUNPKHI Zresult.D, Zop.S
1552+
/// </summary>
1553+
public static unsafe Vector<ulong> ZeroExtendWideningUpper(Vector<uint> value) { throw new PlatformNotSupportedException(); }
1554+
14721555
/// ZipHigh : Interleave elements from high halves of two inputs
14731556

14741557
/// <summary>
14751558
/// svuint8_t svzip2[_u8](svuint8_t op1, svuint8_t op2)
14761559
/// ZIP2 Zresult.B, Zop1.B, Zop2.B
1477-
/// svbool_t svzip2_b8(svbool_t op1, svbool_t op2)
1478-
/// ZIP2 Presult.B, Pop1.B, Pop2.B
14791560
/// </summary>
14801561
public static unsafe Vector<byte> ZipHigh(Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }
14811562

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs

Lines changed: 85 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1207,6 +1207,48 @@ internal Arm64() { }
12071207
/// </summary>
12081208
public static unsafe Vector<long> SignExtend8(Vector<long> value) => SignExtend8(value);
12091209

1210+
1211+
/// SignExtendWideningLower : Unpack and extend low half
1212+
1213+
/// <summary>
1214+
/// svint16_t svunpklo[_s16](svint8_t op)
1215+
/// SUNPKLO Zresult.H, Zop.B
1216+
/// </summary>
1217+
public static unsafe Vector<short> SignExtendWideningLower(Vector<sbyte> value) => SignExtendWideningLower(value);
1218+
1219+
/// <summary>
1220+
/// svint32_t svunpklo[_s32](svint16_t op)
1221+
/// SUNPKLO Zresult.S, Zop.H
1222+
/// </summary>
1223+
public static unsafe Vector<int> SignExtendWideningLower(Vector<short> value) => SignExtendWideningLower(value);
1224+
1225+
/// <summary>
1226+
/// svint64_t svunpklo[_s64](svint32_t op)
1227+
/// SUNPKLO Zresult.D, Zop.S
1228+
/// </summary>
1229+
public static unsafe Vector<long> SignExtendWideningLower(Vector<int> value) => SignExtendWideningLower(value);
1230+
1231+
1232+
/// SignExtendWideningUpper : Unpack and extend high half
1233+
1234+
/// <summary>
1235+
/// svint16_t svunpkhi[_s16](svint8_t op)
1236+
/// SUNPKHI Zresult.H, Zop.B
1237+
/// </summary>
1238+
public static unsafe Vector<short> SignExtendWideningUpper(Vector<sbyte> value) => SignExtendWideningUpper(value);
1239+
1240+
/// <summary>
1241+
/// svint32_t svunpkhi[_s32](svint16_t op)
1242+
/// SUNPKHI Zresult.S, Zop.H
1243+
/// </summary>
1244+
public static unsafe Vector<int> SignExtendWideningUpper(Vector<short> value) => SignExtendWideningUpper(value);
1245+
1246+
/// <summary>
1247+
/// svint64_t svunpkhi[_s64](svint32_t op)
1248+
/// SUNPKHI Zresult.D, Zop.S
1249+
/// </summary>
1250+
public static unsafe Vector<long> SignExtendWideningUpper(Vector<int> value) => SignExtendWideningUpper(value);
1251+
12101252
/// Subtract : Subtract
12111253

12121254
/// <summary>
@@ -1552,16 +1594,55 @@ internal Arm64() { }
15521594
/// </summary>
15531595
public static unsafe Vector<ulong> ZeroExtend8(Vector<ulong> value) => ZeroExtend8(value);
15541596

1597+
/// ZeroExtendWideningLower : Unpack and extend low half
1598+
1599+
/// <summary>
1600+
/// svuint16_t svunpklo[_u16](svuint8_t op)
1601+
/// UUNPKLO Zresult.H, Zop.B
1602+
/// </summary>
1603+
public static unsafe Vector<ushort> ZeroExtendWideningLower(Vector<byte> value) => ZeroExtendWideningLower(value);
1604+
1605+
/// <summary>
1606+
/// svuint32_t svunpklo[_u32](svuint16_t op)
1607+
/// UUNPKLO Zresult.S, Zop.H
1608+
/// </summary>
1609+
public static unsafe Vector<uint> ZeroExtendWideningLower(Vector<ushort> value) => ZeroExtendWideningLower(value);
1610+
1611+
/// <summary>
1612+
/// svuint64_t svunpklo[_u64](svuint32_t op)
1613+
/// UUNPKLO Zresult.D, Zop.S
1614+
/// </summary>
1615+
public static unsafe Vector<ulong> ZeroExtendWideningLower(Vector<uint> value) => ZeroExtendWideningLower(value);
1616+
1617+
1618+
/// ZeroExtendWideningUpper : Unpack and extend high half
1619+
1620+
/// <summary>
1621+
/// svuint16_t svunpkhi[_u16](svuint8_t op)
1622+
/// UUNPKHI Zresult.H, Zop.B
1623+
/// </summary>
1624+
public static unsafe Vector<ushort> ZeroExtendWideningUpper(Vector<byte> value) => ZeroExtendWideningUpper(value);
1625+
1626+
/// <summary>
1627+
/// svuint32_t svunpkhi[_u32](svuint16_t op)
1628+
/// UUNPKHI Zresult.S, Zop.H
1629+
/// svbool_t svunpkhi[_b](svbool_t op)
1630+
/// PUNPKHI Presult.H, Pop.B
1631+
/// </summary>
1632+
public static unsafe Vector<uint> ZeroExtendWideningUpper(Vector<ushort> value) => ZeroExtendWideningUpper(value);
1633+
1634+
/// <summary>
1635+
/// svuint64_t svunpkhi[_u64](svuint32_t op)
1636+
/// UUNPKHI Zresult.D, Zop.S
1637+
/// </summary>
1638+
public static unsafe Vector<ulong> ZeroExtendWideningUpper(Vector<uint> value) => ZeroExtendWideningUpper(value);
1639+
15551640
/// ZipHigh : Interleave elements from high halves of two inputs
15561641

15571642
/// <summary>
15581643
/// svuint8_t svzip2[_u8](svuint8_t op1, svuint8_t op2)
15591644
/// ZIP2 Zresult.B, Zop1.B, Zop2.B
1560-
/// svbool_t svzip2_b8(svbool_t op1, svbool_t op2)
1561-
/// ZIP2 Presult.B, Pop1.B, Pop2.B
15621645
/// </summary>
1563-
1564-
15651646
public static unsafe Vector<byte> ZipHigh(Vector<byte> left, Vector<byte> right) => ZipHigh(left, right);
15661647

15671648
/// <summary>

0 commit comments

Comments
 (0)