Skip to content

Commit 8d0ac71

Browse files
committed
ARM64-SVE: gathervector
1 parent 7393b6e commit 8d0ac71

File tree

9 files changed

+1020
-0
lines changed

9 files changed

+1020
-0
lines changed

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1607,6 +1607,12 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
16071607
assert(varTypeIsSIMD(op2->TypeGet()));
16081608
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd));
16091609
}
1610+
#elif defined(TARGET_ARM64)
1611+
if (intrinsic == NI_Sve_GatherVector)
1612+
{
1613+
assert(varTypeIsSIMD(op3->TypeGet()));
1614+
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd));
1615+
}
16101616
#endif
16111617
break;
16121618
}

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,6 +1845,37 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
18451845
break;
18461846
}
18471847

1848+
case NI_Sve_GatherVector:
1849+
{
1850+
if (!varTypeIsSIMD(intrin.op2->gtType))
1851+
{
1852+
// GatherVector(Vector<T> mask, T* address, Vector<T2> indices)
1853+
1854+
var_types auxType = node->GetAuxiliaryType();
1855+
emitAttr auxSize = emitActualTypeSize(auxType);
1856+
1857+
if (auxSize == EA_8BYTE)
1858+
{
1859+
opt = varTypeIsUnsigned(auxType) ? INS_OPTS_SCALABLE_D_UXTW : INS_OPTS_SCALABLE_D_SXTW;
1860+
}
1861+
else
1862+
{
1863+
assert(auxSize == EA_4BYTE);
1864+
opt = varTypeIsUnsigned(auxType) ? INS_OPTS_SCALABLE_S_UXTW : INS_OPTS_SCALABLE_S_SXTW;
1865+
}
1866+
1867+
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt, INS_SCALABLE_OPTS_MOD_N);
1868+
}
1869+
else
1870+
{
1871+
// GatherVector(Vector<T> mask, Vector<T2> addresses)
1872+
1873+
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt);
1874+
}
1875+
1876+
break;
1877+
}
1878+
18481879
case NI_Sve_ReverseElement:
18491880
// Use non-predicated version explicitly
18501881
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt, INS_SCALABLE_OPTS_UNPREDICATED);

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated,
7373
HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
7474
HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation)
7575
HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
76+
77+
HARDWARE_INTRINSIC(Sve, GatherVector, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation)
78+
7679
HARDWARE_INTRINSIC(Sve, GetActiveElementCount, -1, 2, true, {INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation)
7780
HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
7881
HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,6 +1466,117 @@ internal Arm64() { }
14661466
public static unsafe Vector<float> FusedMultiplySubtractNegated(Vector<float> minuend, Vector<float> left, Vector<float> right) { throw new PlatformNotSupportedException(); }
14671467

14681468

1469+
/// Unextended load
1470+
1471+
/// <summary>
1472+
/// svfloat64_t svld1_gather_[s64]index[_f64](svbool_t pg, const float64_t *base, svint64_t indices)
1473+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1474+
/// </summary>
1475+
public static unsafe Vector<double> GatherVector(Vector<double> mask, double* address, Vector<long> indices) { throw new PlatformNotSupportedException(); }
1476+
1477+
/// <summary>
1478+
/// svfloat64_t svld1_gather[_u64base]_f64(svbool_t pg, svuint64_t bases)
1479+
/// LD1D Zresult.D, Pg/Z, [Zbases.D, #0]
1480+
/// </summary>
1481+
public static unsafe Vector<double> GatherVector(Vector<double> mask, Vector<ulong> addresses) { throw new PlatformNotSupportedException(); }
1482+
1483+
/// <summary>
1484+
/// svfloat64_t svld1_gather_[u64]index[_f64](svbool_t pg, const float64_t *base, svuint64_t indices)
1485+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1486+
/// </summary>
1487+
public static unsafe Vector<double> GatherVector(Vector<double> mask, double* address, Vector<ulong> indices) { throw new PlatformNotSupportedException(); }
1488+
1489+
/// <summary>
1490+
/// svint32_t svld1_gather_[s32]index[_s32](svbool_t pg, const int32_t *base, svint32_t indices)
1491+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, SXTW #2]
1492+
/// </summary>
1493+
public static unsafe Vector<int> GatherVector(Vector<int> mask, int* address, Vector<int> indices) { throw new PlatformNotSupportedException(); }
1494+
1495+
/// <summary>
1496+
/// svint32_t svld1_gather[_u32base]_s32(svbool_t pg, svuint32_t bases)
1497+
/// LD1W Zresult.S, Pg/Z, [Zbases.S, #0]
1498+
/// </summary>
1499+
public static unsafe Vector<int> GatherVector(Vector<int> mask, Vector<uint> addresses) { throw new PlatformNotSupportedException(); }
1500+
1501+
/// <summary>
1502+
/// svint32_t svld1_gather_[u32]index[_s32](svbool_t pg, const int32_t *base, svuint32_t indices)
1503+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, UXTW #2]
1504+
/// </summary>
1505+
public static unsafe Vector<int> GatherVector(Vector<int> mask, int* address, Vector<uint> indices) { throw new PlatformNotSupportedException(); }
1506+
1507+
/// <summary>
1508+
/// svint64_t svld1_gather_[s64]index[_s64](svbool_t pg, const int64_t *base, svint64_t indices)
1509+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1510+
/// </summary>
1511+
public static unsafe Vector<long> GatherVector(Vector<long> mask, long* address, Vector<long> indices) { throw new PlatformNotSupportedException(); }
1512+
1513+
/// <summary>
1514+
/// svint64_t svld1_gather[_u64base]_s64(svbool_t pg, svuint64_t bases)
1515+
/// LD1D Zresult.D, Pg/Z, [Zbases.D, #0]
1516+
/// </summary>
1517+
public static unsafe Vector<long> GatherVector(Vector<long> mask, Vector<ulong> addresses) { throw new PlatformNotSupportedException(); }
1518+
1519+
/// <summary>
1520+
/// svint64_t svld1_gather_[u64]index[_s64](svbool_t pg, const int64_t *base, svuint64_t indices)
1521+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1522+
/// </summary>
1523+
public static unsafe Vector<long> GatherVector(Vector<long> mask, long* address, Vector<ulong> indices) { throw new PlatformNotSupportedException(); }
1524+
1525+
/// <summary>
1526+
/// svfloat32_t svld1_gather_[s32]index[_f32](svbool_t pg, const float32_t *base, svint32_t indices)
1527+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, SXTW #2]
1528+
/// </summary>
1529+
public static unsafe Vector<float> GatherVector(Vector<float> mask, float* address, Vector<int> indices) { throw new PlatformNotSupportedException(); }
1530+
1531+
/// <summary>
1532+
/// svfloat32_t svld1_gather[_u32base]_f32(svbool_t pg, svuint32_t bases)
1533+
/// LD1W Zresult.S, Pg/Z, [Zbases.S, #0]
1534+
/// </summary>
1535+
public static unsafe Vector<float> GatherVector(Vector<float> mask, Vector<uint> addresses) { throw new PlatformNotSupportedException(); }
1536+
1537+
/// <summary>
1538+
/// svfloat32_t svld1_gather_[u32]index[_f32](svbool_t pg, const float32_t *base, svuint32_t indices)
1539+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, UXTW #2]
1540+
/// </summary>
1541+
public static unsafe Vector<float> GatherVector(Vector<float> mask, float* address, Vector<uint> indices) { throw new PlatformNotSupportedException(); }
1542+
1543+
/// <summary>
1544+
/// svuint32_t svld1_gather_[s32]index[_u32](svbool_t pg, const uint32_t *base, svint32_t indices)
1545+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, SXTW #2]
1546+
/// </summary>
1547+
public static unsafe Vector<uint> GatherVector(Vector<uint> mask, uint* address, Vector<int> indices) { throw new PlatformNotSupportedException(); }
1548+
1549+
/// <summary>
1550+
/// svuint32_t svld1_gather[_u32base]_u32(svbool_t pg, svuint32_t bases)
1551+
/// LD1W Zresult.S, Pg/Z, [Zbases.S, #0]
1552+
/// </summary>
1553+
public static unsafe Vector<uint> GatherVector(Vector<uint> mask, Vector<uint> addresses) { throw new PlatformNotSupportedException(); }
1554+
1555+
/// <summary>
1556+
/// svuint32_t svld1_gather_[u32]index[_u32](svbool_t pg, const uint32_t *base, svuint32_t indices)
1557+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, UXTW #2]
1558+
/// </summary>
1559+
public static unsafe Vector<uint> GatherVector(Vector<uint> mask, uint* address, Vector<uint> indices) { throw new PlatformNotSupportedException(); }
1560+
1561+
/// <summary>
1562+
/// svuint64_t svld1_gather_[s64]index[_u64](svbool_t pg, const uint64_t *base, svint64_t indices)
1563+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1564+
/// </summary>
1565+
public static unsafe Vector<ulong> GatherVector(Vector<ulong> mask, ulong* address, Vector<long> indices) { throw new PlatformNotSupportedException(); }
1566+
1567+
/// <summary>
1568+
/// svuint64_t svld1_gather[_u64base]_u64(svbool_t pg, svuint64_t bases)
1569+
/// LD1D Zresult.D, Pg/Z, [Zbases.D, #0]
1570+
/// </summary>
1571+
public static unsafe Vector<ulong> GatherVector(Vector<ulong> mask, Vector<ulong> addresses) { throw new PlatformNotSupportedException(); }
1572+
1573+
/// <summary>
1574+
/// svuint64_t svld1_gather_[u64]index[_u64](svbool_t pg, const uint64_t *base, svuint64_t indices)
1575+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1576+
/// </summary>
1577+
public static unsafe Vector<ulong> GatherVector(Vector<ulong> mask, ulong* address, Vector<ulong> indices) { throw new PlatformNotSupportedException(); }
1578+
1579+
14691580
/// Count set predicate bits
14701581

14711582
/// <summary>

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,6 +1522,117 @@ internal Arm64() { }
15221522
public static unsafe Vector<float> FusedMultiplySubtractNegated(Vector<float> minuend, Vector<float> left, Vector<float> right) => FusedMultiplySubtractNegated(minuend, left, right);
15231523

15241524

1525+
/// Unextended load
1526+
1527+
/// <summary>
1528+
/// svfloat64_t svld1_gather_[s64]index[_f64](svbool_t pg, const float64_t *base, svint64_t indices)
1529+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1530+
/// </summary>
1531+
public static unsafe Vector<double> GatherVector(Vector<double> mask, double* address, Vector<long> indices) => GatherVector(mask, address, indices);
1532+
1533+
/// <summary>
1534+
/// svfloat64_t svld1_gather[_u64base]_f64(svbool_t pg, svuint64_t bases)
1535+
/// LD1D Zresult.D, Pg/Z, [Zbases.D, #0]
1536+
/// </summary>
1537+
public static unsafe Vector<double> GatherVector(Vector<double> mask, Vector<ulong> addresses) => GatherVector(mask, addresses);
1538+
1539+
/// <summary>
1540+
/// svfloat64_t svld1_gather_[u64]index[_f64](svbool_t pg, const float64_t *base, svuint64_t indices)
1541+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1542+
/// </summary>
1543+
public static unsafe Vector<double> GatherVector(Vector<double> mask, double* address, Vector<ulong> indices) => GatherVector(mask, address, indices);
1544+
1545+
/// <summary>
1546+
/// svint32_t svld1_gather_[s32]index[_s32](svbool_t pg, const int32_t *base, svint32_t indices)
1547+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, SXTW #2]
1548+
/// </summary>
1549+
public static unsafe Vector<int> GatherVector(Vector<int> mask, int* address, Vector<int> indices) => GatherVector(mask, address, indices);
1550+
1551+
/// <summary>
1552+
/// svint32_t svld1_gather[_u32base]_s32(svbool_t pg, svuint32_t bases)
1553+
/// LD1W Zresult.S, Pg/Z, [Zbases.S, #0]
1554+
/// </summary>
1555+
public static unsafe Vector<int> GatherVector(Vector<int> mask, Vector<uint> addresses) => GatherVector(mask, addresses);
1556+
1557+
/// <summary>
1558+
/// svint32_t svld1_gather_[u32]index[_s32](svbool_t pg, const int32_t *base, svuint32_t indices)
1559+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, UXTW #2]
1560+
/// </summary>
1561+
public static unsafe Vector<int> GatherVector(Vector<int> mask, int* address, Vector<uint> indices) => GatherVector(mask, address, indices);
1562+
1563+
/// <summary>
1564+
/// svint64_t svld1_gather_[s64]index[_s64](svbool_t pg, const int64_t *base, svint64_t indices)
1565+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1566+
/// </summary>
1567+
public static unsafe Vector<long> GatherVector(Vector<long> mask, long* address, Vector<long> indices) => GatherVector(mask, address, indices);
1568+
1569+
/// <summary>
1570+
/// svint64_t svld1_gather[_u64base]_s64(svbool_t pg, svuint64_t bases)
1571+
/// LD1D Zresult.D, Pg/Z, [Zbases.D, #0]
1572+
/// </summary>
1573+
public static unsafe Vector<long> GatherVector(Vector<long> mask, Vector<ulong> addresses) => GatherVector(mask, addresses);
1574+
1575+
/// <summary>
1576+
/// svint64_t svld1_gather_[u64]index[_s64](svbool_t pg, const int64_t *base, svuint64_t indices)
1577+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1578+
/// </summary>
1579+
public static unsafe Vector<long> GatherVector(Vector<long> mask, long* address, Vector<ulong> indices) => GatherVector(mask, address, indices);
1580+
1581+
/// <summary>
1582+
/// svfloat32_t svld1_gather_[s32]index[_f32](svbool_t pg, const float32_t *base, svint32_t indices)
1583+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, SXTW #2]
1584+
/// </summary>
1585+
public static unsafe Vector<float> GatherVector(Vector<float> mask, float* address, Vector<int> indices) => GatherVector(mask, address, indices);
1586+
1587+
/// <summary>
1588+
/// svfloat32_t svld1_gather[_u32base]_f32(svbool_t pg, svuint32_t bases)
1589+
/// LD1W Zresult.S, Pg/Z, [Zbases.S, #0]
1590+
/// </summary>
1591+
public static unsafe Vector<float> GatherVector(Vector<float> mask, Vector<uint> addresses) => GatherVector(mask, addresses);
1592+
1593+
/// <summary>
1594+
/// svfloat32_t svld1_gather_[u32]index[_f32](svbool_t pg, const float32_t *base, svuint32_t indices)
1595+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, UXTW #2]
1596+
/// </summary>
1597+
public static unsafe Vector<float> GatherVector(Vector<float> mask, float* address, Vector<uint> indices) => GatherVector(mask, address, indices);
1598+
1599+
/// <summary>
1600+
/// svuint32_t svld1_gather_[s32]index[_u32](svbool_t pg, const uint32_t *base, svint32_t indices)
1601+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, SXTW #2]
1602+
/// </summary>
1603+
public static unsafe Vector<uint> GatherVector(Vector<uint> mask, uint* address, Vector<int> indices) => GatherVector(mask, address, indices);
1604+
1605+
/// <summary>
1606+
/// svuint32_t svld1_gather[_u32base]_u32(svbool_t pg, svuint32_t bases)
1607+
/// LD1W Zresult.S, Pg/Z, [Zbases.S, #0]
1608+
/// </summary>
1609+
public static unsafe Vector<uint> GatherVector(Vector<uint> mask, Vector<uint> addresses) => GatherVector(mask, addresses);
1610+
1611+
/// <summary>
1612+
/// svuint32_t svld1_gather_[u32]index[_u32](svbool_t pg, const uint32_t *base, svuint32_t indices)
1613+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zindices.S, UXTW #2]
1614+
/// </summary>
1615+
public static unsafe Vector<uint> GatherVector(Vector<uint> mask, uint* address, Vector<uint> indices) => GatherVector(mask, address, indices);
1616+
1617+
/// <summary>
1618+
/// svuint64_t svld1_gather_[s64]index[_u64](svbool_t pg, const uint64_t *base, svint64_t indices)
1619+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1620+
/// </summary>
1621+
public static unsafe Vector<ulong> GatherVector(Vector<ulong> mask, ulong* address, Vector<long> indices) => GatherVector(mask, address, indices);
1622+
1623+
/// <summary>
1624+
/// svuint64_t svld1_gather[_u64base]_u64(svbool_t pg, svuint64_t bases)
1625+
/// LD1D Zresult.D, Pg/Z, [Zbases.D, #0]
1626+
/// </summary>
1627+
public static unsafe Vector<ulong> GatherVector(Vector<ulong> mask, Vector<ulong> addresses) => GatherVector(mask, addresses);
1628+
1629+
/// <summary>
1630+
/// svuint64_t svld1_gather_[u64]index[_u64](svbool_t pg, const uint64_t *base, svuint64_t indices)
1631+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zindices.D, LSL #3]
1632+
/// </summary>
1633+
public static unsafe Vector<ulong> GatherVector(Vector<ulong> mask, ulong* address, Vector<ulong> indices) => GatherVector(mask, address, indices);
1634+
1635+
15251636
/// Count set predicate bits
15261637

15271638
/// <summary>

0 commit comments

Comments
 (0)