Skip to content

Commit abaa66a

Browse files
committed
ARM64-SVE: GatherVectorWithByteOffsets
1 parent 9df96f9 commit abaa66a

File tree

10 files changed

+899
-2
lines changed

10 files changed

+899
-2
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27476,6 +27476,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
2747627476
addr = Op(3);
2747727477
break;
2747827478

27479+
case NI_Sve_GatherVectorWithByteOffsets:
2747927480
case NI_Sve_LoadVector:
2748027481
case NI_Sve_LoadVectorNonTemporal:
2748127482
case NI_Sve_LoadVector128AndReplicateToVector:

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1598,6 +1598,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
15981598
case NI_Sve_GatherVectorUInt16ZeroExtend:
15991599
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend:
16001600
case NI_Sve_GatherVectorUInt32ZeroExtend:
1601+
case NI_Sve_GatherVectorWithByteOffsets:
16011602
assert(varTypeIsSIMD(op3->TypeGet()));
16021603
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd));
16031604
break;

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1922,6 +1922,27 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
19221922
break;
19231923
}
19241924

1925+
case NI_Sve_GatherVectorWithByteOffsets:
1926+
{
1927+
assert(!varTypeIsSIMD(intrin.op2->gtType));
1928+
assert(intrin.numOperands == 3);
1929+
emitAttr baseSize = emitActualTypeSize(intrin.baseType);
1930+
1931+
if (baseSize == EA_4BYTE)
1932+
{
1933+
// Index is sign or zero extended to 64bits.
1934+
opt = varTypeIsUnsigned(node->GetAuxiliaryType()) ? INS_OPTS_SCALABLE_S_UXTW
1935+
: INS_OPTS_SCALABLE_S_SXTW;
1936+
}
1937+
else
1938+
{
1939+
assert(baseSize == EA_8BYTE);
1940+
}
1941+
1942+
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt);
1943+
break;
1944+
}
1945+
19251946
case NI_Sve_ReverseElement:
19261947
// Use non-predicated version explicitly
19271948
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt, INS_SCALABLE_OPTS_UNPREDICATED);

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ HARDWARE_INTRINSIC(Sve, GatherVectorUInt16WithByteOffsetsZeroExtend,
8686
HARDWARE_INTRINSIC(Sve, GatherVectorUInt16ZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
8787
HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
8888
HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
89+
HARDWARE_INTRINSIC(Sve, GatherVectorWithByteOffsets, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
8990
HARDWARE_INTRINSIC(Sve, GetActiveElementCount, -1, 2, true, {INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation)
9091
HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
9192
HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2227,6 +2227,81 @@ internal Arm64() { }
22272227
public static unsafe Vector<ulong> GatherVectorUInt32ZeroExtend(Vector<ulong> mask, uint* address, Vector<ulong> indices) { throw new PlatformNotSupportedException(); }
22282228

22292229

2230+
/// Unextended load
2231+
2232+
/// <summary>
2233+
/// svfloat64_t svld1_gather_[s64]offset[_f64](svbool_t pg, const float64_t *base, svint64_t offsets)
2234+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2235+
/// </summary>
2236+
public static unsafe Vector<double> GatherVectorWithByteOffsets(Vector<double> mask, double* address, Vector<long> offsets) { throw new PlatformNotSupportedException(); }
2237+
2238+
/// <summary>
2239+
/// svfloat64_t svld1_gather_[u64]offset[_f64](svbool_t pg, const float64_t *base, svuint64_t offsets)
2240+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2241+
/// </summary>
2242+
public static unsafe Vector<double> GatherVectorWithByteOffsets(Vector<double> mask, double* address, Vector<ulong> offsets) { throw new PlatformNotSupportedException(); }
2243+
2244+
/// <summary>
2245+
/// svint32_t svld1_gather_[s32]offset[_s32](svbool_t pg, const int32_t *base, svint32_t offsets)
2246+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, SXTW]
2247+
/// </summary>
2248+
public static unsafe Vector<int> GatherVectorWithByteOffsets(Vector<int> mask, int* address, Vector<int> offsets) { throw new PlatformNotSupportedException(); }
2249+
2250+
/// <summary>
2251+
/// svint32_t svld1_gather_[u32]offset[_s32](svbool_t pg, const int32_t *base, svuint32_t offsets)
2252+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, UXTW]
2253+
/// </summary>
2254+
public static unsafe Vector<int> GatherVectorWithByteOffsets(Vector<int> mask, int* address, Vector<uint> offsets) { throw new PlatformNotSupportedException(); }
2255+
2256+
/// <summary>
2257+
/// svint64_t svld1_gather_[s64]offset[_s64](svbool_t pg, const int64_t *base, svint64_t offsets)
2258+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2259+
/// </summary>
2260+
public static unsafe Vector<long> GatherVectorWithByteOffsets(Vector<long> mask, long* address, Vector<long> offsets) { throw new PlatformNotSupportedException(); }
2261+
2262+
/// <summary>
2263+
/// svint64_t svld1_gather_[u64]offset[_s64](svbool_t pg, const int64_t *base, svuint64_t offsets)
2264+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2265+
/// </summary>
2266+
public static unsafe Vector<long> GatherVectorWithByteOffsets(Vector<long> mask, long* address, Vector<ulong> offsets) { throw new PlatformNotSupportedException(); }
2267+
2268+
/// <summary>
2269+
/// svfloat32_t svld1_gather_[s32]offset[_f32](svbool_t pg, const float32_t *base, svint32_t offsets)
2270+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, SXTW]
2271+
/// </summary>
2272+
public static unsafe Vector<float> GatherVectorWithByteOffsets(Vector<float> mask, float* address, Vector<int> offsets) { throw new PlatformNotSupportedException(); }
2273+
2274+
/// <summary>
2275+
/// svfloat32_t svld1_gather_[u32]offset[_f32](svbool_t pg, const float32_t *base, svuint32_t offsets)
2276+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, UXTW]
2277+
/// </summary>
2278+
public static unsafe Vector<float> GatherVectorWithByteOffsets(Vector<float> mask, float* address, Vector<uint> offsets) { throw new PlatformNotSupportedException(); }
2279+
2280+
/// <summary>
2281+
/// svuint32_t svld1_gather_[s32]offset[_u32](svbool_t pg, const uint32_t *base, svint32_t offsets)
2282+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, SXTW]
2283+
/// </summary>
2284+
public static unsafe Vector<uint> GatherVectorWithByteOffsets(Vector<uint> mask, uint* address, Vector<int> offsets) { throw new PlatformNotSupportedException(); }
2285+
2286+
/// <summary>
2287+
/// svuint32_t svld1_gather_[u32]offset[_u32](svbool_t pg, const uint32_t *base, svuint32_t offsets)
2288+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, UXTW]
2289+
/// </summary>
2290+
public static unsafe Vector<uint> GatherVectorWithByteOffsets(Vector<uint> mask, uint* address, Vector<uint> offsets) { throw new PlatformNotSupportedException(); }
2291+
2292+
/// <summary>
2293+
/// svuint64_t svld1_gather_[s64]offset[_u64](svbool_t pg, const uint64_t *base, svint64_t offsets)
2294+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2295+
/// </summary>
2296+
public static unsafe Vector<ulong> GatherVectorWithByteOffsets(Vector<ulong> mask, ulong* address, Vector<long> offsets) { throw new PlatformNotSupportedException(); }
2297+
2298+
/// <summary>
2299+
/// svuint64_t svld1_gather_[u64]offset[_u64](svbool_t pg, const uint64_t *base, svuint64_t offsets)
2300+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2301+
/// </summary>
2302+
public static unsafe Vector<ulong> GatherVectorWithByteOffsets(Vector<ulong> mask, ulong* address, Vector<ulong> offsets) { throw new PlatformNotSupportedException(); }
2303+
2304+
22302305
/// Count set predicate bits
22312306

22322307
/// <summary>

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2283,6 +2283,81 @@ internal Arm64() { }
22832283
public static unsafe Vector<ulong> GatherVectorUInt32ZeroExtend(Vector<ulong> mask, uint* address, Vector<ulong> indices) => GatherVectorUInt32ZeroExtend(mask, address, indices);
22842284

22852285

2286+
/// Unextended load
2287+
2288+
/// <summary>
2289+
/// svfloat64_t svld1_gather_[s64]offset[_f64](svbool_t pg, const float64_t *base, svint64_t offsets)
2290+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2291+
/// </summary>
2292+
public static unsafe Vector<double> GatherVectorWithByteOffsets(Vector<double> mask, double* address, Vector<long> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2293+
2294+
/// <summary>
2295+
/// svfloat64_t svld1_gather_[u64]offset[_f64](svbool_t pg, const float64_t *base, svuint64_t offsets)
2296+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2297+
/// </summary>
2298+
public static unsafe Vector<double> GatherVectorWithByteOffsets(Vector<double> mask, double* address, Vector<ulong> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2299+
2300+
/// <summary>
2301+
/// svint32_t svld1_gather_[s32]offset[_s32](svbool_t pg, const int32_t *base, svint32_t offsets)
2302+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, SXTW]
2303+
/// </summary>
2304+
public static unsafe Vector<int> GatherVectorWithByteOffsets(Vector<int> mask, int* address, Vector<int> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2305+
2306+
/// <summary>
2307+
/// svint32_t svld1_gather_[u32]offset[_s32](svbool_t pg, const int32_t *base, svuint32_t offsets)
2308+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, UXTW]
2309+
/// </summary>
2310+
public static unsafe Vector<int> GatherVectorWithByteOffsets(Vector<int> mask, int* address, Vector<uint> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2311+
2312+
/// <summary>
2313+
/// svint64_t svld1_gather_[s64]offset[_s64](svbool_t pg, const int64_t *base, svint64_t offsets)
2314+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2315+
/// </summary>
2316+
public static unsafe Vector<long> GatherVectorWithByteOffsets(Vector<long> mask, long* address, Vector<long> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2317+
2318+
/// <summary>
2319+
/// svint64_t svld1_gather_[u64]offset[_s64](svbool_t pg, const int64_t *base, svuint64_t offsets)
2320+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2321+
/// </summary>
2322+
public static unsafe Vector<long> GatherVectorWithByteOffsets(Vector<long> mask, long* address, Vector<ulong> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2323+
2324+
/// <summary>
2325+
/// svfloat32_t svld1_gather_[s32]offset[_f32](svbool_t pg, const float32_t *base, svint32_t offsets)
2326+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, SXTW]
2327+
/// </summary>
2328+
public static unsafe Vector<float> GatherVectorWithByteOffsets(Vector<float> mask, float* address, Vector<int> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2329+
2330+
/// <summary>
2331+
/// svfloat32_t svld1_gather_[u32]offset[_f32](svbool_t pg, const float32_t *base, svuint32_t offsets)
2332+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, UXTW]
2333+
/// </summary>
2334+
public static unsafe Vector<float> GatherVectorWithByteOffsets(Vector<float> mask, float* address, Vector<uint> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2335+
2336+
/// <summary>
2337+
/// svuint32_t svld1_gather_[s32]offset[_u32](svbool_t pg, const uint32_t *base, svint32_t offsets)
2338+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, SXTW]
2339+
/// </summary>
2340+
public static unsafe Vector<uint> GatherVectorWithByteOffsets(Vector<uint> mask, uint* address, Vector<int> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2341+
2342+
/// <summary>
2343+
/// svuint32_t svld1_gather_[u32]offset[_u32](svbool_t pg, const uint32_t *base, svuint32_t offsets)
2344+
/// LD1W Zresult.S, Pg/Z, [Xbase, Zoffsets.S, UXTW]
2345+
/// </summary>
2346+
public static unsafe Vector<uint> GatherVectorWithByteOffsets(Vector<uint> mask, uint* address, Vector<uint> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2347+
2348+
/// <summary>
2349+
/// svuint64_t svld1_gather_[s64]offset[_u64](svbool_t pg, const uint64_t *base, svint64_t offsets)
2350+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2351+
/// </summary>
2352+
public static unsafe Vector<ulong> GatherVectorWithByteOffsets(Vector<ulong> mask, ulong* address, Vector<long> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2353+
2354+
/// <summary>
2355+
/// svuint64_t svld1_gather_[u64]offset[_u64](svbool_t pg, const uint64_t *base, svuint64_t offsets)
2356+
/// LD1D Zresult.D, Pg/Z, [Xbase, Zoffsets.D]
2357+
/// </summary>
2358+
public static unsafe Vector<ulong> GatherVectorWithByteOffsets(Vector<ulong> mask, ulong* address, Vector<ulong> offsets) => GatherVectorWithByteOffsets(mask, address, offsets);
2359+
2360+
22862361
/// Count set predicate bits
22872362

22882363
/// <summary>

src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4517,6 +4517,19 @@ internal Arm64() { }
45174517
public static unsafe System.Numerics.Vector<ulong> GatherVectorUInt32ZeroExtend(System.Numerics.Vector<ulong> mask, uint* address, System.Numerics.Vector<long> indices) { throw null; }
45184518
public static System.Numerics.Vector<ulong> GatherVectorUInt32ZeroExtend(System.Numerics.Vector<ulong> mask, System.Numerics.Vector<ulong> addresses) { throw null; }
45194519
public static unsafe System.Numerics.Vector<ulong> GatherVectorUInt32ZeroExtend(System.Numerics.Vector<ulong> mask, uint* address, System.Numerics.Vector<ulong> indices) { throw null; }
4520+
public static unsafe System.Numerics.Vector<double> GatherVectorWithByteOffsets(System.Numerics.Vector<double> mask, double* address, System.Numerics.Vector<long> offsets) { throw null; }
4521+
public static unsafe System.Numerics.Vector<double> GatherVectorWithByteOffsets(System.Numerics.Vector<double> mask, double* address, System.Numerics.Vector<ulong> offsets) { throw null; }
4522+
4523+
public static unsafe System.Numerics.Vector<int> GatherVectorWithByteOffsets(System.Numerics.Vector<int> mask, int* address, System.Numerics.Vector<int> offsets) { throw null; }
4524+
public static unsafe System.Numerics.Vector<int> GatherVectorWithByteOffsets(System.Numerics.Vector<int> mask, int* address, System.Numerics.Vector<uint> offsets) { throw null; }
4525+
public static unsafe System.Numerics.Vector<long> GatherVectorWithByteOffsets(System.Numerics.Vector<long> mask, long* address, System.Numerics.Vector<long> offsets) { throw null; }
4526+
public static unsafe System.Numerics.Vector<long> GatherVectorWithByteOffsets(System.Numerics.Vector<long> mask, long* address, System.Numerics.Vector<ulong> offsets) { throw null; }
4527+
public static unsafe System.Numerics.Vector<float> GatherVectorWithByteOffsets(System.Numerics.Vector<float> mask, float* address, System.Numerics.Vector<int> offsets) { throw null; }
4528+
public static unsafe System.Numerics.Vector<float> GatherVectorWithByteOffsets(System.Numerics.Vector<float> mask, float* address, System.Numerics.Vector<uint> offsets) { throw null; }
4529+
public static unsafe System.Numerics.Vector<uint> GatherVectorWithByteOffsets(System.Numerics.Vector<uint> mask, uint* address, System.Numerics.Vector<int> offsets) { throw null; }
4530+
public static unsafe System.Numerics.Vector<uint> GatherVectorWithByteOffsets(System.Numerics.Vector<uint> mask, uint* address, System.Numerics.Vector<uint> offsets) { throw null; }
4531+
public static unsafe System.Numerics.Vector<ulong> GatherVectorWithByteOffsets(System.Numerics.Vector<ulong> mask, ulong* address, System.Numerics.Vector<long> offsets) { throw null; }
4532+
public static unsafe System.Numerics.Vector<ulong> GatherVectorWithByteOffsets(System.Numerics.Vector<ulong> mask, ulong* address, System.Numerics.Vector<ulong> offsets) { throw null; }
45204533

45214534
public static ulong GetActiveElementCount(System.Numerics.Vector<byte> mask, System.Numerics.Vector<byte> from) { throw null; }
45224535
public static ulong GetActiveElementCount(System.Numerics.Vector<double> mask, System.Numerics.Vector<double> from) { throw null; }

0 commit comments

Comments
 (0)