Skip to content

Commit 0588f24

Browse files
Add support for Sve.StoreNarrowing() (#102605)
* Add Sve.StoreNarrowing() * Incorporate review comments for Sve.StoreAndZip() * Fix formatting issues
1 parent 35e4aad commit 0588f24

File tree

13 files changed

+609
-80
lines changed

13 files changed

+609
-80
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26711,6 +26711,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore(GenTree** pAddr) const
2671126711
case NI_Sve_StoreAndZipx2:
2671226712
case NI_Sve_StoreAndZipx3:
2671326713
case NI_Sve_StoreAndZipx4:
26714+
case NI_Sve_StoreNarrowing:
2671426715
addr = Op(2);
2671526716
break;
2671626717
#endif // TARGET_ARM64

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2479,6 +2479,34 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
24792479
break;
24802480
}
24812481

2482+
case NI_Sve_StoreNarrowing:
2483+
{
2484+
assert(sig->numArgs == 3);
2485+
assert(retType == TYP_VOID);
2486+
2487+
CORINFO_ARG_LIST_HANDLE arg = sig->args;
2488+
arg = info.compCompHnd->getArgNext(arg);
2489+
CORINFO_CLASS_HANDLE argClass = info.compCompHnd->getArgClass(sig, arg);
2490+
CorInfoType ptrType = getBaseJitTypeAndSizeOfSIMDType(argClass);
2491+
CORINFO_CLASS_HANDLE tmpClass = NO_CLASS_HANDLE;
2492+
2493+
// The size of narrowed target elements is determined from the second argument of StoreNarrowing().
2494+
// Thus, we first extract the datatype of a pointer passed in the second argument and then store it as the
2495+
// auxiliary type of the intrinsic. This auxiliary type is then used in the codegen to choose the correct
2496+
// instruction to emit.
2497+
ptrType = strip(info.compCompHnd->getArgType(sig, arg, &tmpClass));
2498+
assert(ptrType == CORINFO_TYPE_PTR);
2499+
ptrType = info.compCompHnd->getChildType(argClass, &tmpClass);
2500+
assert(ptrType < simdBaseJitType);
2501+
2502+
op3 = impPopStack().val;
2503+
op2 = impPopStack().val;
2504+
op1 = impPopStack().val;
2505+
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
2506+
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(ptrType);
2507+
break;
2508+
}
2509+
24822510
default:
24832511
{
24842512
return nullptr;

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
884884
ins = varTypeIsUnsigned(intrin.baseType) ? INS_umsubl : INS_smsubl;
885885
break;
886886

887+
case NI_Sve_StoreNarrowing:
888+
ins = HWIntrinsicInfo::lookupIns(intrin.id, node->GetAuxiliaryType());
889+
break;
890+
887891
default:
888892
ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType);
889893
break;
@@ -1773,6 +1777,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
17731777
break;
17741778
}
17751779

1780+
case NI_Sve_StoreNarrowing:
1781+
opt = emitter::optGetSveInsOpt(emitTypeSize(intrin.baseType));
1782+
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, op3Reg, op1Reg, op2Reg, 0, opt);
1783+
break;
1784+
17761785
case NI_Sve_UnzipEven:
17771786
case NI_Sve_UnzipOdd:
17781787
case NI_Sve_ZipHigh:

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ HARDWARE_INTRINSIC(Sve, SignExtend8,
118118
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
119119
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
120120
HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
121+
HARDWARE_INTRINSIC(Sve, StoreNarrowing, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
121122
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
122123
HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, true, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
123124
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2998,6 +2998,80 @@ internal Arm64() { }
29982998
/// ST4D {Zdata0.D - Zdata3.D}, Pg, [Xbase, #0, MUL VL]
29992999
/// </summary>
30003000
public static unsafe void StoreAndZip(Vector<ulong> mask, ulong* address, (Vector<ulong> Value1, Vector<ulong> Value2, Vector<ulong> Value3, Vector<ulong> Value4) data) { throw new PlatformNotSupportedException(); }
3001+
/// StoreNarrowing : Truncate to 8, 16, or 32 bits and store
3002+
3003+
/// <summary>
3004+
/// void svst1b[_s16](svbool_t pg, int8_t *base, svint16_t data)
3005+
/// ST1B Zdata.H, Pg, [Xbase, #0, MUL VL]
3006+
/// </summary>
3007+
public static unsafe void StoreNarrowing(Vector<short> mask, sbyte* address, Vector<short> data) { throw new PlatformNotSupportedException(); }
3008+
3009+
3010+
/// <summary>
3011+
/// void svst1b[_s32](svbool_t pg, int8_t *base, svint32_t data)
3012+
/// ST1B Zdata.S, Pg, [Xbase, #0, MUL VL]
3013+
/// </summary>
3014+
public static unsafe void StoreNarrowing(Vector<int> mask, sbyte* address, Vector<int> data) { throw new PlatformNotSupportedException(); }
3015+
3016+
/// <summary>
3017+
/// void svst1h[_s32](svbool_t pg, int16_t *base, svint32_t data)
3018+
/// ST1H Zdata.S, Pg, [Xbase, #0, MUL VL]
3019+
/// </summary>
3020+
public static unsafe void StoreNarrowing(Vector<int> mask, short* address, Vector<int> data) { throw new PlatformNotSupportedException(); }
3021+
3022+
/// <summary>
3023+
/// void svst1b[_s64](svbool_t pg, int8_t *base, svint64_t data)
3024+
/// ST1B Zdata.D, Pg, [Xbase, #0, MUL VL]
3025+
/// </summary>
3026+
public static unsafe void StoreNarrowing(Vector<long> mask, sbyte* address, Vector<long> data) { throw new PlatformNotSupportedException(); }
3027+
3028+
/// <summary>
3029+
/// void svst1h[_s64](svbool_t pg, int16_t *base, svint64_t data)
3030+
/// ST1H Zdata.D, Pg, [Xbase, #0, MUL VL]
3031+
/// </summary>
3032+
public static unsafe void StoreNarrowing(Vector<long> mask, short* address, Vector<long> data) { throw new PlatformNotSupportedException(); }
3033+
3034+
/// <summary>
3035+
/// void svst1w[_s64](svbool_t pg, int32_t *base, svint64_t data)
3036+
/// ST1W Zdata.D, Pg, [Xbase, #0, MUL VL]
3037+
/// </summary>
3038+
public static unsafe void StoreNarrowing(Vector<long> mask, int* address, Vector<long> data) { throw new PlatformNotSupportedException(); }
3039+
3040+
/// <summary>
3041+
/// void svst1b[_u16](svbool_t pg, uint8_t *base, svuint16_t data)
3042+
/// ST1B Zdata.H, Pg, [Xbase, #0, MUL VL]
3043+
/// </summary>
3044+
public static unsafe void StoreNarrowing(Vector<ushort> mask, byte* address, Vector<ushort> data) { throw new PlatformNotSupportedException(); }
3045+
3046+
/// <summary>
3047+
/// void svst1b[_u32](svbool_t pg, uint8_t *base, svuint32_t data)
3048+
/// ST1B Zdata.S, Pg, [Xbase, #0, MUL VL]
3049+
/// </summary>
3050+
public static unsafe void StoreNarrowing(Vector<uint> mask, byte* address, Vector<uint> data) { throw new PlatformNotSupportedException(); }
3051+
3052+
/// <summary>
3053+
/// void svst1h[_u32](svbool_t pg, uint16_t *base, svuint32_t data)
3054+
/// ST1H Zdata.S, Pg, [Xbase, #0, MUL VL]
3055+
/// </summary>
3056+
public static unsafe void StoreNarrowing(Vector<uint> mask, ushort* address, Vector<uint> data) { throw new PlatformNotSupportedException(); }
3057+
3058+
/// <summary>
3059+
/// void svst1b[_u64](svbool_t pg, uint8_t *base, svuint64_t data)
3060+
/// ST1B Zdata.D, Pg, [Xbase, #0, MUL VL]
3061+
/// </summary>
3062+
public static unsafe void StoreNarrowing(Vector<ulong> mask, byte* address, Vector<ulong> data) { throw new PlatformNotSupportedException(); }
3063+
3064+
/// <summary>
3065+
/// void svst1h[_u64](svbool_t pg, uint16_t *base, svuint64_t data)
3066+
/// ST1H Zdata.D, Pg, [Xbase, #0, MUL VL]
3067+
/// </summary>
3068+
public static unsafe void StoreNarrowing(Vector<ulong> mask, ushort* address, Vector<ulong> data) { throw new PlatformNotSupportedException(); }
3069+
3070+
/// <summary>
3071+
/// void svst1w[_u64](svbool_t pg, uint32_t *base, svuint64_t data)
3072+
/// ST1W Zdata.D, Pg, [Xbase, #0, MUL VL]
3073+
/// </summary>
3074+
public static unsafe void StoreNarrowing(Vector<ulong> mask, uint* address, Vector<ulong> data) { throw new PlatformNotSupportedException(); }
30013075

30023076

30033077
/// Subtract : Subtract

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3094,6 +3094,80 @@ internal Arm64() { }
30943094
/// ST4D {Zdata0.D - Zdata3.D}, Pg, [Xbase, #0, MUL VL]
30953095
/// </summary>
30963096
public static unsafe void StoreAndZip(Vector<ulong> mask, ulong* address, (Vector<ulong> Value1, Vector<ulong> Value2, Vector<ulong> Value3, Vector<ulong> Value4) data) => StoreAndZip(mask, address, data);
3097+
/// StoreNarrowing : Truncate to 8, 16, or 32 bits and store
3098+
3099+
3100+
/// <summary>
3101+
/// void svst1b[_s16](svbool_t pg, int8_t *base, svint16_t data)
3102+
/// ST1B Zdata.H, Pg, [Xbase, #0, MUL VL]
3103+
/// </summary>
3104+
public static unsafe void StoreNarrowing(Vector<short> mask, sbyte* address, Vector<short> data) => StoreNarrowing(mask, address, data);
3105+
3106+
/// <summary>
3107+
/// void svst1b[_s32](svbool_t pg, int8_t *base, svint32_t data)
3108+
/// ST1B Zdata.S, Pg, [Xbase, #0, MUL VL]
3109+
/// </summary>
3110+
public static unsafe void StoreNarrowing(Vector<int> mask, sbyte* address, Vector<int> data) => StoreNarrowing(mask, address, data);
3111+
3112+
/// <summary>
3113+
/// void svst1h[_s32](svbool_t pg, int16_t *base, svint32_t data)
3114+
/// ST1H Zdata.S, Pg, [Xbase, #0, MUL VL]
3115+
/// </summary>
3116+
public static unsafe void StoreNarrowing(Vector<int> mask, short* address, Vector<int> data) => StoreNarrowing(mask, address, data);
3117+
3118+
/// <summary>
3119+
/// void svst1b[_s64](svbool_t pg, int8_t *base, svint64_t data)
3120+
/// ST1B Zdata.D, Pg, [Xbase, #0, MUL VL]
3121+
/// </summary>
3122+
public static unsafe void StoreNarrowing(Vector<long> mask, sbyte* address, Vector<long> data) => StoreNarrowing(mask, address, data);
3123+
3124+
/// <summary>
3125+
/// void svst1h[_s64](svbool_t pg, int16_t *base, svint64_t data)
3126+
/// ST1H Zdata.D, Pg, [Xbase, #0, MUL VL]
3127+
/// </summary>
3128+
public static unsafe void StoreNarrowing(Vector<long> mask, short* address, Vector<long> data) => StoreNarrowing(mask, address, data);
3129+
3130+
/// <summary>
3131+
/// void svst1w[_s64](svbool_t pg, int32_t *base, svint64_t data)
3132+
/// ST1W Zdata.D, Pg, [Xbase, #0, MUL VL]
3133+
/// </summary>
3134+
public static unsafe void StoreNarrowing(Vector<long> mask, int* address, Vector<long> data) => StoreNarrowing(mask, address, data);
3135+
3136+
/// <summary>
3137+
/// void svst1b[_u16](svbool_t pg, uint8_t *base, svuint16_t data)
3138+
/// ST1B Zdata.H, Pg, [Xbase, #0, MUL VL]
3139+
/// </summary>
3140+
public static unsafe void StoreNarrowing(Vector<ushort> mask, byte* address, Vector<ushort> data) => StoreNarrowing(mask, address, data);
3141+
3142+
/// <summary>
3143+
/// void svst1b[_u32](svbool_t pg, uint8_t *base, svuint32_t data)
3144+
/// ST1B Zdata.S, Pg, [Xbase, #0, MUL VL]
3145+
/// </summary>
3146+
public static unsafe void StoreNarrowing(Vector<uint> mask, byte* address, Vector<uint> data) => StoreNarrowing(mask, address, data);
3147+
3148+
/// <summary>
3149+
/// void svst1h[_u32](svbool_t pg, uint16_t *base, svuint32_t data)
3150+
/// ST1H Zdata.S, Pg, [Xbase, #0, MUL VL]
3151+
/// </summary>
3152+
public static unsafe void StoreNarrowing(Vector<uint> mask, ushort* address, Vector<uint> data) => StoreNarrowing(mask, address, data);
3153+
3154+
/// <summary>
3155+
/// void svst1b[_u64](svbool_t pg, uint8_t *base, svuint64_t data)
3156+
/// ST1B Zdata.D, Pg, [Xbase, #0, MUL VL]
3157+
/// </summary>
3158+
public static unsafe void StoreNarrowing(Vector<ulong> mask, byte* address, Vector<ulong> data) => StoreNarrowing(mask, address, data);
3159+
3160+
/// <summary>
3161+
/// void svst1h[_u64](svbool_t pg, uint16_t *base, svuint64_t data)
3162+
/// ST1H Zdata.D, Pg, [Xbase, #0, MUL VL]
3163+
/// </summary>
3164+
public static unsafe void StoreNarrowing(Vector<ulong> mask, ushort* address, Vector<ulong> data) => StoreNarrowing(mask, address, data);
3165+
3166+
/// <summary>
3167+
/// void svst1w[_u64](svbool_t pg, uint32_t *base, svuint64_t data)
3168+
/// ST1W Zdata.D, Pg, [Xbase, #0, MUL VL]
3169+
/// </summary>
3170+
public static unsafe void StoreNarrowing(Vector<ulong> mask, uint* address, Vector<ulong> data) => StoreNarrowing(mask, address, data);
30973171

30983172

30993173
/// Subtract : Subtract

src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4588,6 +4588,19 @@ internal Arm64() { }
45884588
public static unsafe void StoreAndZip(System.Numerics.Vector<ulong> mask, ulong* address, (System.Numerics.Vector<ulong> Value1, System.Numerics.Vector<ulong> Value2, System.Numerics.Vector<ulong> Value3) data) { throw null; }
45894589
public static unsafe void StoreAndZip(System.Numerics.Vector<ulong> mask, ulong* address, (System.Numerics.Vector<ulong> Value1, System.Numerics.Vector<ulong> Value2, System.Numerics.Vector<ulong> Value3, System.Numerics.Vector<ulong> Value4) data) { throw null; }
45904590

4591+
public static unsafe void StoreNarrowing(System.Numerics.Vector<short> mask, sbyte* address, System.Numerics.Vector<short> data) { throw null; }
4592+
public static unsafe void StoreNarrowing(System.Numerics.Vector<int> mask, sbyte* address, System.Numerics.Vector<int> data) { throw null; }
4593+
public static unsafe void StoreNarrowing(System.Numerics.Vector<int> mask, short* address, System.Numerics.Vector<int> data) { throw null; }
4594+
public static unsafe void StoreNarrowing(System.Numerics.Vector<long> mask, sbyte* address, System.Numerics.Vector<long> data) { throw null; }
4595+
public static unsafe void StoreNarrowing(System.Numerics.Vector<long> mask, short* address, System.Numerics.Vector<long> data) { throw null; }
4596+
public static unsafe void StoreNarrowing(System.Numerics.Vector<long> mask, int* address, System.Numerics.Vector<long> data) { throw null; }
4597+
public static unsafe void StoreNarrowing(System.Numerics.Vector<ushort> mask, byte* address, System.Numerics.Vector<ushort> data) { throw null; }
4598+
public static unsafe void StoreNarrowing(System.Numerics.Vector<uint> mask, byte* address, System.Numerics.Vector<uint> data) { throw null; }
4599+
public static unsafe void StoreNarrowing(System.Numerics.Vector<uint> mask, ushort* address, System.Numerics.Vector<uint> data) { throw null; }
4600+
public static unsafe void StoreNarrowing(System.Numerics.Vector<ulong> mask, byte* address, System.Numerics.Vector<ulong> data) { throw null; }
4601+
public static unsafe void StoreNarrowing(System.Numerics.Vector<ulong> mask, ushort* address, System.Numerics.Vector<ulong> data) { throw null; }
4602+
public static unsafe void StoreNarrowing(System.Numerics.Vector<ulong> mask, uint* address, System.Numerics.Vector<ulong> data) { throw null; }
4603+
45914604
public static System.Numerics.Vector<sbyte> Subtract(System.Numerics.Vector<sbyte> left, System.Numerics.Vector<sbyte> right) { throw null; }
45924605
public static System.Numerics.Vector<short> Subtract(System.Numerics.Vector<short> left, System.Numerics.Vector<short> right) { throw null; }
45934606
public static System.Numerics.Vector<int> Subtract(System.Numerics.Vector<int> left, System.Numerics.Vector<int> right) { throw null; }

0 commit comments

Comments
 (0)