Skip to content

[mono] Intrinsify multiple StoreVector APIs #98514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3949,6 +3949,7 @@ internal Arm64() { }
/// A64: ST4 { Vt.2D, Vt+1.2D, Vt+2.2D, Vt+3.2D }[index], [Xn]
/// </summary>
public static unsafe void StoreSelectedScalar(double* address, (Vector128<double> value1, Vector128<double> value2, Vector128<double> value3, Vector128<double> value4) value, [ConstantExpected(Max = (byte)(1))] byte index) { throw new PlatformNotSupportedException(); }
#endif

/// <summary>
/// A64: ST2 { Vn.16B, Vn+1.16B }, [Xn]
Expand Down Expand Up @@ -4249,7 +4250,6 @@ internal Arm64() { }
/// A64: ST1 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
/// </summary>
public static unsafe void StoreVector128x4(double* address, (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) value) { throw new PlatformNotSupportedException(); }
#endif

/// <summary>
/// float64x2_t vsubq_f64 (float64x2_t a, float64x2_t b)
Expand Down Expand Up @@ -16039,6 +16039,7 @@ internal Arm64() { }
/// A64: ST4 { Vt.2S, Vt+1.2S, Vt+2.2S, Vt+3.2S }[index], [Xn]
/// </summary>
public static unsafe void StoreSelectedScalar(float* address, (Vector64<float> value1, Vector64<float> value2, Vector64<float> value3, Vector64<float> value4) value, [ConstantExpected(Max = (byte)(1))] byte index) { throw new PlatformNotSupportedException(); }
#endif

/// <summary>
/// A64: ST2 { Vn.8B, Vn+1.8B }, [Xn]
Expand Down Expand Up @@ -16249,7 +16250,6 @@ internal Arm64() { }
/// A64: ST1 { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }, [Xn]
/// </summary>
public static unsafe void StoreVector64x4(float* address, (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) value) { throw new PlatformNotSupportedException(); }
#endif

/// <summary>
/// uint8x8_t vsub_u8 (uint8x8_t a, uint8x8_t b)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3947,6 +3947,7 @@ internal Arm64() { }
/// A64: ST4 { Vt.2D, Vt+1.2D, Vt+2.2D, Vt+3.2D }[index], [Xn]
/// </summary>
public static unsafe void StoreSelectedScalar(double* address, (Vector128<double> value1, Vector128<double> value2, Vector128<double> value3, Vector128<double> value4) value, [ConstantExpected(Max = (byte)(1))] byte index) => StoreSelectedScalar(address, value, index);
#endif

/// <summary>
/// A64: ST2 { Vn.16B, Vn+1.16B }, [Xn]
Expand Down Expand Up @@ -4247,7 +4248,6 @@ internal Arm64() { }
/// A64: ST1 { Vn.2D, Vn+1.2D, Vn+2.2D, Vn+3.2D }, [Xn]
/// </summary>
public static unsafe void StoreVector128x4(double* address, (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) value) => StoreVector128x4(address, value);
#endif

/// <summary>
/// float64x2_t vsubq_f64 (float64x2_t a, float64x2_t b)
Expand Down Expand Up @@ -16036,6 +16036,7 @@ internal Arm64() { }
/// A64: ST4 { Vt.2S, Vt+1.2S, Vt+2.2S, Vt+3.2S }[index], [Xn]
/// </summary>
public static unsafe void StoreSelectedScalar(float* address, (Vector64<float> value1, Vector64<float> value2, Vector64<float> value3, Vector64<float> value4) value, [ConstantExpected(Max = (byte)(1))] byte index) => StoreSelectedScalar(address, value, index);
#endif

/// <summary>
/// A64: ST2 { Vn.8B, Vn+1.8B }, [Xn]
Expand Down Expand Up @@ -16246,7 +16247,6 @@ internal Arm64() { }
/// A64: ST1 { Vn.2S, Vn+1.2S, Vn+2.2S, Vn+3.2S }, [Xn]
/// </summary>
public static unsafe void StoreVector64x4(float* address, (Vector64<float> Value1, Vector64<float> Value2, Vector64<float> Value3, Vector64<float> Value4) value) => StoreVector64x4(address, value);
#endif

/// <summary>
/// uint8x8_t vsub_u8 (uint8x8_t a, uint8x8_t b)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2927,6 +2927,7 @@ public static unsafe void StoreSelectedScalar(ulong* address, System.Runtime.Int
// Stub bodies use the same `throw null` placeholder as the neighbouring declarations in this file.
public static unsafe void StoreSelectedScalar(int* address, (System.Runtime.Intrinsics.Vector64<int> value1, System.Runtime.Intrinsics.Vector64<int> value2, System.Runtime.Intrinsics.Vector64<int> value3, System.Runtime.Intrinsics.Vector64<int> value4) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index) { throw null; }
public static unsafe void StoreSelectedScalar(uint* address, (System.Runtime.Intrinsics.Vector64<uint> value1, System.Runtime.Intrinsics.Vector64<uint> value2, System.Runtime.Intrinsics.Vector64<uint> value3, System.Runtime.Intrinsics.Vector64<uint> value4) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index) { throw null; }
public static unsafe void StoreSelectedScalar(float* address, (System.Runtime.Intrinsics.Vector64<float> value1, System.Runtime.Intrinsics.Vector64<float> value2, System.Runtime.Intrinsics.Vector64<float> value3, System.Runtime.Intrinsics.Vector64<float> value4) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index) { throw null; }
#endif
public unsafe static void StoreVector64x2AndZip(byte* address, (System.Runtime.Intrinsics.Vector64<byte> Value1, System.Runtime.Intrinsics.Vector64<byte> Value2) value) { throw null; }
public unsafe static void StoreVector64x2AndZip(sbyte* address, (System.Runtime.Intrinsics.Vector64<sbyte> Value1, System.Runtime.Intrinsics.Vector64<sbyte> Value2) value) { throw null; }
public unsafe static void StoreVector64x2AndZip(short* address, (System.Runtime.Intrinsics.Vector64<short> Value1, System.Runtime.Intrinsics.Vector64<short> Value2) value) { throw null; }
Expand Down Expand Up @@ -2969,7 +2970,6 @@ public static unsafe void StoreSelectedScalar(ulong* address, System.Runtime.Int
public static unsafe void StoreVector64x4(int* address, (System.Runtime.Intrinsics.Vector64<int> Value1, System.Runtime.Intrinsics.Vector64<int> Value2, System.Runtime.Intrinsics.Vector64<int> Value3, System.Runtime.Intrinsics.Vector64<int> Value4) value) { throw null; }
public static unsafe void StoreVector64x4(uint* address, (System.Runtime.Intrinsics.Vector64<uint> Value1, System.Runtime.Intrinsics.Vector64<uint> Value2, System.Runtime.Intrinsics.Vector64<uint> Value3, System.Runtime.Intrinsics.Vector64<uint> Value4) value) { throw null; }
public static unsafe void StoreVector64x4(float* address, (System.Runtime.Intrinsics.Vector64<float> Value1, System.Runtime.Intrinsics.Vector64<float> Value2, System.Runtime.Intrinsics.Vector64<float> Value3, System.Runtime.Intrinsics.Vector64<float> Value4) value) { throw null; }
#endif
public static System.Runtime.Intrinsics.Vector128<byte> Subtract(System.Runtime.Intrinsics.Vector128<byte> left, System.Runtime.Intrinsics.Vector128<byte> right) { throw null; }
public static System.Runtime.Intrinsics.Vector128<short> Subtract(System.Runtime.Intrinsics.Vector128<short> left, System.Runtime.Intrinsics.Vector128<short> right) { throw null; }
public static System.Runtime.Intrinsics.Vector128<int> Subtract(System.Runtime.Intrinsics.Vector128<int> left, System.Runtime.Intrinsics.Vector128<int> right) { throw null; }
Expand Down Expand Up @@ -3780,6 +3780,7 @@ public static unsafe void StorePairScalarNonTemporal(uint* address, System.Runti
public static unsafe void StoreSelectedScalar(ulong* address, (System.Runtime.Intrinsics.Vector128<ulong> value1, System.Runtime.Intrinsics.Vector128<ulong> value2, System.Runtime.Intrinsics.Vector128<ulong> value3, System.Runtime.Intrinsics.Vector128<ulong> value4) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index) { throw null; }
public static unsafe void StoreSelectedScalar(float* address, (System.Runtime.Intrinsics.Vector128<float> value1, System.Runtime.Intrinsics.Vector128<float> value2, System.Runtime.Intrinsics.Vector128<float> value3, System.Runtime.Intrinsics.Vector128<float> value4) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(3))] byte index) { throw null; }
public static unsafe void StoreSelectedScalar(double* address, (System.Runtime.Intrinsics.Vector128<double> value1, System.Runtime.Intrinsics.Vector128<double> value2, System.Runtime.Intrinsics.Vector128<double> value3, System.Runtime.Intrinsics.Vector128<double> value4) value, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (byte)(1))] byte index) { throw null; }
#endif
public unsafe static void StoreVector128x2AndZip(byte* address, (System.Runtime.Intrinsics.Vector128<byte> Value1, System.Runtime.Intrinsics.Vector128<byte> Value2) value) { throw null; }
public unsafe static void StoreVector128x2AndZip(sbyte* address, (System.Runtime.Intrinsics.Vector128<sbyte> Value1, System.Runtime.Intrinsics.Vector128<sbyte> Value2) value) { throw null; }
public unsafe static void StoreVector128x2AndZip(short* address, (System.Runtime.Intrinsics.Vector128<short> Value1, System.Runtime.Intrinsics.Vector128<short> Value2) value) { throw null; }
Expand Down Expand Up @@ -3840,7 +3841,6 @@ public static unsafe void StorePairScalarNonTemporal(uint* address, System.Runti
public static unsafe void StoreVector128x4(ulong* address, (System.Runtime.Intrinsics.Vector128<ulong> Value1, System.Runtime.Intrinsics.Vector128<ulong> Value2, System.Runtime.Intrinsics.Vector128<ulong> Value3, System.Runtime.Intrinsics.Vector128<ulong> Value4) value) { throw null; }
public static unsafe void StoreVector128x4(float* address, (System.Runtime.Intrinsics.Vector128<float> Value1, System.Runtime.Intrinsics.Vector128<float> Value2, System.Runtime.Intrinsics.Vector128<float> Value3, System.Runtime.Intrinsics.Vector128<float> Value4) value) { throw null; }
public static unsafe void StoreVector128x4(double* address, (System.Runtime.Intrinsics.Vector128<double> Value1, System.Runtime.Intrinsics.Vector128<double> Value2, System.Runtime.Intrinsics.Vector128<double> Value3, System.Runtime.Intrinsics.Vector128<double> Value4) value) { throw null; }
#endif
public static System.Runtime.Intrinsics.Vector128<double> Subtract(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right) { throw null; }
public static System.Runtime.Intrinsics.Vector64<byte> SubtractSaturateScalar(System.Runtime.Intrinsics.Vector64<byte> left, System.Runtime.Intrinsics.Vector64<byte> right) { throw null; }
public static System.Runtime.Intrinsics.Vector64<short> SubtractSaturateScalar(System.Runtime.Intrinsics.Vector64<short> left, System.Runtime.Intrinsics.Vector64<short> right) { throw null; }
Expand Down
13 changes: 12 additions & 1 deletion src/mono/mono/mini/llvm-intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,18 @@ INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD4LANE_V64, aarch64_neon_ld4lane, Arm64,
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD2LANE_V128, aarch64_neon_ld2lane, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD3LANE_V128, aarch64_neon_ld3lane, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_LD4LANE_V128, aarch64_neon_ld4lane, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)

/*
 * Multi-register NEON store intrinsics (aarch64_neon_st1xN and aarch64_neon_stN),
 * one row per register count (2, 3, 4) and vector width (V64, V128).
 * The V64 rows omit the 64-bit element tags (I8, R8) that the V128 rows carry.
 */
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X2_V64, aarch64_neon_st1x2, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X3_V64, aarch64_neon_st1x3, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X4_V64, aarch64_neon_st1x4, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X2_V128, aarch64_neon_st1x2, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X3_V128, aarch64_neon_st1x3, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST1X4_V128, aarch64_neon_st1x4, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST2_V64, aarch64_neon_st2, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST3_V64, aarch64_neon_st3, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST4_V64, aarch64_neon_st4, Arm64, AddPointer, V64 | I1 | I2 | I4 | R4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST2_V128, aarch64_neon_st2, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST3_V128, aarch64_neon_st3, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_ST4_V128, aarch64_neon_st4, Arm64, AddPointer, V128 | I1 | I2 | I4 | I8 | R4 | R8)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_SMAXV, aarch64_neon_smaxv, Arm64, Across, V64 | V128 | I1 | I2 | I4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_UMAXV, aarch64_neon_umaxv, Arm64, Across, V64 | V128 | I1 | I2 | I4)
INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_SMINV, aarch64_neon_sminv, Arm64, Across, V64 | V128 | I1 | I2 | I4)
Expand Down
22 changes: 22 additions & 0 deletions src/mono/mono/mini/mini-llvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -11749,6 +11749,28 @@ MONO_RESTORE_WARNING
values [ins->dreg] = result;
break;
}
case OP_ARM64_STM: {
	/*
	 * Multi-vector store: the value tuple is loaded from the address recorded for sreg2,
	 * each of its elements becomes one intrinsic argument, and `lhs` is passed last.
	 * The intrinsic to call is carried in inst_c0; its overload tag is derived from the
	 * tuple's element type.
	 */
	IntrinsicId iid = (IntrinsicId) ins->inst_c0;
	LLVMTypeRef tuple_t = simd_valuetuple_to_llvm_type (ctx, ins->klass);
	llvm_ovr_tag_t ovr_tag = ovr_tag_from_llvm_type (LLVMGetElementType (tuple_t));

	/* One slot per tuple element plus one trailing slot for `lhs`. */
	int n_vecs = LLVMGetArrayLength (tuple_t);
	LLVMValueRef *call_args = g_alloca ((n_vecs + 1) * sizeof (LLVMValueRef));

	LLVMValueRef tuple_val = LLVMBuildLoad2 (builder, tuple_t, addresses [ins->sreg2]->value, "load_param");

	int idx = 0;
	while (idx < n_vecs) {
		call_args [idx] = LLVMBuildExtractValue (builder, tuple_val, idx, "extract_elem");
		idx++;
	}
	call_args [n_vecs] = lhs;

	call_overloaded_intrins (ctx, iid, ovr_tag, call_args, "");
	break;
}
case OP_ARM64_ST1: {
LLVMTypeRef t = LLVMTypeOf (rhs);
LLVMValueRef address = convert (ctx, lhs, pointer_type (t));
Expand Down
2 changes: 2 additions & 0 deletions src/mono/mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -1650,6 +1650,8 @@ MINI_OP(OP_ARM64_ST1, "arm64_st1", NONE, IREG, XREG)
MINI_OP(OP_ARM64_SXTL, "arm64_sxtl", XREG, XREG, NONE)
MINI_OP(OP_ARM64_SXTL2, "arm64_sxtl2", XREG, XREG, NONE)

/* Multi-vector store: no dreg; sreg2 (VREG) is the value tuple and the LLVM intrinsic id travels in inst_c0. sreg1 (IREG) is presumably the destination address -- TODO confirm. */
MINI_OP(OP_ARM64_STM, "arm64_stm", NONE, IREG, VREG)

MINI_OP(OP_ARM64_SMULH, "arm64_smulh", LREG, LREG, LREG)
MINI_OP(OP_ARM64_SQRT_SCALAR, "arm64_sqrt_scalar", XREG, XREG, NONE)
MINI_OP(OP_ARM64_TRN1, "arm64_trn1", XREG, XREG, XREG)
Expand Down
44 changes: 44 additions & 0 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -3957,6 +3957,18 @@ static SimdIntrinsic advsimd_methods [] = {
{SN_StorePairScalar, OP_ARM64_STP_SCALAR},
{SN_StorePairScalarNonTemporal, OP_ARM64_STNP_SCALAR},
{SN_StoreSelectedScalar},
{SN_StoreVector128x2},
{SN_StoreVector128x2AndZip},
{SN_StoreVector128x3},
{SN_StoreVector128x3AndZip},
{SN_StoreVector128x4},
{SN_StoreVector128x4AndZip},
{SN_StoreVector64x2},
{SN_StoreVector64x2AndZip},
{SN_StoreVector64x3},
{SN_StoreVector64x3AndZip},
{SN_StoreVector64x4},
{SN_StoreVector64x4AndZip},
{SN_Subtract, OP_XBINOP, OP_ISUB, None, None, OP_XBINOP, OP_FSUB},
{SN_SubtractHighNarrowingLower, OP_ARM64_SUBHN},
{SN_SubtractHighNarrowingUpper, OP_ARM64_SUBHN2},
Expand Down Expand Up @@ -4355,6 +4367,38 @@ emit_arm64_intrinsics (
MONO_ADD_INS (cfg->cbb, ins);
return ins;
}
case SN_StoreVector64x2:
case SN_StoreVector64x3:
case SN_StoreVector64x4:
case SN_StoreVector64x2AndZip:
case SN_StoreVector64x3AndZip:
case SN_StoreVector64x4AndZip:
case SN_StoreVector128x2:
case SN_StoreVector128x3:
case SN_StoreVector128x4:
case SN_StoreVector128x2AndZip:
case SN_StoreVector128x3AndZip:
case SN_StoreVector128x4AndZip: {
	/*
	 * All StoreVector{64,128}xN[AndZip] methods share one opcode (OP_ARM64_STM);
	 * they differ only in the LLVM intrinsic id handed to it.
	 */
	int llvm_intrins = 0;
	switch (id) {
	/* 64-bit vectors: plain stores use ST1xN, the AndZip variants use STN. */
	case SN_StoreVector64x2:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST1X2_V64;
		break;
	case SN_StoreVector64x3:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST1X3_V64;
		break;
	case SN_StoreVector64x4:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST1X4_V64;
		break;
	case SN_StoreVector64x2AndZip:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST2_V64;
		break;
	case SN_StoreVector64x3AndZip:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST3_V64;
		break;
	case SN_StoreVector64x4AndZip:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST4_V64;
		break;
	/* 128-bit vectors: same split between ST1xN and STN. */
	case SN_StoreVector128x2:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST1X2_V128;
		break;
	case SN_StoreVector128x3:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST1X3_V128;
		break;
	case SN_StoreVector128x4:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST1X4_V128;
		break;
	case SN_StoreVector128x2AndZip:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST2_V128;
		break;
	case SN_StoreVector128x3AndZip:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST3_V128;
		break;
	case SN_StoreVector128x4AndZip:
		llvm_intrins = INTRINS_AARCH64_ADV_SIMD_ST4_V128;
		break;
	default:
		g_assert_not_reached ();
	}

	/* The instruction's class is taken from the second parameter, the value tuple. */
	MonoClass *tuple_klass = mono_class_from_mono_type_internal (fsig->params [1]);
	return emit_simd_ins_for_sig (cfg, tuple_klass, OP_ARM64_STM, llvm_intrins, arg0_type, fsig, args);
}
default:
g_assert_not_reached ();
}
Expand Down
12 changes: 12 additions & 0 deletions src/mono/mono/mini/simd-methods.h
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,18 @@ METHOD(StorePairNonTemporal)
METHOD(StorePairScalar)
METHOD(StorePairScalarNonTemporal)
METHOD(StoreSelectedScalar)
METHOD(StoreVector128x2)
METHOD(StoreVector128x3)
METHOD(StoreVector128x4)
METHOD(StoreVector128x2AndZip)
METHOD(StoreVector128x3AndZip)
METHOD(StoreVector128x4AndZip)
METHOD(StoreVector64x2)
METHOD(StoreVector64x3)
METHOD(StoreVector64x4)
METHOD(StoreVector64x2AndZip)
METHOD(StoreVector64x3AndZip)
METHOD(StoreVector64x4AndZip)
METHOD(SubtractHighNarrowingLower)
METHOD(SubtractHighNarrowingUpper)
METHOD(SubtractRoundedHighNarrowingLower)
Expand Down
Loading