[API Proposal]: Add AVX10v2 API to add Avx10.2 support #109083
Open
Description
Background and motivation
Intel has announced the features available in the next version of Avx10
(10.2). In order to support this, .NET needs to expand the Avx10
library to include the new APIs.
Avx10.2 spec. Sections 7 - 14 of this spec go over the newly added instructions. A couple of interesting features here are MinMax
and saturating conversions.
As part of the original API Proposal, the proposed design was for future Avx10
versions to have their own classes, each of which inherits from Avx10v1.
API Proposal
namespace System.Runtime.Intrinsics.X86
{
    /// <summary>Provides access to X86 AVX10.2 hardware instructions via intrinsics</summary>
    /// <remarks>
    /// Every member body below simply calls the member itself with the same arguments.
    /// Presumably the [Intrinsic] marker lets the runtime substitute the instruction named
    /// in the comment above each member - confirm against the JIT implementation.
    /// </remarks>
    [Intrinsic]
    [CLSCompliant(false)]
    public abstract class Avx10v2 : Avx10v1
    {
        // Internal constructor: the type cannot be derived from outside this assembly.
        internal Avx10v2() { }

        // Hides Avx10v1.IsSupported (hence 'new'); the getter refers to itself like every other member here.
        public static new bool IsSupported { get => IsSupported; }

        // VMINMAXPD xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8
        public static Vector128<double> MinMax(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) => MinMax(left, right, control);

        // VMINMAXPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {sae}, imm8
        public static Vector256<double> MinMax(Vector256<double> left, Vector256<double> right, [ConstantExpected] byte control) => MinMax(left, right, control);

        // VMINMAXPS xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst, imm8
        public static Vector128<float> MinMax(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) => MinMax(left, right, control);

        // VMINMAXPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {sae}, imm8
        public static Vector256<float> MinMax(Vector256<float> left, Vector256<float> right, [ConstantExpected] byte control) => MinMax(left, right, control);

        // NOTE(review): the scalar forms below list an xmm destination but return a plain
        // double/float - confirm whether Vector128<T> is the intended return type.

        // VMINMAXSD xmm1{k1}{z}, xmm2, xmm3/m64 {sae}, imm8
        public static double MinMaxScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) => MinMaxScalar(left, right, control);

        // VMINMAXSS xmm1{k1}{z}, xmm2, xmm3/m32 {sae}, imm8
        public static float MinMaxScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) => MinMaxScalar(left, right, control);

        // VADDPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
        public static Vector256<double> Add(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Add(left, right, mode);

        // VADDPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
        public static Vector256<float> Add(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Add(left, right, mode);

        // VDIVPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
        public static Vector256<double> Divide(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Divide(left, right, mode);

        // VDIVPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
        public static Vector256<float> Divide(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Divide(left, right, mode);

        // VCVTPS2IBS xmm1{k1}{z}, xmm2/m128/m32bcst
        public static Vector128<int> ConvertToByteWithSaturationAndWidenToInt32(Vector128<float> value) => ConvertToByteWithSaturationAndWidenToInt32(value);

        // VCVTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<int> ConvertToByteWithSaturationAndWidenToInt32(Vector256<float> value) => ConvertToByteWithSaturationAndWidenToInt32(value);

        // VCVTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<int> ConvertToByteWithSaturationAndWidenToInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToByteWithSaturationAndWidenToInt32(value, mode);

        // VCVTPS2IUBS xmm1{k1}{z}, xmm2/m128/m32bcst
        public static Vector128<uint> ConvertToByteWithSaturationAndWidenToUInt32(Vector128<float> value) => ConvertToByteWithSaturationAndWidenToUInt32(value);

        // VCVTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<uint> ConvertToByteWithSaturationAndWidenToUInt32(Vector256<float> value) => ConvertToByteWithSaturationAndWidenToUInt32(value);

        // VCVTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<uint> ConvertToByteWithSaturationAndWidenToUInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToByteWithSaturationAndWidenToUInt32(value, mode);

        // VCVTTPS2IBS xmm1{k1}{z}, xmm2/m128/m32bcst
        public static Vector128<int> ConvertToByteWithTruncationSaturationAndWidenToInt32(Vector128<float> value) => ConvertToByteWithTruncationSaturationAndWidenToInt32(value);

        // VCVTTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {sae}
        public static Vector256<int> ConvertToByteWithTruncationSaturationAndWidenToInt32(Vector256<float> value) => ConvertToByteWithTruncationSaturationAndWidenToInt32(value);

        // VCVTTPS2IUBS xmm1{k1}{z}, xmm2/m128/m32bcst
        public static Vector128<uint> ConvertToByteWithTruncationSaturationAndWidenToUInt32(Vector128<float> value) => ConvertToByteWithTruncationSaturationAndWidenToUInt32(value);

        // VCVTTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {sae}
        public static Vector256<uint> ConvertToByteWithTruncationSaturationAndWidenToUInt32(Vector256<float> value) => ConvertToByteWithTruncationSaturationAndWidenToUInt32(value);

        // VMOVD xmm1, xmm2/m32
        public static Vector128<uint> ConvertToVector128UInt32(Vector128<uint> value) => ConvertToVector128UInt32(value);

        // VMOVW xmm1, xmm2/m16
        public static Vector128<ushort> ConvertToVector128UInt16(Vector128<ushort> value) => ConvertToVector128UInt16(value);

        // The instructions below are those where embedded rounding
        // support has been added to the existing API.

        // VCVTDQ2PS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<float> ConvertToVector256Single(Vector256<int> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);

        // VCVTPD2DQ xmm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector128<int> ConvertToVector128Int32(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Int32(value, mode);

        // VCVTPD2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector128<float> ConvertToVector128Single(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Single(value, mode);

        // VCVTPD2QQ ymm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector256<long> ConvertToVector256Int64(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Int64(value, mode);

        // VCVTPD2UDQ xmm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector128<uint> ConvertToVector128UInt32(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128UInt32(value, mode);

        // VCVTPD2UQQ ymm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector256<ulong> ConvertToVector256UInt64(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256UInt64(value, mode);

        // VCVTPS2DQ ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<int> ConvertToVector256Int32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Int32(value, mode);

        // VCVTPS2QQ ymm1{k1}{z}, xmm2/m128/m32bcst {er}
        public static Vector256<long> ConvertToVector256Int64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Int64(value, mode);

        // VCVTPS2UDQ ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<uint> ConvertToVector256UInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256UInt32(value, mode);

        // VCVTPS2UQQ ymm1{k1}{z}, xmm2/m128/m32bcst {er}
        public static Vector256<ulong> ConvertToVector256UInt64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256UInt64(value, mode);

        // The QQ mnemonics are paired with the signed (long) overloads and the UQQ mnemonics
        // with the unsigned (ulong) overloads, following the PD2QQ -> long / PD2UQQ -> ulong
        // pairing used by the conversions above.

        // VCVTQQ2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector128<float> ConvertToVector128Single(Vector256<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Single(value, mode);

        // VCVTQQ2PD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector256<double> ConvertToVector256Double(Vector256<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Double(value, mode);

        // VCVTUDQ2PS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<float> ConvertToVector256Single(Vector256<uint> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);

        // VCVTUQQ2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector128<float> ConvertToVector128Single(Vector256<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Single(value, mode);

        // VCVTUQQ2PD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector256<double> ConvertToVector256Double(Vector256<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Double(value, mode);

        // VMULPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
        public static Vector256<double> Multiply(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Multiply(left, right, mode);

        // VMULPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
        public static Vector256<float> Multiply(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Multiply(left, right, mode);

        // VSCALEFPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
        public static Vector256<double> Scale(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Scale(left, right, mode);

        // VSCALEFPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
        public static Vector256<float> Scale(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Scale(left, right, mode);

        // VSQRTPD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector256<double> Sqrt(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Sqrt(value, mode);

        // VSQRTPS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<float> Sqrt(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Sqrt(value, mode);

        // VSUBPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
        public static Vector256<double> Subtract(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Subtract(left, right, mode);

        // VSUBPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
        public static Vector256<float> Subtract(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Subtract(left, right, mode);

        // Nested class that hides Avx10v1.X64; it declares no members beyond IsSupported.
        [Intrinsic]
        public new abstract class X64 : Avx10v1.X64
        {
            internal X64() { }

            public static new bool IsSupported { get => IsSupported; }
        }

        // Nested class holding the 512-bit (zmm) overloads. 'new' makes the hiding of
        // Avx10v1.V512 explicit, matching the nested X64 declarations.
        [Intrinsic]
        public new abstract class V512 : Avx10v1.V512
        {
            internal V512() { }

            public static new bool IsSupported { get => IsSupported; }

            // VMINMAXPD zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst {sae}, imm8
            public static Vector512<double> MinMax(Vector512<double> left, Vector512<double> right, [ConstantExpected] byte control) => MinMax(left, right, control);

            // VMINMAXPS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst {sae}, imm8
            public static Vector512<float> MinMax(Vector512<float> left, Vector512<float> right, [ConstantExpected] byte control) => MinMax(left, right, control);

            // VCVTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
            public static Vector512<int> ConvertToByteWithSaturationAndWidenToInt32(Vector512<float> value) => ConvertToByteWithSaturationAndWidenToInt32(value);

            // VCVTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
            public static Vector512<int> ConvertToByteWithSaturationAndWidenToInt32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToByteWithSaturationAndWidenToInt32(value, mode);

            // VCVTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
            public static Vector512<uint> ConvertToByteWithSaturationAndWidenToUInt32(Vector512<float> value) => ConvertToByteWithSaturationAndWidenToUInt32(value);

            // VCVTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
            public static Vector512<uint> ConvertToByteWithSaturationAndWidenToUInt32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToByteWithSaturationAndWidenToUInt32(value, mode);

            // VCVTTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {sae}
            public static Vector512<int> ConvertToByteWithTruncationSaturationAndWidenToInt32(Vector512<float> value) => ConvertToByteWithTruncationSaturationAndWidenToInt32(value);

            // VCVTTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {sae}
            public static Vector512<uint> ConvertToByteWithTruncationSaturationAndWidenToUInt32(Vector512<float> value) => ConvertToByteWithTruncationSaturationAndWidenToUInt32(value);

            // This is a 512-bit extension of the previously existing 128/256-bit intrinsic
            // VMPSADBW zmm1{k1}{z}, zmm2, zmm3/m512, imm8
            public static Vector512<ushort> MultipleSumAbsoluteDifferences(Vector512<byte> left, Vector512<byte> right, [ConstantExpected] byte mask) => MultipleSumAbsoluteDifferences(left, right, mask);

            // Nested class that hides Avx10v1.V512.X64; it declares no members beyond IsSupported.
            [Intrinsic]
            public new abstract class X64 : Avx10v1.V512.X64
            {
                internal X64() { }

                public static new bool IsSupported { get => IsSupported; }
            }
        }
    }
}
API Usage
// Build the two 128-bit operands; Vector128.Create matches the declared Vector128<float> type.
Vector128<float> v1 = Vector128.Create((float)someParam1);
Vector128<float> v2 = Vector128.Create((float)someParam2);
// IsSupported is declared as a static property in the proposal, so it is read, not invoked.
if (Avx10v2.IsSupported) {
    // The last argument is the imm8 control byte listed for VMINMAXPS; see the Avx10.2 spec for its encoding.
    Vector128<float> v3 = Avx10v2.MinMax(v1, v2, 0b00000000);
    // etc
}
Alternative Designs
No response
Risks
No response
Activity