Skip to content

[API Proposal]: Add AVX10v2 API to add Avx10.2 support #109083

Open
@DeepakRajendrakumaran

Description

Background and motivation

Intel has announced the features available in the next version of Avx10 (10.2). In order to support this, .NET needs to expand the Avx10 library to include the new APIs.

Avx10.2 spec. Sections 7–14 of this spec cover the newly added instructions. A couple of interesting features here are MinMax and the saturating conversions.

As part of the original API proposal, the design was for each future Avx10 version to have its own class that inherits from Avx10v1.

API Proposal

namespace System.Runtime.Intrinsics.X86
{
    /// <summary>Provides access to X86 AVX10.2 hardware instructions via intrinsics</summary>
    /// <remarks>
    /// Every member body forwards to itself. NOTE(review): this follows the usual
    /// <c>[Intrinsic]</c> convention in which the JIT replaces the call with the
    /// instruction named in the preceding comment; the bodies are not meant to run as written.
    /// </remarks>
    [Intrinsic]
    [CLSCompliant(false)]
    public abstract class Avx10v2 : Avx10v1
    {
        internal Avx10v2() { }

        /// <summary>Gets whether the AVX10.2 APIs in this class are supported.</summary>
        public static new bool IsSupported { get => IsSupported; }

        // ---- MinMax: the imm8 'control' operand is passed through unchanged ----

        // VMINMAXPD xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8
        public static Vector128<double> MinMax(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) => MinMax(left, right, control);

        // VMINMAXPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {sae}, imm8
        public static Vector256<double> MinMax(Vector256<double> left, Vector256<double> right, [ConstantExpected] byte control) => MinMax(left, right, control);

        // VMINMAXPS xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst, imm8
        public static Vector128<float> MinMax(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) => MinMax(left, right, control);

        // VMINMAXPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {sae}, imm8
        public static Vector256<float> MinMax(Vector256<float> left, Vector256<float> right, [ConstantExpected] byte control) => MinMax(left, right, control);

        // VMINMAXSD xmm1{k1}{z}, xmm2, xmm3/m64 {sae}, imm8
        public static double MinMaxScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) => MinMaxScalar(left, right, control);

        // VMINMAXSS xmm1{k1}{z}, xmm2, xmm3/m32 {sae}, imm8
        public static float MinMaxScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) => MinMaxScalar(left, right, control);

        // ---- 256-bit arithmetic with embedded rounding ----

        // VADDPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
        public static Vector256<double> Add(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Add(left, right, mode);

        // VADDPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
        public static Vector256<float> Add(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Add(left, right, mode);

        // VDIVPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
        public static Vector256<double> Divide(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Divide(left, right, mode);

        // VDIVPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
        public static Vector256<float> Divide(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Divide(left, right, mode);

        // ---- Saturating float -> byte conversions ----

        // VCVTPS2IBS xmm1{k1}{z}, xmm2/m128/m32bcst
        public static Vector128<int> ConvertToByteWithSaturationAndWidenToInt32(Vector128<float> value) => ConvertToByteWithSaturationAndWidenToInt32(value);

        // VCVTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<int> ConvertToByteWithSaturationAndWidenToInt32(Vector256<float> value) => ConvertToByteWithSaturationAndWidenToInt32(value);

        // VCVTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<int> ConvertToByteWithSaturationAndWidenToInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToByteWithSaturationAndWidenToInt32(value, mode);

        // VCVTPS2IUBS xmm1{k1}{z}, xmm2/m128/m32bcst
        public static Vector128<uint> ConvertToByteWithSaturationAndWidenToUInt32(Vector128<float> value) => ConvertToByteWithSaturationAndWidenToUInt32(value);

        // VCVTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<uint> ConvertToByteWithSaturationAndWidenToUInt32(Vector256<float> value) => ConvertToByteWithSaturationAndWidenToUInt32(value);

        // VCVTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<uint> ConvertToByteWithSaturationAndWidenToUInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToByteWithSaturationAndWidenToUInt32(value, mode);

        // VCVTTPS2IBS xmm1{k1}{z}, xmm2/m128/m32bcst
        public static Vector128<int> ConvertToByteWithTruncationSaturationAndWidenToInt32(Vector128<float> value) => ConvertToByteWithTruncationSaturationAndWidenToInt32(value);

        // VCVTTPS2IBS ymm1{k1}{z}, ymm2/m256/m32bcst {sae}
        public static Vector256<int> ConvertToByteWithTruncationSaturationAndWidenToInt32(Vector256<float> value) => ConvertToByteWithTruncationSaturationAndWidenToInt32(value);

        // VCVTTPS2IUBS xmm1{k1}{z}, xmm2/m128/m32bcst
        public static Vector128<uint> ConvertToByteWithTruncationSaturationAndWidenToUInt32(Vector128<float> value) => ConvertToByteWithTruncationSaturationAndWidenToUInt32(value);

        // VCVTTPS2IUBS ymm1{k1}{z}, ymm2/m256/m32bcst {sae}
        public static Vector256<uint> ConvertToByteWithTruncationSaturationAndWidenToUInt32(Vector256<float> value) => ConvertToByteWithTruncationSaturationAndWidenToUInt32(value);

        // ---- Scalar moves ----

        // VMOVD xmm1, xmm2/m32
        public static Vector128<uint> ConvertToVector128UInt32(Vector128<uint> value) => ConvertToVector128UInt32(value);

        // VMOVW xmm1, xmm2/m16
        public static Vector128<ushort> ConvertToVector128UInt16(Vector128<ushort> value) => ConvertToVector128UInt16(value);

        // The instructions below are existing APIs to which
        // embedded rounding support has been added.

        // VCVTDQ2PS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<float> ConvertToVector256Single(Vector256<int> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);

        // VCVTPD2DQ xmm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector128<int> ConvertToVector128Int32(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Int32(value, mode);

        // VCVTPD2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector128<float> ConvertToVector128Single(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Single(value, mode);

        // VCVTPD2QQ ymm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector256<long> ConvertToVector256Int64(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Int64(value, mode);

        // VCVTPD2UDQ xmm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector128<uint> ConvertToVector128UInt32(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128UInt32(value, mode);

        // VCVTPD2UQQ ymm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector256<ulong> ConvertToVector256UInt64(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256UInt64(value, mode);

        // VCVTPS2DQ ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<int> ConvertToVector256Int32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Int32(value, mode);

        // VCVTPS2QQ ymm1{k1}{z}, xmm2/m128/m32bcst {er}
        public static Vector256<long> ConvertToVector256Int64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Int64(value, mode);

        // VCVTPS2UDQ ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<uint> ConvertToVector256UInt32(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256UInt32(value, mode);

        // VCVTPS2UQQ ymm1{k1}{z}, xmm2/m128/m32bcst {er}
        public static Vector256<ulong> ConvertToVector256UInt64(Vector128<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256UInt64(value, mode);

        // Unsigned 64-bit source, so this is the 'U' form of the instruction.
        // VCVTUQQ2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector128<float> ConvertToVector128Single(Vector256<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Single(value, mode);

        // VCVTUQQ2PD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector256<double> ConvertToVector256Double(Vector256<ulong> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Double(value, mode);

        // VCVTUDQ2PS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<float> ConvertToVector256Single(Vector256<uint> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Single(value, mode);

        // Signed 64-bit source, so this is the non-'U' form of the instruction.
        // VCVTQQ2PS xmm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector128<float> ConvertToVector128Single(Vector256<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector128Single(value, mode);

        // VCVTQQ2PD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector256<double> ConvertToVector256Double(Vector256<long> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToVector256Double(value, mode);

        // VMULPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
        public static Vector256<double> Multiply(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Multiply(left, right, mode);

        // VMULPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
        public static Vector256<float> Multiply(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Multiply(left, right, mode);

        // VSCALEFPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
        public static Vector256<double> Scale(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Scale(left, right, mode);

        // VSCALEFPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
        public static Vector256<float> Scale(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Scale(left, right, mode);

        // VSQRTPD ymm1{k1}{z}, ymm2/m256/m64bcst {er}
        public static Vector256<double> Sqrt(Vector256<double> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Sqrt(value, mode);

        // VSQRTPS ymm1{k1}{z}, ymm2/m256/m32bcst {er}
        public static Vector256<float> Sqrt(Vector256<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Sqrt(value, mode);

        // VSUBPD ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst {er}
        public static Vector256<double> Subtract(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Subtract(left, right, mode);

        // VSUBPS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst {er}
        public static Vector256<float> Subtract(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => Subtract(left, right, mode);

        /// <summary>64-bit-only AVX10.2 surface; this proposal adds no members beyond <see cref="IsSupported"/>.</summary>
        [Intrinsic]
        public new abstract class X64 : Avx10v1.X64
        {
            internal X64() { }

            public static new bool IsSupported { get => IsSupported; }
        }

        /// <summary>512-bit AVX10.2 surface.</summary>
        // 'new' is required because this nested type hides the V512 class inherited
        // through Avx10v1 (it derives from Avx10v1.V512), matching the X64 classes.
        [Intrinsic]
        public new abstract class V512 : Avx10v1.V512
        {
            internal V512() { }

            public static new bool IsSupported { get => IsSupported; }

            // VMINMAXPD zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst {sae}, imm8
            public static Vector512<double> MinMax(Vector512<double> left, Vector512<double> right, [ConstantExpected] byte control) => MinMax(left, right, control);

            // VMINMAXPS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst {sae}, imm8
            public static Vector512<float> MinMax(Vector512<float> left, Vector512<float> right, [ConstantExpected] byte control) => MinMax(left, right, control);

            // VCVTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
            public static Vector512<int> ConvertToByteWithSaturationAndWidenToInt32(Vector512<float> value) => ConvertToByteWithSaturationAndWidenToInt32(value);

            // VCVTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
            public static Vector512<int> ConvertToByteWithSaturationAndWidenToInt32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToByteWithSaturationAndWidenToInt32(value, mode);

            // VCVTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
            public static Vector512<uint> ConvertToByteWithSaturationAndWidenToUInt32(Vector512<float> value) => ConvertToByteWithSaturationAndWidenToUInt32(value);

            // VCVTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {er}
            public static Vector512<uint> ConvertToByteWithSaturationAndWidenToUInt32(Vector512<float> value, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => ConvertToByteWithSaturationAndWidenToUInt32(value, mode);

            // VCVTTPS2IBS zmm1{k1}{z}, zmm2/m512/m32bcst {sae}
            public static Vector512<int> ConvertToByteWithTruncationSaturationAndWidenToInt32(Vector512<float> value) => ConvertToByteWithTruncationSaturationAndWidenToInt32(value);

            // VCVTTPS2IUBS zmm1{k1}{z}, zmm2/m512/m32bcst {sae}
            public static Vector512<uint> ConvertToByteWithTruncationSaturationAndWidenToUInt32(Vector512<float> value) => ConvertToByteWithTruncationSaturationAndWidenToUInt32(value);

            // This is a 512-bit extension of the previously existing 128/256-bit intrinsic
            // VMPSADBW zmm1{k1}{z}, zmm2, zmm3/m512, imm8
            public static Vector512<ushort> MultipleSumAbsoluteDifferences(Vector512<byte> left, Vector512<byte> right, [ConstantExpected] byte mask) => MultipleSumAbsoluteDifferences(left, right, mask);

            /// <summary>64-bit-only 512-bit AVX10.2 surface; this proposal adds no members beyond <see cref="IsSupported"/>.</summary>
            [Intrinsic]
            public new abstract class X64 : Avx10v1.V512.X64
            {
                internal X64() { }

                public static new bool IsSupported { get => IsSupported; }
            }
        }
    }
}

API Usage

// Broadcast each scalar into a 128-bit vector; the factory type must match the declared vector width.
Vector128<float> v1 = Vector128.Create((float)someParam1);
Vector128<float> v2 = Vector128.Create((float)someParam2);
// IsSupported is a static property, not a method.
if (Avx10v2.IsSupported) {
  // The proposed 128-bit overload is named MinMax; the last argument is the imm8 control byte.
  Vector128<float> v3 = Avx10v2.MinMax(v1, v2, 0b00000000);
  // etc
}

Alternative Designs

No response

Risks

No response

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions