Skip to content

[API Proposal]: Add dot product intrinsics to AVX10v2 API #110032

Open
@DeepakRajendrakumaran

Description

Background and motivation

This is a follow up on #109083.

This proposal adds the VPDPB[SU,UU,SS]D[,S] and VPDPW[SU,US,UU]D[,S] dot product instructions to the already approved AVX10v2 API.

Reference: the AVX10.2 spec. Section 10 of this spec covers these instructions.

API Proposal

namespace System.Runtime.Intrinsics.X86
{
    /// <summary>Provides access to X86 AVX10.2 hardware instructions via intrinsics</summary>
    /// <remarks>
    /// Every member below is an expression-bodied method that calls itself with the same arguments;
    /// presumably the JIT expands each call into the instruction named in the comment above it.
    /// NOTE(review): the listed instructions take three register operands, while these overloads
    /// take only <c>left</c> and <c>right</c> - confirm whether an accumulator parameter is intended.
    /// </remarks>
    [Intrinsic]
    [CLSCompliant(false)]
    public abstract class Avx10v2 : Avx10v1
    {
        // ---- 8-bit element dot products, 128-bit and 256-bit ----

        // VPDPBSSD xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAdd(Vector128<sbyte> left, Vector128<sbyte> right) => MultiplyWideningAndAdd(left, right);

        // VPDPBSUD xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAdd(Vector128<sbyte> left, Vector128<byte> right) => MultiplyWideningAndAdd(left, right);

        // VPDPBUUD xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAdd(Vector128<byte> left, Vector128<byte> right) => MultiplyWideningAndAdd(left, right);

        // VPDPBSSD ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAdd(Vector256<sbyte> left, Vector256<sbyte> right) => MultiplyWideningAndAdd(left, right);

        // VPDPBSUD ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAdd(Vector256<sbyte> left, Vector256<byte> right) => MultiplyWideningAndAdd(left, right);

        // VPDPBUUD ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAdd(Vector256<byte> left, Vector256<byte> right) => MultiplyWideningAndAdd(left, right);

        // VPDPBSSDS xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<sbyte> left, Vector128<sbyte> right) => MultiplyWideningAndAddSaturate(left, right);

        // VPDPBSUDS xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<sbyte> left, Vector128<byte> right) => MultiplyWideningAndAddSaturate(left, right);

        // VPDPBUUDS xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<byte> left, Vector128<byte> right) => MultiplyWideningAndAddSaturate(left, right);

        // VPDPBSSDS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<sbyte> left, Vector256<sbyte> right) => MultiplyWideningAndAddSaturate(left, right);

        // VPDPBSUDS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<sbyte> left, Vector256<byte> right) => MultiplyWideningAndAddSaturate(left, right);

        // VPDPBUUDS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<byte> left, Vector256<byte> right) => MultiplyWideningAndAddSaturate(left, right);

        // ---- 16-bit element dot products, 128-bit and 256-bit ----

        // VPDPWSUD xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAdd(Vector128<short> left, Vector128<ushort> right) => MultiplyWideningAndAdd(left, right);

        // VPDPWUSD xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAdd(Vector128<ushort> left, Vector128<short> right) => MultiplyWideningAndAdd(left, right);

        // VPDPWUUD xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAdd(Vector128<ushort> left, Vector128<ushort> right) => MultiplyWideningAndAdd(left, right);

        // VPDPWSUD ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAdd(Vector256<short> left, Vector256<ushort> right) => MultiplyWideningAndAdd(left, right);

        // VPDPWUSD ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAdd(Vector256<ushort> left, Vector256<short> right) => MultiplyWideningAndAdd(left, right);

        // VPDPWUUD ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAdd(Vector256<ushort> left, Vector256<ushort> right) => MultiplyWideningAndAdd(left, right);

        // VPDPWSUDS xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<short> left, Vector128<ushort> right) => MultiplyWideningAndAddSaturate(left, right);

        // VPDPWUSDS xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<ushort> left, Vector128<short> right) => MultiplyWideningAndAddSaturate(left, right);

        // VPDPWUUDS xmm1{k1}{z}, xmm2, xmm3/m128/m32bcst
        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<ushort> left, Vector128<ushort> right) => MultiplyWideningAndAddSaturate(left, right);

        // VPDPWSUDS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<short> left, Vector256<ushort> right) => MultiplyWideningAndAddSaturate(left, right);

        // VPDPWUSDS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<ushort> left, Vector256<short> right) => MultiplyWideningAndAddSaturate(left, right);

        // VPDPWUUDS ymm1{k1}{z}, ymm2, ymm3/m256/m32bcst
        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<ushort> left, Vector256<ushort> right) => MultiplyWideningAndAddSaturate(left, right);

        /// <summary>512-bit overloads of the dot product intrinsics declared on the enclosing class</summary>
        [Intrinsic]
        public abstract class V512 : Avx10v1.V512
        {
            // ---- 16-bit element dot products, 512-bit ----

            // VPDPWSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAdd(Vector512<short> left, Vector512<ushort> right) => MultiplyWideningAndAdd(left, right);

            // VPDPWUSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAdd(Vector512<ushort> left, Vector512<short> right) => MultiplyWideningAndAdd(left, right);

            // VPDPWUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAdd(Vector512<ushort> left, Vector512<ushort> right) => MultiplyWideningAndAdd(left, right);

            // Operand signedness follows the mnemonic (SU = signed left, unsigned right),
            // matching the non-saturating overloads above and the 128/256-bit forms.
            // VPDPWSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<short> left, Vector512<ushort> right) => MultiplyWideningAndAddSaturate(left, right);

            // VPDPWUSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<ushort> left, Vector512<short> right) => MultiplyWideningAndAddSaturate(left, right);

            // VPDPWUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<ushort> left, Vector512<ushort> right) => MultiplyWideningAndAddSaturate(left, right);

            // ---- 8-bit element dot products, 512-bit ----

            // VPDPBSSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAdd(Vector512<sbyte> left, Vector512<sbyte> right) => MultiplyWideningAndAdd(left, right);

            // VPDPBSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAdd(Vector512<sbyte> left, Vector512<byte> right) => MultiplyWideningAndAdd(left, right);

            // VPDPBUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAdd(Vector512<byte> left, Vector512<byte> right) => MultiplyWideningAndAdd(left, right);

            // VPDPBSSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<sbyte> left, Vector512<sbyte> right) => MultiplyWideningAndAddSaturate(left, right);

            // VPDPBSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<sbyte> left, Vector512<byte> right) => MultiplyWideningAndAddSaturate(left, right);

            // VPDPBUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<byte> left, Vector512<byte> right) => MultiplyWideningAndAddSaturate(left, right);
        }
    }
}

API Usage

// Broadcast each scalar input across all lanes of a 128-bit vector.
// Vector128.Create is used so the result matches the declared Vector128<byte> type.
Vector128<byte> v1 = Vector128.Create((byte)someParam1);
Vector128<byte> v2 = Vector128.Create((byte)someParam2);
// IsSupported is a property, so it is read without parentheses.
if (Avx10v2.IsSupported) {
  // The proposed overload takes exactly two operands (left, right).
  Vector128<int> v3 = Avx10v2.MultiplyWideningAndAdd(v1, v2);
  // etc
}

Alternative Designs

No response

Risks

No response

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

No one assigned

    Labels

    api-suggestion (Early API idea and discussion, it is NOT ready for implementation) · area-System.Runtime.Intrinsics · untriaged (New issue has not been triaged by the area owner)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions