Skip to content

[API Proposal]: Expose remaining AVX512-VBMI2 hardware instructions #88946

@MadProbe

Description

@MadProbe

Background and motivation

The AVX512-VBMI2 Compress and Expand intrinsics are already approved and will soon be added as part of the new vector mask proposal. There is little, if any, reason not to expose the remaining instructions from the AVX512-VBMI2 instruction set as well.
Notice:

  1. LeftLower and RightUpper versions don't exist; they can be replaced by lower << count and upper >> count, which produce the same result because the other operand would be unused.

API Proposal

namespace System.Runtime.Intrinsics.X86
{
    /// <summary>
    /// Proposed API surface for the remaining AVX512-VBMI2 concatenate-and-shift instructions
    /// (VPSHLD*/VPSHRD* with an immediate count, and VPSHLDV*/VPSHRDV* with a per-element count).
    /// </summary>
    // NOTE(review): every member body below simply calls itself; presumably the JIT replaces these
    // calls with the hardware instruction because of [Intrinsic] — confirm against the runtime sources.
    [Intrinsic]
    public abstract class Avx512Vbmi2 : Avx512BW
    {
        public static new bool IsSupported { get => IsSupported; }

        /// <summary>
        /// Nested class for the 64-bit-only portion of the API; it declares no instructions in this proposal.
        /// </summary>
        [Intrinsic]
        public new abstract class X64 : Avx512BW.X64
        {
            public static new bool IsSupported { get => IsSupported; }
        }

        /// <summary>
        /// 128-bit (xmm) and 256-bit (ymm) overloads of the concatenate-and-shift instructions.
        /// </summary>
        [Intrinsic]
        public new abstract class VL : Avx512BW.VL
        {
            public static new bool IsSupported { get => IsSupported; }

            /// <summary>
            /// __m128i _mm_shldi_epi16 (__m128i a, __m128i b, int imm8)
            /// VPSHLDW xmm, xmm, xmm/m128, imm8
            /// </summary>
            public static Vector128<ushort> ConcatenateShiftLeftUpper(Vector128<ushort> upper, Vector128<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shldi_epi16 (__m256i a, __m256i b, int imm8)
            /// VPSHLDW ymm, ymm, ymm/m256, imm8
            /// </summary>
            public static Vector256<ushort> ConcatenateShiftLeftUpper(Vector256<ushort> upper, Vector256<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shldi_epi32 (__m128i a, __m128i b, int imm8)
            /// VPSHLDD xmm, xmm, xmm/m128, imm8
            /// </summary>
            public static Vector128<uint> ConcatenateShiftLeftUpper(Vector128<uint> upper, Vector128<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shldi_epi32 (__m256i a, __m256i b, int imm8)
            /// VPSHLDD ymm, ymm, ymm/m256, imm8
            /// </summary>
            public static Vector256<uint> ConcatenateShiftLeftUpper(Vector256<uint> upper, Vector256<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shldi_epi64 (__m128i a, __m128i b, int imm8)
            /// VPSHLDQ xmm, xmm, xmm/m128, imm8
            /// </summary>
            public static Vector128<ulong> ConcatenateShiftLeftUpper(Vector128<ulong> upper, Vector128<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shldi_epi64 (__m256i a, __m256i b, int imm8)
            /// VPSHLDQ ymm, ymm, ymm/m256, imm8
            /// </summary>
            public static Vector256<ulong> ConcatenateShiftLeftUpper(Vector256<ulong> upper, Vector256<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shrdi_epi16 (__m128i a, __m128i b, int imm8)
            /// VPSHRDW xmm, xmm, xmm/m128, imm8
            /// </summary>
            public static Vector128<ushort> ConcatenateShiftRightLower(Vector128<ushort> upper, Vector128<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shrdi_epi16 (__m256i a, __m256i b, int imm8)
            /// VPSHRDW ymm, ymm, ymm/m256, imm8
            /// </summary>
            public static Vector256<ushort> ConcatenateShiftRightLower(Vector256<ushort> upper, Vector256<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shrdi_epi32 (__m128i a, __m128i b, int imm8)
            /// VPSHRDD xmm, xmm, xmm/m128, imm8
            /// </summary>
            public static Vector128<uint> ConcatenateShiftRightLower(Vector128<uint> upper, Vector128<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shrdi_epi32 (__m256i a, __m256i b, int imm8)
            /// VPSHRDD ymm, ymm, ymm/m256, imm8
            /// </summary>
            public static Vector256<uint> ConcatenateShiftRightLower(Vector256<uint> upper, Vector256<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shrdi_epi64 (__m128i a, __m128i b, int imm8)
            /// VPSHRDQ xmm, xmm, xmm/m128, imm8
            /// </summary>
            public static Vector128<ulong> ConcatenateShiftRightLower(Vector128<ulong> upper, Vector128<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shrdi_epi64 (__m256i a, __m256i b, int imm8)
            /// VPSHRDQ ymm, ymm, ymm/m256, imm8
            /// </summary>
            public static Vector256<ulong> ConcatenateShiftRightLower(Vector256<ulong> upper, Vector256<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shldv_epi16 (__m128i a, __m128i b, __m128i c)
            /// VPSHLDVW xmm, xmm, xmm/m128
            /// </summary>
            public static Vector128<ushort> ConcatenateShiftLeftUpperVariable(Vector128<ushort> upper, Vector128<ushort> lower, Vector128<ushort> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shldv_epi16 (__m256i a, __m256i b, __m256i c)
            /// VPSHLDVW ymm, ymm, ymm/m256
            /// </summary>
            public static Vector256<ushort> ConcatenateShiftLeftUpperVariable(Vector256<ushort> upper, Vector256<ushort> lower, Vector256<ushort> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shldv_epi32 (__m128i a, __m128i b, __m128i c)
            /// VPSHLDVD xmm, xmm, xmm/m128
            /// </summary>
            public static Vector128<uint> ConcatenateShiftLeftUpperVariable(Vector128<uint> upper, Vector128<uint> lower, Vector128<uint> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shldv_epi32 (__m256i a, __m256i b, __m256i c)
            /// VPSHLDVD ymm, ymm, ymm/m256
            /// </summary>
            public static Vector256<uint> ConcatenateShiftLeftUpperVariable(Vector256<uint> upper, Vector256<uint> lower, Vector256<uint> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shldv_epi64 (__m128i a, __m128i b, __m128i c)
            /// VPSHLDVQ xmm, xmm, xmm/m128
            /// </summary>
            public static Vector128<ulong> ConcatenateShiftLeftUpperVariable(Vector128<ulong> upper, Vector128<ulong> lower, Vector128<ulong> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shldv_epi64 (__m256i a, __m256i b, __m256i c)
            /// VPSHLDVQ ymm, ymm, ymm/m256
            /// </summary>
            public static Vector256<ulong> ConcatenateShiftLeftUpperVariable(Vector256<ulong> upper, Vector256<ulong> lower, Vector256<ulong> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shrdv_epi16 (__m128i a, __m128i b, __m128i c)
            /// VPSHRDVW xmm, xmm, xmm/m128
            /// </summary>
            public static Vector128<ushort> ConcatenateShiftRightLowerVariable(Vector128<ushort> upper, Vector128<ushort> lower, Vector128<ushort> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shrdv_epi16 (__m256i a, __m256i b, __m256i c)
            /// VPSHRDVW ymm, ymm, ymm/m256
            /// </summary>
            public static Vector256<ushort> ConcatenateShiftRightLowerVariable(Vector256<ushort> upper, Vector256<ushort> lower, Vector256<ushort> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shrdv_epi32 (__m128i a, __m128i b, __m128i c)
            /// VPSHRDVD xmm, xmm, xmm/m128
            /// </summary>
            public static Vector128<uint> ConcatenateShiftRightLowerVariable(Vector128<uint> upper, Vector128<uint> lower, Vector128<uint> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shrdv_epi32 (__m256i a, __m256i b, __m256i c)
            /// VPSHRDVD ymm, ymm, ymm/m256
            /// </summary>
            public static Vector256<uint> ConcatenateShiftRightLowerVariable(Vector256<uint> upper, Vector256<uint> lower, Vector256<uint> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);

            /// <summary>
            /// __m128i _mm_shrdv_epi64 (__m128i a, __m128i b, __m128i c)
            /// VPSHRDVQ xmm, xmm, xmm/m128
            /// </summary>
            public static Vector128<ulong> ConcatenateShiftRightLowerVariable(Vector128<ulong> upper, Vector128<ulong> lower, Vector128<ulong> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);

            /// <summary>
            /// __m256i _mm256_shrdv_epi64 (__m256i a, __m256i b, __m256i c)
            /// VPSHRDVQ ymm, ymm, ymm/m256
            /// </summary>
            public static Vector256<ulong> ConcatenateShiftRightLowerVariable(Vector256<ulong> upper, Vector256<ulong> lower, Vector256<ulong> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
        }

        /// <summary>
        /// __m512i _mm512_shldi_epi16 (__m512i a, __m512i b, int imm8)
        /// VPSHLDW zmm, zmm, zmm/m512, imm8
        /// </summary>
        public static Vector512<ushort> ConcatenateShiftLeftUpper(Vector512<ushort> upper, Vector512<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shldi_epi32 (__m512i a, __m512i b, int imm8)
        /// VPSHLDD zmm, zmm, zmm/m512, imm8
        /// </summary>
        public static Vector512<uint> ConcatenateShiftLeftUpper(Vector512<uint> upper, Vector512<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shldi_epi64 (__m512i a, __m512i b, int imm8)
        /// VPSHLDQ zmm, zmm, zmm/m512, imm8
        /// </summary>
        public static Vector512<ulong> ConcatenateShiftLeftUpper(Vector512<ulong> upper, Vector512<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shrdi_epi16 (__m512i a, __m512i b, int imm8)
        /// VPSHRDW zmm, zmm, zmm/m512, imm8
        /// </summary>
        public static Vector512<ushort> ConcatenateShiftRightLower(Vector512<ushort> upper, Vector512<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shrdi_epi32 (__m512i a, __m512i b, int imm8)
        /// VPSHRDD zmm, zmm, zmm/m512, imm8
        /// </summary>
        public static Vector512<uint> ConcatenateShiftRightLower(Vector512<uint> upper, Vector512<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shrdi_epi64 (__m512i a, __m512i b, int imm8)
        /// VPSHRDQ zmm, zmm, zmm/m512, imm8
        /// </summary>
        public static Vector512<ulong> ConcatenateShiftRightLower(Vector512<ulong> upper, Vector512<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shldv_epi16 (__m512i a, __m512i b, __m512i c)
        /// VPSHLDVW zmm, zmm, zmm/m512
        /// </summary>
        public static Vector512<ushort> ConcatenateShiftLeftUpperVariable(Vector512<ushort> upper, Vector512<ushort> lower, Vector512<ushort> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shldv_epi32 (__m512i a, __m512i b, __m512i c)
        /// VPSHLDVD zmm, zmm, zmm/m512
        /// </summary>
        public static Vector512<uint> ConcatenateShiftLeftUpperVariable(Vector512<uint> upper, Vector512<uint> lower, Vector512<uint> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shldv_epi64 (__m512i a, __m512i b, __m512i c)
        /// VPSHLDVQ zmm, zmm, zmm/m512
        /// </summary>
        public static Vector512<ulong> ConcatenateShiftLeftUpperVariable(Vector512<ulong> upper, Vector512<ulong> lower, Vector512<ulong> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shrdv_epi16 (__m512i a, __m512i b, __m512i c)
        /// VPSHRDVW zmm, zmm, zmm/m512
        /// </summary>
        public static Vector512<ushort> ConcatenateShiftRightLowerVariable(Vector512<ushort> upper, Vector512<ushort> lower, Vector512<ushort> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shrdv_epi32 (__m512i a, __m512i b, __m512i c)
        /// VPSHRDVD zmm, zmm, zmm/m512
        /// </summary>
        public static Vector512<uint> ConcatenateShiftRightLowerVariable(Vector512<uint> upper, Vector512<uint> lower, Vector512<uint> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);

        /// <summary>
        /// __m512i _mm512_shrdv_epi64 (__m512i a, __m512i b, __m512i c)
        /// VPSHRDVQ zmm, zmm, zmm/m512
        /// </summary>
        public static Vector512<ulong> ConcatenateShiftRightLowerVariable(Vector512<ulong> upper, Vector512<ulong> lower, Vector512<ulong> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
    }
}

API Usage

// Avx512Vbmi2.ConcatenateShift(LeftUpper/RightLower)(...) example
// The two inputs are named after the roles they play in the call below.
Vector512<ushort> upper_data = GetData(), lower_data = GetData();
// With a count of 8, each result element is (upper_data << 8) | (lower_data >> 8):
// the lower 8 bits of the ushort in upper_data followed by the upper 8 bits of the ushort in lower_data.
// In general, a shift of `count` yields the lower (16 - count) bits of the ushort in upper_data
// followed by the upper `count` bits of the ushort in lower_data.
var result = Avx512Vbmi2.ConcatenateShiftLeftUpper(upper_data, lower_data, 8);

Alternative Designs

N/A

Risks

N/A

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions