-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Open
Labels
api-suggestionEarly API idea and discussion, it is NOT ready for implementationEarly API idea and discussion, it is NOT ready for implementationarea-System.Runtime.Intrinsics
Milestone
Description
Background and motivation
The AVX512-VBMI2 Compress & Expand intrinsics have been approved and will soon be added as part of the new vector mask proposal. There is little, if any, reason not to add the remaining instructions from the AVX512-VBMI2 instruction set as well.
Notice:
- LeftLower and RightUpper versions don't exist; they can be replaced by
`lower << count` and `upper >> count`, which produce the same result because the other operand is unused.
API Proposal
namespace System.Runtime.Intrinsics.X86
{
/// <summary>
/// Proposed API surface for the AVX512-VBMI2 concatenate-and-shift instructions
/// (VPSHLD*, VPSHRD*, VPSHLDV*, VPSHRDV*). Each member forwards to itself, which is the
/// declaration shape used for [Intrinsic] members in this proposal.
/// </summary>
[Intrinsic]
public abstract class Avx512Vbmi2 : Avx512BW
{
public static new bool IsSupported { get => IsSupported; }
[Intrinsic]
public new abstract class X64 : Avx512BW.X64
{
public static new bool IsSupported { get => IsSupported; }
}
// Vector128 and Vector256 overloads of the same operations declared on the outer class for Vector512.
[Intrinsic]
public new abstract class VL : Avx512BW.VL
{
public static new bool IsSupported { get => IsSupported; }
/// <summary>
/// __m128i _mm_shldi_epi16 (__m128i a, __m128i b, int imm8)
/// VPSHLDW xmm, xmm, xmm/m128, imm8
/// </summary>
public static Vector128<ushort> ConcatenateShiftLeftUpper(Vector128<ushort> upper, Vector128<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);
/// <summary>
/// __m256i _mm256_shldi_epi16 (__m256i a, __m256i b, int imm8)
/// VPSHLDW ymm, ymm, ymm/m256, imm8
/// </summary>
public static Vector256<ushort> ConcatenateShiftLeftUpper(Vector256<ushort> upper, Vector256<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);
/// <summary>
/// __m128i _mm_shldi_epi32 (__m128i a, __m128i b, int imm8)
/// VPSHLDD xmm, xmm, xmm/m128, imm8
/// </summary>
public static Vector128<uint> ConcatenateShiftLeftUpper(Vector128<uint> upper, Vector128<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);
/// <summary>
/// __m256i _mm256_shldi_epi32 (__m256i a, __m256i b, int imm8)
/// VPSHLDD ymm, ymm, ymm/m256, imm8
/// </summary>
public static Vector256<uint> ConcatenateShiftLeftUpper(Vector256<uint> upper, Vector256<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);
/// <summary>
/// __m128i _mm_shldi_epi64 (__m128i a, __m128i b, int imm8)
/// VPSHLDQ xmm, xmm, xmm/m128, imm8
/// </summary>
public static Vector128<ulong> ConcatenateShiftLeftUpper(Vector128<ulong> upper, Vector128<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);
/// <summary>
/// __m256i _mm256_shldi_epi64 (__m256i a, __m256i b, int imm8)
/// VPSHLDQ ymm, ymm, ymm/m256, imm8
/// </summary>
public static Vector256<ulong> ConcatenateShiftLeftUpper(Vector256<ulong> upper, Vector256<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);
/// <summary>
/// __m128i _mm_shrdi_epi16 (__m128i a, __m128i b, int imm8)
/// VPSHRDW xmm, xmm, xmm/m128, imm8
/// </summary>
public static Vector128<ushort> ConcatenateShiftRightLower(Vector128<ushort> upper, Vector128<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);
/// <summary>
/// __m256i _mm256_shrdi_epi16 (__m256i a, __m256i b, int imm8)
/// VPSHRDW ymm, ymm, ymm/m256, imm8
/// </summary>
public static Vector256<ushort> ConcatenateShiftRightLower(Vector256<ushort> upper, Vector256<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);
/// <summary>
/// __m128i _mm_shrdi_epi32 (__m128i a, __m128i b, int imm8)
/// VPSHRDD xmm, xmm, xmm/m128, imm8
/// </summary>
public static Vector128<uint> ConcatenateShiftRightLower(Vector128<uint> upper, Vector128<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);
/// <summary>
/// __m256i _mm256_shrdi_epi32 (__m256i a, __m256i b, int imm8)
/// VPSHRDD ymm, ymm, ymm/m256, imm8
/// </summary>
public static Vector256<uint> ConcatenateShiftRightLower(Vector256<uint> upper, Vector256<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);
/// <summary>
/// __m128i _mm_shrdi_epi64 (__m128i a, __m128i b, int imm8)
/// VPSHRDQ xmm, xmm, xmm/m128, imm8
/// </summary>
public static Vector128<ulong> ConcatenateShiftRightLower(Vector128<ulong> upper, Vector128<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);
/// <summary>
/// __m256i _mm256_shrdi_epi64 (__m256i a, __m256i b, int imm8)
/// VPSHRDQ ymm, ymm, ymm/m256, imm8
/// </summary>
public static Vector256<ulong> ConcatenateShiftRightLower(Vector256<ulong> upper, Vector256<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);
/// <summary>
/// __m128i _mm_shldv_epi16 (__m128i a, __m128i b, __m128i c)
/// VPSHLDVW xmm, xmm, xmm/m128
/// </summary>
public static Vector128<ushort> ConcatenateShiftLeftUpperVariable(Vector128<ushort> upper, Vector128<ushort> lower, Vector128<ushort> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);
/// <summary>
/// __m256i _mm256_shldv_epi16 (__m256i a, __m256i b, __m256i c)
/// VPSHLDVW ymm, ymm, ymm/m256
/// </summary>
public static Vector256<ushort> ConcatenateShiftLeftUpperVariable(Vector256<ushort> upper, Vector256<ushort> lower, Vector256<ushort> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);
/// <summary>
/// __m128i _mm_shldv_epi32 (__m128i a, __m128i b, __m128i c)
/// VPSHLDVD xmm, xmm, xmm/m128
/// </summary>
public static Vector128<uint> ConcatenateShiftLeftUpperVariable(Vector128<uint> upper, Vector128<uint> lower, Vector128<uint> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);
/// <summary>
/// __m256i _mm256_shldv_epi32 (__m256i a, __m256i b, __m256i c)
/// VPSHLDVD ymm, ymm, ymm/m256
/// </summary>
public static Vector256<uint> ConcatenateShiftLeftUpperVariable(Vector256<uint> upper, Vector256<uint> lower, Vector256<uint> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);
/// <summary>
/// __m128i _mm_shldv_epi64 (__m128i a, __m128i b, __m128i c)
/// VPSHLDVQ xmm, xmm, xmm/m128
/// </summary>
public static Vector128<ulong> ConcatenateShiftLeftUpperVariable(Vector128<ulong> upper, Vector128<ulong> lower, Vector128<ulong> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);
/// <summary>
/// __m256i _mm256_shldv_epi64 (__m256i a, __m256i b, __m256i c)
/// VPSHLDVQ ymm, ymm, ymm/m256
/// </summary>
public static Vector256<ulong> ConcatenateShiftLeftUpperVariable(Vector256<ulong> upper, Vector256<ulong> lower, Vector256<ulong> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);
/// <summary>
/// __m128i _mm_shrdv_epi16 (__m128i a, __m128i b, __m128i c)
/// VPSHRDVW xmm, xmm, xmm/m128
/// </summary>
public static Vector128<ushort> ConcatenateShiftRightLowerVariable(Vector128<ushort> upper, Vector128<ushort> lower, Vector128<ushort> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
/// <summary>
/// __m256i _mm256_shrdv_epi16 (__m256i a, __m256i b, __m256i c)
/// VPSHRDVW ymm, ymm, ymm/m256
/// </summary>
public static Vector256<ushort> ConcatenateShiftRightLowerVariable(Vector256<ushort> upper, Vector256<ushort> lower, Vector256<ushort> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
/// <summary>
/// __m128i _mm_shrdv_epi32 (__m128i a, __m128i b, __m128i c)
/// VPSHRDVD xmm, xmm, xmm/m128
/// </summary>
public static Vector128<uint> ConcatenateShiftRightLowerVariable(Vector128<uint> upper, Vector128<uint> lower, Vector128<uint> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
/// <summary>
/// __m256i _mm256_shrdv_epi32 (__m256i a, __m256i b, __m256i c)
/// VPSHRDVD ymm, ymm, ymm/m256
/// </summary>
public static Vector256<uint> ConcatenateShiftRightLowerVariable(Vector256<uint> upper, Vector256<uint> lower, Vector256<uint> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
/// <summary>
/// __m128i _mm_shrdv_epi64 (__m128i a, __m128i b, __m128i c)
/// VPSHRDVQ xmm, xmm, xmm/m128
/// </summary>
public static Vector128<ulong> ConcatenateShiftRightLowerVariable(Vector128<ulong> upper, Vector128<ulong> lower, Vector128<ulong> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
/// <summary>
/// __m256i _mm256_shrdv_epi64 (__m256i a, __m256i b, __m256i c)
/// VPSHRDVQ ymm, ymm, ymm/m256
/// </summary>
public static Vector256<ulong> ConcatenateShiftRightLowerVariable(Vector256<ulong> upper, Vector256<ulong> lower, Vector256<ulong> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
}
/// <summary>
/// __m512i _mm512_shldi_epi16 (__m512i a, __m512i b, int imm8)
/// VPSHLDW zmm, zmm, zmm/m512, imm8
/// </summary>
public static Vector512<ushort> ConcatenateShiftLeftUpper(Vector512<ushort> upper, Vector512<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);
/// <summary>
/// __m512i _mm512_shldi_epi32 (__m512i a, __m512i b, int imm8)
/// VPSHLDD zmm, zmm, zmm/m512, imm8
/// </summary>
public static Vector512<uint> ConcatenateShiftLeftUpper(Vector512<uint> upper, Vector512<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);
/// <summary>
/// __m512i _mm512_shldi_epi64 (__m512i a, __m512i b, int imm8)
/// VPSHLDQ zmm, zmm, zmm/m512, imm8
/// </summary>
public static Vector512<ulong> ConcatenateShiftLeftUpper(Vector512<ulong> upper, Vector512<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftLeftUpper(upper, lower, count);
/// <summary>
/// __m512i _mm512_shrdi_epi16 (__m512i a, __m512i b, int imm8)
/// VPSHRDW zmm, zmm, zmm/m512, imm8
/// </summary>
public static Vector512<ushort> ConcatenateShiftRightLower(Vector512<ushort> upper, Vector512<ushort> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);
/// <summary>
/// __m512i _mm512_shrdi_epi32 (__m512i a, __m512i b, int imm8)
/// VPSHRDD zmm, zmm, zmm/m512, imm8
/// </summary>
public static Vector512<uint> ConcatenateShiftRightLower(Vector512<uint> upper, Vector512<uint> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);
/// <summary>
/// __m512i _mm512_shrdi_epi64 (__m512i a, __m512i b, int imm8)
/// VPSHRDQ zmm, zmm, zmm/m512, imm8
/// </summary>
public static Vector512<ulong> ConcatenateShiftRightLower(Vector512<ulong> upper, Vector512<ulong> lower, [ConstantExpected] byte count) => ConcatenateShiftRightLower(upper, lower, count);
/// <summary>
/// __m512i _mm512_shldv_epi16 (__m512i a, __m512i b, __m512i c)
/// VPSHLDVW zmm, zmm, zmm/m512
/// </summary>
public static Vector512<ushort> ConcatenateShiftLeftUpperVariable(Vector512<ushort> upper, Vector512<ushort> lower, Vector512<ushort> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);
/// <summary>
/// __m512i _mm512_shldv_epi32 (__m512i a, __m512i b, __m512i c)
/// VPSHLDVD zmm, zmm, zmm/m512
/// </summary>
public static Vector512<uint> ConcatenateShiftLeftUpperVariable(Vector512<uint> upper, Vector512<uint> lower, Vector512<uint> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);
/// <summary>
/// __m512i _mm512_shldv_epi64 (__m512i a, __m512i b, __m512i c)
/// VPSHLDVQ zmm, zmm, zmm/m512
/// </summary>
public static Vector512<ulong> ConcatenateShiftLeftUpperVariable(Vector512<ulong> upper, Vector512<ulong> lower, Vector512<ulong> count) => ConcatenateShiftLeftUpperVariable(upper, lower, count);
/// <summary>
/// __m512i _mm512_shrdv_epi16 (__m512i a, __m512i b, __m512i c)
/// VPSHRDVW zmm, zmm, zmm/m512
/// </summary>
public static Vector512<ushort> ConcatenateShiftRightLowerVariable(Vector512<ushort> upper, Vector512<ushort> lower, Vector512<ushort> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
/// <summary>
/// __m512i _mm512_shrdv_epi32 (__m512i a, __m512i b, __m512i c)
/// VPSHRDVD zmm, zmm, zmm/m512
/// </summary>
public static Vector512<uint> ConcatenateShiftRightLowerVariable(Vector512<uint> upper, Vector512<uint> lower, Vector512<uint> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
/// <summary>
/// __m512i _mm512_shrdv_epi64 (__m512i a, __m512i b, __m512i c)
/// VPSHRDVQ zmm, zmm, zmm/m512
/// </summary>
public static Vector512<ulong> ConcatenateShiftRightLowerVariable(Vector512<ulong> upper, Vector512<ulong> lower, Vector512<ulong> count) => ConcatenateShiftRightLowerVariable(upper, lower, count);
}
}
API Usage
// Avx512Vbmi2.ConcatenateShift(LeftUpper/RightLower)(...) example
// The operands are named upper_data / lower_data so they match the call that follows.
Vector512<ushort> upper_data = GetData(), lower_data = GetData();
// With count = 8, each result element holds the lower 8 bits of the ushort in upper_data (as its high byte)
// followed by the upper 8 bits of the corresponding ushort in lower_data (as its low byte).
// In general, each result element is (upper << count) | (lower >> (16 - count)): the lower (16 - count) bits
// of the upper_data element followed by the upper count bits of the lower_data element.
var result = Avx512Vbmi2.ConcatenateShiftLeftUpper(upper_data, lower_data, 8);
Alternative Designs
N/A
Risks
N/A
MineCake147E, saucecontrol, PaulusParssinen and ovska
Metadata
Metadata
Assignees
Labels
api-suggestionEarly API idea and discussion, it is NOT ready for implementationEarly API idea and discussion, it is NOT ready for implementationarea-System.Runtime.Intrinsics