Description
Background and Motivation
Over the past few releases of .NET Core (now called just .NET), we have released a set of APIs called hardware intrinsics that provide access to platform/architecture-specific functionality so that developers can accelerate their code. This started with x86/x64 support in .NET Core 3 and was extended with Arm64 support in .NET 5.
WASM is also looking at providing SIMD support and currently has a nearly complete, but not yet approved, draft specification (https://github.com/WebAssembly/simd/blob/main/proposals/simd/SIMD.md). As such, we should look at adding the relevant APIs to support this platform and eventually tie them into the "cross platform" helpers so we can trivially light up our existing code paths to also run on WASM: #49397
Note: This could be reviewed, approved, and implemented using the [RequiresPreviewFeatures] attribute, as most browsers already have some experimental support for the feature (https://github.com/WebAssembly/simd/blob/main/proposals/simd/ImplementationStatus.md). It would then become stable, with the relevant fixups, when the spec is finalized.
Proposed API
namespace System.Runtime.Intrinsics.Wasm
{
public abstract class WasmBase
{
// Static, like every other member of this class: WasmBase is abstract, so an
// instance property could never be read as WasmBase.IsSupported.
public static bool IsSupported { get; }
// Constructing SIMD Values
public static Vector128<T> Constant(ImmByte[16] imm);
// Each overload takes the lane's own element type. C# cannot overload on return
// type alone, so the narrow-lane overloads must not all accept int/uint (CS0111).
public static Vector128<sbyte> Splat(sbyte x);
public static Vector128<byte> Splat(byte x);
public static Vector128<short> Splat(short x);
public static Vector128<ushort> Splat(ushort x);
public static Vector128<int> Splat(int x);
public static Vector128<uint> Splat(uint x);
public static Vector128<long> Splat(long x);
public static Vector128<ulong> Splat(ulong x);
public static Vector128<float> Splat(float x);
public static Vector128<double> Splat(double x);
public static Vector128<nint> Splat(nint x);
public static Vector128<nuint> Splat(nuint x);
// Accessing lanes
public static int ExtractLane(Vector128<sbyte> a, byte imm); // takes ImmLaneIdx16
public static uint ExtractLane(Vector128<byte> a, byte imm); // takes ImmLaneIdx16
public static int ExtractLane(Vector128<short> a, byte imm); // takes ImmLaneIdx8
public static uint ExtractLane(Vector128<ushort> a, byte imm); // takes ImmLaneIdx8
public static int ExtractLane(Vector128<int> a, byte imm); // takes ImmLaneIdx4
public static uint ExtractLane(Vector128<uint> a, byte imm); // takes ImmLaneIdx4
public static long ExtractLane(Vector128<long> a, byte imm); // takes ImmLaneIdx2
public static ulong ExtractLane(Vector128<ulong> a, byte imm); // takes ImmLaneIdx2
public static float ExtractLane(Vector128<float> a, byte imm); // takes ImmLaneIdx4
public static double ExtractLane(Vector128<double> a, byte imm); // takes ImmLaneIdx2
public static nint ExtractLane(Vector128<nint> a, byte imm);
public static nuint ExtractLane(Vector128<nuint> a, byte imm);
public static Vector128<sbyte> ReplaceLane(Vector128<sbyte> a, byte imm, int x); // takes ImmLaneIdx16
public static Vector128<byte> ReplaceLane(Vector128<byte> a, byte imm, uint x); // takes ImmLaneIdx16
public static Vector128<short> ReplaceLane(Vector128<short> a, byte imm, int x); // takes ImmLaneIdx8
public static Vector128<ushort> ReplaceLane(Vector128<ushort> a, byte imm, uint x); // takes ImmLaneIdx8
public static Vector128<int> ReplaceLane(Vector128<int> a, byte imm, int x); // takes ImmLaneIdx4
// Returns Vector128<uint> so the result type matches the input vector, as in every other ReplaceLane overload.
public static Vector128<uint> ReplaceLane(Vector128<uint> a, byte imm, uint x); // takes ImmLaneIdx4
public static Vector128<long> ReplaceLane(Vector128<long> a, byte imm, long x); // takes ImmLaneIdx2
public static Vector128<ulong> ReplaceLane(Vector128<ulong> a, byte imm, ulong x); // takes ImmLaneIdx2
public static Vector128<float> ReplaceLane(Vector128<float> a, byte imm, float x); // takes ImmLaneIdx4
public static Vector128<double> ReplaceLane(Vector128<double> a, byte imm, double x); // takes ImmLaneIdx2
public static Vector128<nint> ReplaceLane(Vector128<nint> a, byte imm, nint x);
public static Vector128<nuint> ReplaceLane(Vector128<nuint> a, byte imm, nuint x);
public static Vector128<sbyte> Shuffle(Vector128<sbyte> a, Vector128<sbyte> b, ImmLaneIdx32[16] imm);
public static Vector128<byte> Shuffle(Vector128<byte> a, Vector128<byte> b, ImmLaneIdx32[16] imm);
public static Vector128<sbyte> Swizzle(Vector128<sbyte> a, Vector128<sbyte> s);
public static Vector128<byte> Swizzle(Vector128<byte> a, Vector128<byte> s);
// Integer arithmetic
public static Vector128<sbyte> Add(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> Add(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> Add(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> Add(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> Add(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> Add(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> Add(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> Add(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<nint> Add(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> Add(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<sbyte> Subtract(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> Subtract(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> Subtract(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> Subtract(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> Subtract(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> Subtract(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> Subtract(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> Subtract(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<nint> Subtract(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> Subtract(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<short> Multiply(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> Multiply(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> Multiply(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> Multiply(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> Multiply(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> Multiply(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<nint> Multiply(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> Multiply(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<int> Dot(Vector128<short> a, Vector128<short> b);
public static Vector128<sbyte> Negate(Vector128<sbyte> a);
public static Vector128<byte> Negate(Vector128<byte> a);
public static Vector128<short> Negate(Vector128<short> a);
public static Vector128<ushort> Negate(Vector128<ushort> a);
public static Vector128<int> Negate(Vector128<int> a);
public static Vector128<uint> Negate(Vector128<uint> a);
public static Vector128<long> Negate(Vector128<long> a);
public static Vector128<ulong> Negate(Vector128<ulong> a);
public static Vector128<nint> Negate(Vector128<nint> a);
public static Vector128<nuint> Negate(Vector128<nuint> a);
// Extended integer arithmetic
public static Vector128<short> MultiplyWideningLower(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<ushort> MultiplyWideningLower(Vector128<byte> a, Vector128<byte> b);
public static Vector128<int> MultiplyWideningLower(Vector128<short> a, Vector128<short> b);
public static Vector128<uint> MultiplyWideningLower(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<long> MultiplyWideningLower(Vector128<int> a, Vector128<int> b);
public static Vector128<ulong> MultiplyWideningLower(Vector128<uint> a, Vector128<uint> b);
public static Vector128<short> MultiplyWideningUpper(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<ushort> MultiplyWideningUpper(Vector128<byte> a, Vector128<byte> b);
public static Vector128<int> MultiplyWideningUpper(Vector128<short> a, Vector128<short> b);
public static Vector128<uint> MultiplyWideningUpper(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<long> MultiplyWideningUpper(Vector128<int> a, Vector128<int> b);
public static Vector128<ulong> MultiplyWideningUpper(Vector128<uint> a, Vector128<uint> b);
public static Vector128<short> AddPairwiseWidening(Vector128<sbyte> a);
public static Vector128<ushort> AddPairwiseWidening(Vector128<byte> a);
public static Vector128<int> AddPairwiseWidening(Vector128<short> a);
public static Vector128<uint> AddPairwiseWidening(Vector128<ushort> a);
// Saturating integer arithmetic
public static Vector128<sbyte> AddSaturate(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> AddSaturate(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> AddSaturate(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> AddSaturate(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<sbyte> SubtractSaturate(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> SubtractSaturate(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> SubtractSaturate(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> SubtractSaturate(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<short> MultiplyRoundedSaturateQ15(Vector128<short> a, Vector128<short> b);
public static Vector128<sbyte> Min(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> Min(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> Min(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> Min(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> Min(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> Min(Vector128<uint> a, Vector128<uint> b);
public static Vector128<sbyte> Max(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> Max(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> Max(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> Max(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> Max(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> Max(Vector128<uint> a, Vector128<uint> b);
public static Vector128<byte> AverageRounded(Vector128<byte> a, Vector128<byte> b);
public static Vector128<ushort> AverageRounded(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<sbyte> Abs(Vector128<sbyte> a);
public static Vector128<short> Abs(Vector128<short> a);
public static Vector128<int> Abs(Vector128<int> a);
public static Vector128<long> Abs(Vector128<long> a);
public static Vector128<nint> Abs(Vector128<nint> a);
// Bit shifts
public static Vector128<sbyte> ShiftLeft(Vector128<sbyte> a, int y);
public static Vector128<byte> ShiftLeft(Vector128<byte> a, int y);
public static Vector128<short> ShiftLeft(Vector128<short> a, int y);
public static Vector128<ushort> ShiftLeft(Vector128<ushort> a, int y);
public static Vector128<int> ShiftLeft(Vector128<int> a, int y);
public static Vector128<uint> ShiftLeft(Vector128<uint> a, int y);
public static Vector128<long> ShiftLeft(Vector128<long> a, int y);
public static Vector128<ulong> ShiftLeft(Vector128<ulong> a, int y);
public static Vector128<nint> ShiftLeft(Vector128<nint> a, int y);
public static Vector128<nuint> ShiftLeft(Vector128<nuint> a, int y);
public static Vector128<sbyte> ShiftRightArithmetic(Vector128<sbyte> a, int y);
public static Vector128<byte> ShiftRightArithmetic(Vector128<byte> a, int y);
public static Vector128<short> ShiftRightArithmetic(Vector128<short> a, int y);
public static Vector128<ushort> ShiftRightArithmetic(Vector128<ushort> a, int y);
public static Vector128<int> ShiftRightArithmetic(Vector128<int> a, int y);
public static Vector128<uint> ShiftRightArithmetic(Vector128<uint> a, int y);
public static Vector128<long> ShiftRightArithmetic(Vector128<long> a, int y);
public static Vector128<ulong> ShiftRightArithmetic(Vector128<ulong> a, int y);
public static Vector128<nint> ShiftRightArithmetic(Vector128<nint> a, int y);
public static Vector128<nuint> ShiftRightArithmetic(Vector128<nuint> a, int y);
public static Vector128<sbyte> ShiftRightLogical(Vector128<sbyte> a, int y);
public static Vector128<byte> ShiftRightLogical(Vector128<byte> a, int y);
public static Vector128<short> ShiftRightLogical(Vector128<short> a, int y);
public static Vector128<ushort> ShiftRightLogical(Vector128<ushort> a, int y);
public static Vector128<int> ShiftRightLogical(Vector128<int> a, int y);
public static Vector128<uint> ShiftRightLogical(Vector128<uint> a, int y);
public static Vector128<long> ShiftRightLogical(Vector128<long> a, int y);
public static Vector128<ulong> ShiftRightLogical(Vector128<ulong> a, int y);
public static Vector128<nint> ShiftRightLogical(Vector128<nint> a, int y);
public static Vector128<nuint> ShiftRightLogical(Vector128<nuint> a, int y);
// Bitwise operations
public static Vector128<sbyte> And(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> And(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> And(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> And(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> And(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> And(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> And(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> And(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<float> And(Vector128<float> a, Vector128<float> b);
public static Vector128<double> And(Vector128<double> a, Vector128<double> b);
public static Vector128<nint> And(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> And(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<sbyte> Or(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> Or(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> Or(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> Or(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> Or(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> Or(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> Or(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> Or(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<float> Or(Vector128<float> a, Vector128<float> b);
public static Vector128<double> Or(Vector128<double> a, Vector128<double> b);
public static Vector128<nint> Or(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> Or(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<sbyte> Xor(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> Xor(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> Xor(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> Xor(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> Xor(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> Xor(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> Xor(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> Xor(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<float> Xor(Vector128<float> a, Vector128<float> b);
public static Vector128<double> Xor(Vector128<double> a, Vector128<double> b);
public static Vector128<nint> Xor(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> Xor(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<sbyte> Not(Vector128<sbyte> a);
public static Vector128<byte> Not(Vector128<byte> a);
public static Vector128<short> Not(Vector128<short> a);
public static Vector128<ushort> Not(Vector128<ushort> a);
public static Vector128<int> Not(Vector128<int> a);
public static Vector128<uint> Not(Vector128<uint> a);
public static Vector128<long> Not(Vector128<long> a);
public static Vector128<ulong> Not(Vector128<ulong> a);
public static Vector128<float> Not(Vector128<float> a);
public static Vector128<double> Not(Vector128<double> a);
public static Vector128<nint> Not(Vector128<nint> a);
public static Vector128<nuint> Not(Vector128<nuint> a);
public static Vector128<sbyte> AndNot(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> AndNot(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> AndNot(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> AndNot(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> AndNot(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> AndNot(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> AndNot(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> AndNot(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<float> AndNot(Vector128<float> a, Vector128<float> b);
public static Vector128<double> AndNot(Vector128<double> a, Vector128<double> b);
public static Vector128<nint> AndNot(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> AndNot(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<sbyte> BitwiseSelect(Vector128<sbyte> a, Vector128<sbyte> b, Vector128<sbyte> c);
public static Vector128<byte> BitwiseSelect(Vector128<byte> a, Vector128<byte> b, Vector128<byte> c);
public static Vector128<short> BitwiseSelect(Vector128<short> a, Vector128<short> b, Vector128<short> c);
public static Vector128<ushort> BitwiseSelect(Vector128<ushort> a, Vector128<ushort> b, Vector128<ushort> c);
public static Vector128<int> BitwiseSelect(Vector128<int> a, Vector128<int> b, Vector128<int> c);
public static Vector128<uint> BitwiseSelect(Vector128<uint> a, Vector128<uint> b, Vector128<uint> c);
public static Vector128<long> BitwiseSelect(Vector128<long> a, Vector128<long> b, Vector128<long> c);
public static Vector128<ulong> BitwiseSelect(Vector128<ulong> a, Vector128<ulong> b, Vector128<ulong> c);
public static Vector128<float> BitwiseSelect(Vector128<float> a, Vector128<float> b, Vector128<float> c);
public static Vector128<double> BitwiseSelect(Vector128<double> a, Vector128<double> b, Vector128<double> c);
public static Vector128<nint> BitwiseSelect(Vector128<nint> a, Vector128<nint> b, Vector128<nint> c);
public static Vector128<nuint> BitwiseSelect(Vector128<nuint> a, Vector128<nuint> b, Vector128<nuint> c);
public static Vector128<byte> PopCount(Vector128<byte> v);
// Boolean horizontal reductions
public static bool AnyTrue(Vector128<sbyte> a); // returns i32, AnyBitSet?
public static bool AnyTrue(Vector128<byte> a); // returns i32
public static bool AnyTrue(Vector128<short> a); // returns i32
public static bool AnyTrue(Vector128<ushort> a); // returns i32
public static bool AnyTrue(Vector128<int> a); // returns i32
public static bool AnyTrue(Vector128<uint> a); // returns i32
public static bool AnyTrue(Vector128<long> a); // returns i32
public static bool AnyTrue(Vector128<ulong> a); // returns i32
public static bool AnyTrue(Vector128<float> a); // returns i32
public static bool AnyTrue(Vector128<double> a); // returns i32
public static bool AnyTrue(Vector128<nint> a); // returns i32
public static bool AnyTrue(Vector128<nuint> a); // returns i32
public static bool AllTrue(Vector128<sbyte> a); // returns i32, AreAllNonZero?
public static bool AllTrue(Vector128<byte> a); // returns i32
public static bool AllTrue(Vector128<short> a); // returns i32
public static bool AllTrue(Vector128<ushort> a); // returns i32
public static bool AllTrue(Vector128<int> a); // returns i32
public static bool AllTrue(Vector128<uint> a); // returns i32
public static bool AllTrue(Vector128<long> a); // returns i32
public static bool AllTrue(Vector128<ulong> a); // returns i32
public static bool AllTrue(Vector128<nint> a); // returns i32
public static bool AllTrue(Vector128<nuint> a); // returns i32
// Bitmask extraction
public static int Bitmask(Vector128<sbyte> a);
public static int Bitmask(Vector128<byte> a);
public static int Bitmask(Vector128<short> a);
public static int Bitmask(Vector128<ushort> a);
public static int Bitmask(Vector128<int> a);
public static int Bitmask(Vector128<uint> a);
public static int Bitmask(Vector128<long> a);
public static int Bitmask(Vector128<ulong> a);
public static int Bitmask(Vector128<nint> a);
public static int Bitmask(Vector128<nuint> a);
// Comparisons
public static Vector128<sbyte> CompareEqual(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> CompareEqual(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> CompareEqual(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> CompareEqual(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> CompareEqual(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> CompareEqual(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> CompareEqual(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> CompareEqual(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<float> CompareEqual(Vector128<float> a, Vector128<float> b);
public static Vector128<double> CompareEqual(Vector128<double> a, Vector128<double> b);
public static Vector128<nint> CompareEqual(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> CompareEqual(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<sbyte> CompareNotEqual(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> CompareNotEqual(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> CompareNotEqual(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> CompareNotEqual(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> CompareNotEqual(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> CompareNotEqual(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> CompareNotEqual(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> CompareNotEqual(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<float> CompareNotEqual(Vector128<float> a, Vector128<float> b);
public static Vector128<double> CompareNotEqual(Vector128<double> a, Vector128<double> b);
public static Vector128<nint> CompareNotEqual(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> CompareNotEqual(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<sbyte> CompareLessThan(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> CompareLessThan(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> CompareLessThan(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> CompareLessThan(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> CompareLessThan(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> CompareLessThan(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> CompareLessThan(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> CompareLessThan(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<float> CompareLessThan(Vector128<float> a, Vector128<float> b);
public static Vector128<double> CompareLessThan(Vector128<double> a, Vector128<double> b);
public static Vector128<nint> CompareLessThan(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> CompareLessThan(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<sbyte> CompareLessThanOrEqual(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> CompareLessThanOrEqual(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> CompareLessThanOrEqual(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> CompareLessThanOrEqual(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> CompareLessThanOrEqual(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> CompareLessThanOrEqual(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> CompareLessThanOrEqual(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> CompareLessThanOrEqual(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<float> CompareLessThanOrEqual(Vector128<float> a, Vector128<float> b);
public static Vector128<double> CompareLessThanOrEqual(Vector128<double> a, Vector128<double> b);
public static Vector128<nint> CompareLessThanOrEqual(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> CompareLessThanOrEqual(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<sbyte> CompareGreaterThan(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> CompareGreaterThan(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> CompareGreaterThan(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> CompareGreaterThan(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> CompareGreaterThan(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> CompareGreaterThan(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> CompareGreaterThan(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> CompareGreaterThan(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<float> CompareGreaterThan(Vector128<float> a, Vector128<float> b);
public static Vector128<double> CompareGreaterThan(Vector128<double> a, Vector128<double> b);
public static Vector128<nint> CompareGreaterThan(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> CompareGreaterThan(Vector128<nuint> a, Vector128<nuint> b);
public static Vector128<sbyte> CompareGreaterThanOrEqual(Vector128<sbyte> a, Vector128<sbyte> b);
public static Vector128<byte> CompareGreaterThanOrEqual(Vector128<byte> a, Vector128<byte> b);
public static Vector128<short> CompareGreaterThanOrEqual(Vector128<short> a, Vector128<short> b);
public static Vector128<ushort> CompareGreaterThanOrEqual(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<int> CompareGreaterThanOrEqual(Vector128<int> a, Vector128<int> b);
public static Vector128<uint> CompareGreaterThanOrEqual(Vector128<uint> a, Vector128<uint> b);
public static Vector128<long> CompareGreaterThanOrEqual(Vector128<long> a, Vector128<long> b);
public static Vector128<ulong> CompareGreaterThanOrEqual(Vector128<ulong> a, Vector128<ulong> b);
public static Vector128<float> CompareGreaterThanOrEqual(Vector128<float> a, Vector128<float> b);
public static Vector128<double> CompareGreaterThanOrEqual(Vector128<double> a, Vector128<double> b);
public static Vector128<nint> CompareGreaterThanOrEqual(Vector128<nint> a, Vector128<nint> b);
public static Vector128<nuint> CompareGreaterThanOrEqual(Vector128<nuint> a, Vector128<nuint> b);
// Load and store
public static Vector128<sbyte> LoadVector128(sbyte* memarg);
public static Vector128<byte> LoadVector128(byte* memarg);
public static Vector128<short> LoadVector128(short* memarg);
public static Vector128<ushort> LoadVector128(ushort* memarg);
public static Vector128<int> LoadVector128(int* memarg);
public static Vector128<uint> LoadVector128(uint* memarg);
public static Vector128<long> LoadVector128(long* memarg);
public static Vector128<ulong> LoadVector128(ulong* memarg);
public static Vector128<float> LoadVector128(float* memarg);
public static Vector128<double> LoadVector128(double* memarg);
public static Vector128<nint> LoadVector128(nint* memarg);
public static Vector128<nuint> LoadVector128(nuint* memarg);
public static Vector128<int> LoadScalarVector128(int* memarg);
public static Vector128<uint> LoadScalarVector128(uint* memarg);
public static Vector128<long> LoadScalarVector128(long* memarg);
public static Vector128<ulong> LoadScalarVector128(ulong* memarg);
public static Vector128<float> LoadScalarVector128(float* memarg);
public static Vector128<double> LoadScalarVector128(double* memarg);
public static Vector128<nint> LoadScalarVector128(nint* memarg);
public static Vector128<nuint> LoadScalarVector128(nuint* memarg);
public static Vector128<sbyte> LoadScalarAndSplatVector128(sbyte* memarg);
public static Vector128<byte> LoadScalarAndSplatVector128(byte* memarg);
public static Vector128<short> LoadScalarAndSplatVector128(short* memarg);
public static Vector128<ushort> LoadScalarAndSplatVector128(ushort* memarg);
public static Vector128<int> LoadScalarAndSplatVector128(int* memarg);
public static Vector128<uint> LoadScalarAndSplatVector128(uint* memarg);
public static Vector128<long> LoadScalarAndSplatVector128(long* memarg);
public static Vector128<ulong> LoadScalarAndSplatVector128(ulong* memarg);
public static Vector128<float> LoadScalarAndSplatVector128(float* memarg);
public static Vector128<double> LoadScalarAndSplatVector128(double* memarg);
public static Vector128<nint> LoadScalarAndSplatVector128(nint* memarg);
public static Vector128<nuint> LoadScalarAndSplatVector128(nuint* memarg);
public static Vector128<sbyte> LoadScalarAndInsert(sbyte* memarg, Vector128<sbyte> x, byte imm); // takes ImmLaneIdx16
public static Vector128<byte> LoadScalarAndInsert(byte* memarg, Vector128<byte> x, byte imm); // takes ImmLaneIdx16
public static Vector128<short> LoadScalarAndInsert(short* memarg, Vector128<short> x, byte imm); // takes ImmLaneIdx8
public static Vector128<ushort> LoadScalarAndInsert(ushort* memarg, Vector128<ushort> x, byte imm); // takes ImmLaneIdx8
public static Vector128<int> LoadScalarAndInsert(int* memarg, Vector128<int> x, byte imm); // takes ImmLaneIdx4
public static Vector128<uint> LoadScalarAndInsert(uint* memarg, Vector128<uint> x, byte imm); // takes ImmLaneIdx4
public static Vector128<long> LoadScalarAndInsert(long* memarg, Vector128<long> x, byte imm); // takes ImmLaneIdx2
public static Vector128<ulong> LoadScalarAndInsert(ulong* memarg, Vector128<ulong> x, byte imm); // takes ImmLaneIdx2
public static Vector128<float> LoadScalarAndInsert(float* memarg, Vector128<float> x, byte imm); // takes ImmLaneIdx4
public static Vector128<double> LoadScalarAndInsert(double* memarg, Vector128<double> x, byte imm); // takes ImmLaneIdx2
public static Vector128<nint> LoadScalarAndInsert(nint* memarg, Vector128<nint> x, byte imm);
public static Vector128<nuint> LoadScalarAndInsert(nuint* memarg, Vector128<nuint> x, byte imm);
public static Vector128<short> LoadWideningVector128(sbyte* memarg); // no lane-index immediate; presumably v128.load8x8_s — TODO confirm
public static Vector128<ushort> LoadWideningVector128(byte* memarg); // no lane-index immediate; presumably v128.load8x8_u — TODO confirm
public static Vector128<int> LoadWideningVector128(short* memarg); // no lane-index immediate; presumably v128.load16x4_s — TODO confirm
public static Vector128<uint> LoadWideningVector128(ushort* memarg); // no lane-index immediate; presumably v128.load16x4_u — TODO confirm
public static Vector128<long> LoadWideningVector128(int* memarg); // no lane-index immediate; presumably v128.load32x2_s — TODO confirm
public static Vector128<ulong> LoadWideningVector128(uint* memarg); // no lane-index immediate; presumably v128.load32x2_u — TODO confirm
public static void Store(sbyte* memarg, Vector128<sbyte> data);
public static void Store(byte* memarg, Vector128<byte> data);
public static void Store(short* memarg, Vector128<short> data);
public static void Store(ushort* memarg, Vector128<ushort> data);
public static void Store(int* memarg, Vector128<int> data);
public static void Store(uint* memarg, Vector128<uint> data);
public static void Store(long* memarg, Vector128<long> data);
public static void Store(ulong* memarg, Vector128<ulong> data);
public static void Store(float* memarg, Vector128<float> data);
public static void Store(double* memarg, Vector128<double> data);
public static void Store(nint* memarg, Vector128<nint> data);
public static void Store(nuint* memarg, Vector128<nuint> data);
// StoreSelectedScalar: one overload per element type. Each takes a destination pointer
// `memarg`, a source vector `data` and a byte `imm`, and returns nothing. The trailing
// annotation names the lane-index immediate type (presumably the valid range of `imm`):
// 16 lanes for 8-bit elements, 8 for 16-bit, 4 for 32-bit and 2 for 64-bit.
public static void StoreSelectedScalar(sbyte* memarg, Vector128<sbyte> data, byte imm); // takes ImmLaneIdx16
public static void StoreSelectedScalar(byte* memarg, Vector128<byte> data, byte imm); // takes ImmLaneIdx16
public static void StoreSelectedScalar(short* memarg, Vector128<short> data, byte imm); // takes ImmLaneIdx8
public static void StoreSelectedScalar(ushort* memarg, Vector128<ushort> data, byte imm); // takes ImmLaneIdx8
public static void StoreSelectedScalar(int* memarg, Vector128<int> data, byte imm); // takes ImmLaneIdx4
public static void StoreSelectedScalar(uint* memarg, Vector128<uint> data, byte imm); // takes ImmLaneIdx4
public static void StoreSelectedScalar(long* memarg, Vector128<long> data, byte imm); // takes ImmLaneIdx2
public static void StoreSelectedScalar(ulong* memarg, Vector128<ulong> data, byte imm); // takes ImmLaneIdx2
public static void StoreSelectedScalar(float* memarg, Vector128<float> data, byte imm); // takes ImmLaneIdx4
public static void StoreSelectedScalar(double* memarg, Vector128<double> data, byte imm); // takes ImmLaneIdx2
// NOTE(review): the nint/nuint overloads carry no lane-index annotation. The lane count depends
// on the native integer width (presumably ImmLaneIdx4 on 32-bit wasm) -- TODO confirm and annotate.
public static void StoreSelectedScalar(nint* memarg, Vector128<nint> data, byte imm);
public static void StoreSelectedScalar(nuint* memarg, Vector128<nuint> data, byte imm);
// Floating-point sign bit operations
// Unary operations declared for float and double only; the result has the same element
// type as the operand `a`.
public static Vector128<float> Negate(Vector128<float> a);
public static Vector128<double> Negate(Vector128<double> a);
public static Vector128<float> Abs(Vector128<float> a);
public static Vector128<double> Abs(Vector128<double> a);
// Floating-point min and max
// Binary operations declared for float and double only; both operands and the result share
// one element type.
public static Vector128<float> Min(Vector128<float> a, Vector128<float> b);
public static Vector128<double> Min(Vector128<double> a, Vector128<double> b);
public static Vector128<float> Max(Vector128<float> a, Vector128<float> b);
public static Vector128<double> Max(Vector128<double> a, Vector128<double> b);
// NOTE(review): PseudoMin/PseudoMax presumably correspond to the SIMD proposal's pmin/pmax
// instructions -- TODO confirm, and document how NaN and signed-zero handling differs
// from Min/Max so callers can choose between them.
public static Vector128<float> PseudoMin(Vector128<float> a, Vector128<float> b);
public static Vector128<double> PseudoMin(Vector128<double> a, Vector128<double> b);
public static Vector128<float> PseudoMax(Vector128<float> a, Vector128<float> b);
public static Vector128<double> PseudoMax(Vector128<double> a, Vector128<double> b);
// Floating-point arithmetic
// Binary arithmetic declared for float and double only in this section; both operands and
// the result share one element type. Operand order is (a, b), which matters for Subtract
// and Divide (presumably a - b and a / b -- TODO confirm).
public static Vector128<float> Add(Vector128<float> a, Vector128<float> b);
public static Vector128<double> Add(Vector128<double> a, Vector128<double> b);
public static Vector128<float> Subtract(Vector128<float> a, Vector128<float> b);
public static Vector128<double> Subtract(Vector128<double> a, Vector128<double> b);
public static Vector128<float> Divide(Vector128<float> a, Vector128<float> b);
public static Vector128<double> Divide(Vector128<double> a, Vector128<double> b);
public static Vector128<float> Multiply(Vector128<float> a, Vector128<float> b);
public static Vector128<double> Multiply(Vector128<double> a, Vector128<double> b);
// Square root and rounding: unary operations declared for float and double; the result has
// the same element type as the operand `a`.
public static Vector128<float> Sqrt(Vector128<float> a);
public static Vector128<double> Sqrt(Vector128<double> a);
public static Vector128<float> Ceiling(Vector128<float> a);
public static Vector128<double> Ceiling(Vector128<double> a);
public static Vector128<float> Floor(Vector128<float> a);
public static Vector128<double> Floor(Vector128<double> a);
public static Vector128<float> Truncate(Vector128<float> a);
public static Vector128<double> Truncate(Vector128<double> a);
// NOTE(review): RoundToNearest is declared with two operands while the other rounding
// operations above (Ceiling, Floor, Truncate) take one. The second operand `b` looks like a
// copy/paste slip from the binary arithmetic declarations -- TODO confirm against the SIMD
// proposal's `nearest` instruction and drop `b` if it is unary. Also document the
// tie-breaking rule (presumably ties-to-even -- confirm).
public static Vector128<float> RoundToNearest(Vector128<float> a, Vector128<float> b);
public static Vector128<double> RoundToNearest(Vector128<double> a, Vector128<double> b);
// Conversions
// ConvertToSingle: returns a float vector from an int, uint or double vector.
// NOTE(review): the double overload has two source lanes but four result lanes; the contents
// of the remaining result lanes are not stated here (presumably zero) -- TODO confirm.
public static Vector128<float> ConvertToSingle(Vector128<int> a);
public static Vector128<float> ConvertToSingle(Vector128<uint> a);
public static Vector128<float> ConvertToSingle(Vector128<double> a);
// ConvertToDoubleLower: returns a double vector from an int, uint or float vector. The source
// has four lanes and the result two; the "Lower" suffix presumably means the two lowest-indexed
// source lanes are the ones converted -- TODO confirm.
public static Vector128<double> ConvertToDoubleLower(Vector128<int> a);
public static Vector128<double> ConvertToDoubleLower(Vector128<uint> a);
public static Vector128<double> ConvertToDoubleLower(Vector128<float> a);
// Saturating floating-point to 32-bit integer conversions.
// C# does not include the return type in a member's signature, so the signed and unsigned
// results cannot share one method name for the same parameter list. The unsigned-result
// variants are therefore named ConvertToUInt32Saturate.
// NOTE(review): the double overloads have two source lanes but four result lanes; the
// contents of the remaining result lanes are not stated here (presumably zero) -- TODO confirm.
public static Vector128<int> ConvertToInt32Saturate(Vector128<float> a);
public static Vector128<uint> ConvertToUInt32Saturate(Vector128<float> a);
public static Vector128<int> ConvertToInt32Saturate(Vector128<double> a);
public static Vector128<uint> ConvertToUInt32Saturate(Vector128<double> a);
// ConvertNarrowingSaturate: each overload takes two vectors of one integer element type and
// returns a single vector whose element type is half as wide with the same signedness.
// Two inputs are needed because the result holds twice as many lanes as either input.
// NOTE(review): which input fills the lower half of the result is not stated here
// (presumably `a`) -- TODO confirm.
public static Vector128<sbyte> ConvertNarrowingSaturate(Vector128<short> a, Vector128<short> b);
public static Vector128<byte> ConvertNarrowingSaturate(Vector128<ushort> a, Vector128<ushort> b);
public static Vector128<short> ConvertNarrowingSaturate(Vector128<int> a, Vector128<int> b);
public static Vector128<ushort> ConvertNarrowingSaturate(Vector128<uint> a, Vector128<uint> b);
// NOTE(review): verify that the SIMD proposal defines a 64-bit to 32-bit narrowing
// instruction backing the two overloads below; if not, they may need to be removed or
// emulated -- TODO confirm.
public static Vector128<int> ConvertNarrowingSaturate(Vector128<long> a, Vector128<long> b);
public static Vector128<uint> ConvertNarrowingSaturate(Vector128<ulong> a, Vector128<ulong> b);
// SignExtendWideningLower / SignExtendWideningUpper: each overload takes one integer vector
// and returns a vector whose element type is twice as wide with the same signedness. The
// result has half as many lanes as the input; "Lower"/"Upper" presumably select which half
// of the input lanes is widened -- TODO confirm.
// NOTE(review): overloads are declared for unsigned element types (byte, ushort, uint) as
// well as signed ones; confirm that sign extension of unsigned inputs is intended.
public static Vector128<short> SignExtendWideningLower(Vector128<sbyte> a);
public static Vector128<ushort> SignExtendWideningLower(Vector128<byte> a);
public static Vector128<int> SignExtendWideningLower(Vector128<short> a);
public static Vector128<uint> SignExtendWideningLower(Vector128<ushort> a);
public static Vector128<long> SignExtendWideningLower(Vector128<int> a);
public static Vector128<ulong> SignExtendWideningLower(Vector128<uint> a);
public static Vector128<short> SignExtendWideningUpper(Vector128<sbyte> a);
public static Vector128<ushort> SignExtendWideningUpper(Vector128<byte> a);
public static Vector128<int> SignExtendWideningUpper(Vector128<short> a);
public static Vector128<uint> SignExtendWideningUpper(Vector128<ushort> a);
public static Vector128<long> SignExtendWideningUpper(Vector128<int> a);
public static Vector128<ulong> SignExtendWideningUpper(Vector128<uint> a);
// ZeroExtendWideningLower / ZeroExtendWideningUpper: each overload takes one integer vector
// and returns a vector whose element type is twice as wide with the same signedness. The
// result has half as many lanes as the input; "Lower"/"Upper" presumably select which half
// of the input lanes is widened -- TODO confirm.
// NOTE(review): overloads are declared for signed element types (sbyte, short, int) as well
// as unsigned ones; confirm that zero extension of signed inputs is intended.
public static Vector128<short> ZeroExtendWideningLower(Vector128<sbyte> a);
public static Vector128<ushort> ZeroExtendWideningLower(Vector128<byte> a);
public static Vector128<int> ZeroExtendWideningLower(Vector128<short> a);
public static Vector128<uint> ZeroExtendWideningLower(Vector128<ushort> a);
public static Vector128<long> ZeroExtendWideningLower(Vector128<int> a);
public static Vector128<ulong> ZeroExtendWideningLower(Vector128<uint> a);
public static Vector128<short> ZeroExtendWideningUpper(Vector128<sbyte> a);
public static Vector128<ushort> ZeroExtendWideningUpper(Vector128<byte> a);
public static Vector128<int> ZeroExtendWideningUpper(Vector128<short> a);
public static Vector128<uint> ZeroExtendWideningUpper(Vector128<ushort> a);
public static Vector128<long> ZeroExtendWideningUpper(Vector128<int> a);
public static Vector128<ulong> ZeroExtendWideningUpper(Vector128<uint> a);
}
}