Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 5c34bea

Browse files
FeiPengInteljkotas
authored andcommitted
Improve Intel hardware intrinsic APIs (dotnet/coreclr#17637)
* Improve Intel hardware intrinsic APIs * Simplify Avx.Extract non-const fallback Signed-off-by: dotnet-bot-corefx-mirror <dotnet-bot@microsoft.com>
1 parent 16f1fe2 commit 5c34bea

File tree

6 files changed

+25
-120
lines changed

6 files changed

+25
-120
lines changed

src/Common/src/CoreLib/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -234,11 +234,6 @@ public static class Avx
234234
/// </summary>
235235
public static Vector256<float> DuplicateOddIndexed(Vector256<float> value) { throw new PlatformNotSupportedException(); }
236236

237-
/// <summary>
238-
/// __int8 _mm256_extract_epi8 (__m256i a, const int index)
239-
/// HELPER
240-
/// </summary>
241-
public static sbyte Extract(Vector256<sbyte> value, byte index) { throw new PlatformNotSupportedException(); }
242237
/// <summary>
243238
/// __int8 _mm256_extract_epi8 (__m256i a, const int index)
244239
/// HELPER
@@ -248,11 +243,6 @@ public static class Avx
248243
/// __int16 _mm256_extract_epi16 (__m256i a, const int index)
249244
/// HELPER
250245
/// </summary>
251-
public static short Extract(Vector256<short> value, byte index) { throw new PlatformNotSupportedException(); }
252-
/// <summary>
253-
/// __int16 _mm256_extract_epi16 (__m256i a, const int index)
254-
/// HELPER
255-
/// </summary>
256246
public static ushort Extract(Vector256<ushort> value, byte index) { throw new PlatformNotSupportedException(); }
257247
/// <summary>
258248
/// __int32 _mm256_extract_epi32 (__m256i a, const int index)
@@ -638,45 +628,45 @@ public static class Avx
638628
/// __m128 _mm_maskload_ps (float const * mem_addr, __m128i mask)
639629
/// VMASKMOVPS xmm, xmm, m128
640630
/// </summary>
641-
public static unsafe Vector128<float> MaskLoad(float* address, Vector128<uint> mask) { throw new PlatformNotSupportedException(); }
631+
public static unsafe Vector128<float> MaskLoad(float* address, Vector128<float> mask) { throw new PlatformNotSupportedException(); }
642632
/// <summary>
643633
/// __m128d _mm_maskload_pd (double const * mem_addr, __m128i mask)
644634
/// VMASKMOVPD xmm, xmm, m128
645635
/// </summary>
646-
public static unsafe Vector128<double> MaskLoad(double* address, Vector128<ulong> mask) { throw new PlatformNotSupportedException(); }
636+
public static unsafe Vector128<double> MaskLoad(double* address, Vector128<double> mask) { throw new PlatformNotSupportedException(); }
647637

648638
/// <summary>
649639
/// __m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask)
650640
/// VMASKMOVPS ymm, ymm, m256
651641
/// </summary>
652-
public static unsafe Vector256<float> MaskLoad(float* address, Vector256<uint> mask) { throw new PlatformNotSupportedException(); }
642+
public static unsafe Vector256<float> MaskLoad(float* address, Vector256<float> mask) { throw new PlatformNotSupportedException(); }
653643
/// <summary>
654644
/// __m256d _mm256_maskload_pd (double const * mem_addr, __m256i mask)
655645
/// VMASKMOVPD ymm, ymm, m256
656646
/// </summary>
657-
public static unsafe Vector256<double> MaskLoad(double* address, Vector256<ulong> mask) { throw new PlatformNotSupportedException(); }
647+
public static unsafe Vector256<double> MaskLoad(double* address, Vector256<double> mask) { throw new PlatformNotSupportedException(); }
658648

659649
/// <summary>
660650
/// void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a)
661651
/// VMASKMOVPS m128, xmm, xmm
662652
/// </summary>
663-
public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<uint> source) { throw new PlatformNotSupportedException(); }
653+
public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<float> source) { throw new PlatformNotSupportedException(); }
664654
/// <summary>
665655
/// void _mm_maskstore_pd (double * mem_addr, __m128i mask, __m128d a)
666656
/// VMASKMOVPD m128, xmm, xmm
667657
/// </summary>
668-
public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<ulong> source) { throw new PlatformNotSupportedException(); }
658+
public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<double> source) { throw new PlatformNotSupportedException(); }
669659

670660
/// <summary>
671661
/// void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a)
672662
/// VMASKMOVPS m256, ymm, ymm
673663
/// </summary>
674-
public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<uint> source) { throw new PlatformNotSupportedException(); }
664+
public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<float> source) { throw new PlatformNotSupportedException(); }
675665
/// <summary>
676666
/// void _mm256_maskstore_pd (double * mem_addr, __m256i mask, __m256d a)
677667
/// VMASKMOVPD m256, ymm, ymm
678668
/// </summary>
679-
public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<ulong> source) { throw new PlatformNotSupportedException(); }
669+
public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<double> source) { throw new PlatformNotSupportedException(); }
680670

681671
/// <summary>
682672
/// __m256 _mm256_max_ps (__m256 a, __m256 b)

src/Common/src/CoreLib/System/Runtime/Intrinsics/X86/Avx.cs

Lines changed: 15 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
using System;
66
using System.Runtime.Intrinsics;
7-
using System.Runtime.CompilerServices;
7+
using Internal.Runtime.CompilerServices;
88

99
namespace System.Runtime.Intrinsics.X86
1010
{
@@ -235,64 +235,23 @@ public static class Avx
235235
/// </summary>
236236
public static Vector256<float> DuplicateOddIndexed(Vector256<float> value) => DuplicateOddIndexed(value);
237237

238-
/// <summary>
239-
/// __int8 _mm256_extract_epi8 (__m256i a, const int index)
240-
/// HELPER
241-
/// </summary>
242-
public static sbyte Extract(Vector256<sbyte> value, byte index)
243-
{
244-
unsafe
245-
{
246-
index &= 0x1F;
247-
sbyte* buffer = stackalloc sbyte[32];
248-
Store(buffer, value);
249-
return buffer[index];
250-
}
251-
}
252-
253238
/// <summary>
254239
/// __int8 _mm256_extract_epi8 (__m256i a, const int index)
255240
/// HELPER
256241
/// </summary>
257242
public static byte Extract(Vector256<byte> value, byte index)
258243
{
259-
unsafe
260-
{
261-
index &= 0x1F;
262-
byte* buffer = stackalloc byte[32];
263-
Store(buffer, value);
264-
return buffer[index];
265-
}
244+
return Unsafe.Add<byte>(ref Unsafe.As<Vector256<byte>, byte>(ref value), index & 0x1F);
266245
}
267246

268-
/// <summary>
269-
/// __int16 _mm256_extract_epi16 (__m256i a, const int index)
270-
/// HELPER
271-
/// </summary>
272-
public static short Extract(Vector256<short> value, byte index)
273-
{
274-
unsafe
275-
{
276-
index &= 0xF;
277-
short* buffer = stackalloc short[16];
278-
Store(buffer, value);
279-
return buffer[index];
280-
}
281-
}
282247

283248
/// <summary>
284249
/// __int16 _mm256_extract_epi16 (__m256i a, const int index)
285250
/// HELPER
286251
/// </summary>
287252
public static ushort Extract(Vector256<ushort> value, byte index)
288253
{
289-
unsafe
290-
{
291-
index &= 0xF;
292-
ushort* buffer = stackalloc ushort[16];
293-
Store(buffer, value);
294-
return buffer[index];
295-
}
254+
return Unsafe.Add<ushort>(ref Unsafe.As<Vector256<ushort>, ushort>(ref value), index & 0xF);
296255
}
297256

298257
/// <summary>
@@ -301,13 +260,7 @@ public static ushort Extract(Vector256<ushort> value, byte index)
301260
/// </summary>
302261
public static int Extract(Vector256<int> value, byte index)
303262
{
304-
unsafe
305-
{
306-
index &= 0x7;
307-
int* buffer = stackalloc int[8];
308-
Store(buffer, value);
309-
return buffer[index];
310-
}
263+
return Unsafe.Add<int>(ref Unsafe.As<Vector256<int>, int>(ref value), index & 0x7);
311264
}
312265

313266
/// <summary>
@@ -316,13 +269,7 @@ public static int Extract(Vector256<int> value, byte index)
316269
/// </summary>
317270
public static uint Extract(Vector256<uint> value, byte index)
318271
{
319-
unsafe
320-
{
321-
index &= 0x7;
322-
uint* buffer = stackalloc uint[8];
323-
Store(buffer, value);
324-
return buffer[index];
325-
}
272+
return Unsafe.Add<uint>(ref Unsafe.As<Vector256<uint>, uint>(ref value), index & 0x7);
326273
}
327274

328275
/// <summary>
@@ -335,13 +282,7 @@ public static long Extract(Vector256<long> value, byte index)
335282
{
336283
throw new PlatformNotSupportedException();
337284
}
338-
unsafe
339-
{
340-
index &= 0x3;
341-
long* buffer = stackalloc long[4];
342-
Store(buffer, value);
343-
return buffer[index];
344-
}
285+
return Unsafe.Add<long>(ref Unsafe.As<Vector256<long>, long>(ref value), index & 0x3);
345286
}
346287

347288
/// <summary>
@@ -354,13 +295,7 @@ public static ulong Extract(Vector256<ulong> value, byte index)
354295
{
355296
throw new PlatformNotSupportedException();
356297
}
357-
unsafe
358-
{
359-
index &= 0x3;
360-
ulong* buffer = stackalloc ulong[4];
361-
Store(buffer, value);
362-
return buffer[index];
363-
}
298+
return Unsafe.Add<ulong>(ref Unsafe.As<Vector256<ulong>, ulong>(ref value), index & 0x3);
364299
}
365300

366301
/// <summary>
@@ -825,45 +760,45 @@ public static Vector256<T> InsertVector128<T>(Vector256<T> value, Vector128<T> d
825760
/// __m128 _mm_maskload_ps (float const * mem_addr, __m128i mask)
826761
/// VMASKMOVPS xmm, xmm, m128
827762
/// </summary>
828-
public static unsafe Vector128<float> MaskLoad(float* address, Vector128<uint> mask) => MaskLoad(address, mask);
763+
public static unsafe Vector128<float> MaskLoad(float* address, Vector128<float> mask) => MaskLoad(address, mask);
829764
/// <summary>
830765
/// __m128d _mm_maskload_pd (double const * mem_addr, __m128i mask)
831766
/// VMASKMOVPD xmm, xmm, m128
832767
/// </summary>
833-
public static unsafe Vector128<double> MaskLoad(double* address, Vector128<ulong> mask) => MaskLoad(address, mask);
768+
public static unsafe Vector128<double> MaskLoad(double* address, Vector128<double> mask) => MaskLoad(address, mask);
834769

835770
/// <summary>
836771
/// __m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask)
837772
/// VMASKMOVPS ymm, ymm, m256
838773
/// </summary>
839-
public static unsafe Vector256<float> MaskLoad(float* address, Vector256<uint> mask) => MaskLoad(address, mask);
774+
public static unsafe Vector256<float> MaskLoad(float* address, Vector256<float> mask) => MaskLoad(address, mask);
840775
/// <summary>
841776
/// __m256d _mm256_maskload_pd (double const * mem_addr, __m256i mask)
842777
/// VMASKMOVPD ymm, ymm, m256
843778
/// </summary>
844-
public static unsafe Vector256<double> MaskLoad(double* address, Vector256<ulong> mask) => MaskLoad(address, mask);
779+
public static unsafe Vector256<double> MaskLoad(double* address, Vector256<double> mask) => MaskLoad(address, mask);
845780

846781
/// <summary>
847782
/// void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a)
848783
/// VMASKMOVPS m128, xmm, xmm
849784
/// </summary>
850-
public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<uint> source) => MaskStore(address, mask, source);
785+
public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<float> source) => MaskStore(address, mask, source);
851786
/// <summary>
852787
/// void _mm_maskstore_pd (double * mem_addr, __m128i mask, __m128d a)
853788
/// VMASKMOVPD m128, xmm, xmm
854789
/// </summary>
855-
public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<ulong> source) => MaskStore(address, mask, source);
790+
public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<double> source) => MaskStore(address, mask, source);
856791

857792
/// <summary>
858793
/// void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a)
859794
/// VMASKMOVPS m256, ymm, ymm
860795
/// </summary>
861-
public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<uint> source) => MaskStore(address, mask, source);
796+
public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<float> source) => MaskStore(address, mask, source);
862797
/// <summary>
863798
/// void _mm256_maskstore_pd (double * mem_addr, __m256i mask, __m256d a)
864799
/// VMASKMOVPD m256, ymm, ymm
865800
/// </summary>
866-
public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<ulong> source) => MaskStore(address, mask, source);
801+
public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<double> source) => MaskStore(address, mask, source);
867802

868803
/// <summary>
869804
/// __m256 _mm256_max_ps (__m256 a, __m256 b)

src/Common/src/CoreLib/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -609,11 +609,6 @@ public static class Sse2
609609
/// </summary>
610610
public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
611611

612-
/// <summary>
613-
/// int _mm_extract_epi16 (__m128i a, int immediate)
614-
/// PEXTRW reg, xmm, imm8
615-
/// </summary>
616-
public static short Extract(Vector128<short> value, byte index) { throw new PlatformNotSupportedException(); }
617612
/// <summary>
618613
/// int _mm_extract_epi16 (__m128i a, int immediate)
619614
/// PEXTRW reg, xmm, imm8

src/Common/src/CoreLib/System/Runtime/Intrinsics/X86/Sse2.cs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -610,11 +610,6 @@ public static class Sse2
610610
/// </summary>
611611
public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) => DivideScalar(left, right);
612612

613-
/// <summary>
614-
/// int _mm_extract_epi16 (__m128i a, int immediate)
615-
/// PEXTRW reg, xmm, imm8
616-
/// </summary>
617-
public static short Extract(Vector128<short> value, byte index) => Extract(value, index);
618613
/// <summary>
619614
/// int _mm_extract_epi16 (__m128i a, int immediate)
620615
/// PEXTRW reg, xmm, imm8

src/Common/src/CoreLib/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -178,11 +178,6 @@ public static class Sse41
178178
/// </summary>
179179
public static Vector128<double> DotProduct(Vector128<double> left, Vector128<double> right, byte control) { throw new PlatformNotSupportedException(); }
180180

181-
/// <summary>
182-
/// int _mm_extract_epi8 (__m128i a, const int imm8)
183-
/// PEXTRB reg/m8, xmm, imm8
184-
/// </summary>
185-
public static sbyte Extract(Vector128<sbyte> value, byte index) { throw new PlatformNotSupportedException(); }
186181
/// <summary>
187182
/// int _mm_extract_epi8 (__m128i a, const int imm8)
188183
/// PEXTRB reg/m8, xmm, imm8
@@ -283,7 +278,7 @@ public static class Sse41
283278
/// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8)
284279
/// INSERTPS xmm, xmm/m32, imm8
285280
/// </summary>
286-
public static Vector128<float> Insert(Vector128<float> value, float data, byte index) { throw new PlatformNotSupportedException(); }
281+
public static Vector128<float> Insert(Vector128<float> value, Vector128<float> data, byte index) { throw new PlatformNotSupportedException(); }
287282

288283
/// <summary>
289284
/// __m128i _mm_max_epi8 (__m128i a, __m128i b)

src/Common/src/CoreLib/System/Runtime/Intrinsics/X86/Sse41.cs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -178,11 +178,6 @@ public static class Sse41
178178
/// </summary>
179179
public static Vector128<double> DotProduct(Vector128<double> left, Vector128<double> right, byte control) => DotProduct(left, right, control);
180180

181-
/// <summary>
182-
/// int _mm_extract_epi8 (__m128i a, const int imm8)
183-
/// PEXTRB reg/m8, xmm, imm8
184-
/// </summary>
185-
public static sbyte Extract(Vector128<sbyte> value, byte index) => Extract(value, index);
186181
/// <summary>
187182
/// int _mm_extract_epi8 (__m128i a, const int imm8)
188183
/// PEXTRB reg/m8, xmm, imm8
@@ -283,7 +278,7 @@ public static class Sse41
283278
/// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8)
284279
/// INSERTPS xmm, xmm/m32, imm8
285280
/// </summary>
286-
public static Vector128<float> Insert(Vector128<float> value, float data, byte index) => Insert(value, data, index);
281+
public static Vector128<float> Insert(Vector128<float> value, Vector128<float> data, byte index) => Insert(value, data, index);
287282

288283
/// <summary>
289284
/// __m128i _mm_max_epi8 (__m128i a, __m128i b)

0 commit comments

Comments
 (0)