-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Headers][X86] Enable constexpr handling for pmulhw/pmulhuw avx512 mask/maskz intrinsics #154341
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…sk/maskz intrinsics Followup to llvm#152524 / llvm#152540 - allow the predicated variants to be used in constexpr as well
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) ChangesFollowup to #152524 / #152540 - allow the predicated variants to be used in constexpr as well Patch is 20.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154341.diff 4 Files Affected:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 118e1cbc070e1..9263f7af3ee2f 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1083,48 +1083,39 @@ _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mulhi_epi16(__m512i __A, __m512i __B)
-{
- return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
+_mm512_mulhi_epi16(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_pmulhw512((__v32hi)__A, (__v32hi)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
- __m512i __B)
-{
- return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
- (__v32hi)_mm512_mulhi_epi16(__A, __B),
- (__v32hi)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectw_512(
+ (__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B), (__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
-{
- return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
- (__v32hi)_mm512_mulhi_epi16(__A, __B),
- (__v32hi)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectw_512(
+ (__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B),
+ (__v32hi)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mulhi_epu16(__m512i __A, __m512i __B)
-{
- return (__m512i)__builtin_ia32_pmulhuw512((__v32hu) __A, (__v32hu) __B);
+_mm512_mulhi_epu16(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_pmulhuw512((__v32hu)__A, (__v32hu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
-{
- return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
- (__v32hi)_mm512_mulhi_epu16(__A, __B),
- (__v32hi)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectw_512(
+ (__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B), (__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
- return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
- (__v32hi)_mm512_mulhi_epu16(__A, __B),
- (__v32hi)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectw_512(
+ (__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B),
+ (__v32hi)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 9aedba0669991..2e2052ad1b682 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -24,6 +24,14 @@
__target__("avx512vl,avx512bw,no-evex512"), \
__min_vector_width__(256)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
/* Integer compare */
#define _mm_cmp_epi8_mask(a, b, p) \
@@ -1592,67 +1600,62 @@ _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
- return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
- (__v8hi)_mm_mulhi_epu16(__A, __B),
- (__v8hi)__W);
+ return (__m128i)__builtin_ia32_selectw_128(
+ (__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_mulhi_epu16(__A, __B),
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
- return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
- (__v16hi)_mm256_mulhi_epu16(__A, __B),
- (__v16hi)__W);
+ return (__m256i)__builtin_ia32_selectw_256(
+ (__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
- return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
- (__v16hi)_mm256_mulhi_epu16(__A, __B),
- (__v16hi)_mm256_setzero_si256());
+ return (__m256i)__builtin_ia32_selectw_256(
+ (__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B),
+ (__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
- return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
- (__v8hi)_mm_mulhi_epi16(__A, __B),
- (__v8hi)__W);
+ return (__m128i)__builtin_ia32_selectw_128(
+ (__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_mulhi_epi16(__A, __B),
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
- return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
- (__v16hi)_mm256_mulhi_epi16(__A, __B),
- (__v16hi)__W);
+ return (__m256i)__builtin_ia32_selectw_256(
+ (__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
- return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
- (__v16hi)_mm256_mulhi_epi16(__A, __B),
- (__v16hi)_mm256_setzero_si256());
+ return (__m256i)__builtin_ia32_selectw_256(
+ (__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B),
+ (__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
- return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
- (__v16qi)_mm_unpackhi_epi8(__A, __B),
- (__v16qi)__W);
+ return (__m128i)__builtin_ia32_selectb_128(
+ (__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3163,5 +3166,7 @@ _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#endif /* __AVX512VLBWINTRIN_H */
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 0bd9718974c7d..02cedc3c73fb7 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1332,7 +1332,7 @@ __m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) {
// CHECK: @llvm.x86.avx512.pmul.hr.sw.512
return _mm512_mulhrs_epi16(__A,__B);
}
-__m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+__m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_mulhrs_epi16
// CHECK: @llvm.x86.avx512.pmul.hr.sw.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
@@ -1351,12 +1351,13 @@ __m512i test_mm512_mulhi_epi16(__m512i __A, __m512i __B) {
}
TEST_CONSTEXPR(match_v32hi(_mm512_mulhi_epi16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1));
-__m512i test_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+__m512i test_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_mulhi_epi16
// CHECK: @llvm.x86.avx512.pmulh.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_mulhi_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_mulhi_epi16(_mm512_set1_epi16(1), 0xF00FF00F, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -1, 0, 0, -1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0, 0, -1, -1, 0, 0, -1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1));
__m512i test_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_mulhi_epi16
@@ -1364,6 +1365,7 @@ __m512i test_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_mulhi_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mulhi_epi16(0x0FF00FF0, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, -1, 0, 0, 0, 0));
__m512i test_mm512_mulhi_epu16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mulhi_epu16
@@ -1372,12 +1374,13 @@ __m512i test_mm512_mulhi_epu16(__m512i __A, __m512i __B) {
}
TEST_CONSTEXPR(match_v32hi(_mm512_mulhi_epu16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, -64, 0, 57, 4, -60, 0, 49, 8, -56, 0, 41, 12, -52, 0, 33, 16, -48, 0, 25, 20, -44, 0, 17, 24, -40, 0, 9, 28, 5, 30, 1));
-__m512i test_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+__m512i test_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_mulhi_epu16
// CHECK: @llvm.x86.avx512.pmulhu.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_mulhi_epu16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_mulhi_epu16(_mm512_set1_epi16(1), 0x0FF00FF0, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 1, 1, 1, 1, 4, -60, 0, 49, 8, -56, 0, 41, 1, 1, 1, 1, 1, 1, 1, 1, 20, -44, 0, 17, 24, -40, 0, 9, 1, 1, 1, 1));
__m512i test_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_mulhi_epu16
@@ -1385,13 +1388,14 @@ __m512i test_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_mulhi_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mulhi_epu16(0xF00FF00F, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, -64, 0, 57, 0, 0, 0, 0, 0, 0, 0, 0, 12, -52, 0, 33, 16, -48, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 28, 5, 30, 1));
__m512i test_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_maddubs_epi16
// CHECK: @llvm.x86.avx512.pmaddubs.w.512
return _mm512_maddubs_epi16(__X,__Y);
}
-__m512i test_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, __m512i __Y) {
+__m512i test_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_mask_maddubs_epi16
// CHECK: @llvm.x86.avx512.pmaddubs.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index e792c15ba5435..d8a2d1edf8afb 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -6,6 +6,7 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1-512 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__mmask32 test_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) {
// CHECK-LABEL: test_mm256_cmpeq_epi8_mask
@@ -1862,6 +1863,7 @@ __m128i test_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_mulhi_epu16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_mulhi_epu16(_mm_set1_epi16(1), 0x3C, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 1, 1, 0, 9, 4, 5, 1, 1));
__m128i test_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_mulhi_epu16
@@ -1869,6 +1871,7 @@ __m128i test_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_mulhi_epu16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_mulhi_epu16(0x87, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 0, -16, 0, 0, 0, 0, 0, 1));
__m256i test_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_mulhi_epu16
@@ -1876,6 +1879,7 @@ __m256i test_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_mulhi_epu16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_mulhi_epu16(_mm256_set1_epi16(1), 0xF00F, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, -32, 0, 25, 1, 1, 1, 1, 1, 1, 1, 1, 12, 5, 14, 1));
__m256i test_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_mulhi_epu16
@@ -1883,6 +1887,7 @@ __m256i test_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_mulhi_epu16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_mulhi_epu16(0x0FF0, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, 0, 0, 0, 4, -28, 0, 17, 8, -24, 0, 9, 0, 0, 0, 0));
__m128i test_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_mulhi_epi16
@@ -1890,6 +1895,7 @@ __m128i test_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_mulhi_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_mulhi_epu16(_mm_set1_epi16(1), 0x78, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 1, 1, 1, 9, 4, 5, 6, 1));
__m128i test_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_mulhi_epi16
@@ -1897,6 +1903,7 @@ __m128i test_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_mulhi_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_mulhi_epi16(0xC3, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -1, 0, 0, 0, 0, 0, -1, -1));
__m256i test_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_mulhi_epi16
@@ -1904,6 +1911,7 @@ __m256i test_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_mulhi_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_mulhi_epi16(_mm256_set1_epi16(1), 0x0FF0, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 1, 1, 1, 1, -1, 0, 0, -1, -1, 0, 0, -1, 1, 1, 1, 1));
__m256i test_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_mulhi_epi16
@@ -1911,6 +1919,7 @@ __m2...
[truncated]
|
phoebewang
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/46/builds/21949 Here is the relevant piece of the build log for the reference |
Followup to #152524 / #152540 - allow the predicated variants to be used in constexpr as well