|
| 1 | +/**************************************************************************** |
| 2 | +** |
| 3 | +** Copyright (C) 2017 Intel Corporation. |
| 4 | +** Contact: https://www.qt.io/licensing/ |
| 5 | +** |
| 6 | +** This file is part of the QtCore module of the Qt Toolkit. |
| 7 | +** |
| 8 | +** $QT_BEGIN_LICENSE:LGPL$ |
| 9 | +** Commercial License Usage |
| 10 | +** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | +** accordance with the commercial license agreement provided with the |
| 12 | +** Software or, alternatively, in accordance with the terms contained in |
| 13 | +** a written agreement between you and The Qt Company. For licensing terms |
| 14 | +** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | +** information use the contact form at https://www.qt.io/contact-us. |
| 16 | +** |
| 17 | +** GNU Lesser General Public License Usage |
| 18 | +** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | +** General Public License version 3 as published by the Free Software |
| 20 | +** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| 21 | +** packaging of this file. Please review the following information to |
| 22 | +** ensure the GNU Lesser General Public License version 3 requirements |
| 23 | +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| 24 | +** |
| 25 | +** GNU General Public License Usage |
| 26 | +** Alternatively, this file may be used under the terms of the GNU |
| 27 | +** General Public License version 2.0 or (at your option) the GNU General |
| 28 | +** Public license version 3 or any later version approved by the KDE Free |
| 29 | +** Qt Foundation. The licenses are as published by the Free Software |
| 30 | +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| 31 | +** included in the packaging of this file. Please review the following |
| 32 | +** information to ensure the GNU General Public License requirements will |
| 33 | +** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| 34 | +** https://www.gnu.org/licenses/gpl-3.0.html. |
| 35 | +** |
| 36 | +** $QT_END_LICENSE$ |
| 37 | +** |
| 38 | +****************************************************************************/ |
| 39 | + |
| 40 | +// All of our supported compilers support <immintrin.h> |
| 41 | +#include <immintrin.h> |
/*
 * T(FEATURE) pastes FEATURE onto QT_COMPILER_SUPPORTS_ and parenthesises the
 * result, so each probe below can be guarded with "#if T(FEATURE)".
 */
#define T(x) (QT_COMPILER_SUPPORTS_ ## x)

#if defined(__INTEL_COMPILER) || defined(_MSC_VER)
/* The Intel and Microsoft compilers get an empty attribute_target() */
#  define attribute_target(x)
#else
/* GCC requires attributes for a function */
#  define attribute_target(x) __attribute__((__target__(x)))
#endif
| 50 | + |
| 51 | +#if T(SSE2) |
| 52 | +attribute_target("sse2") void test_sse2() |
| 53 | +{ |
| 54 | + __m128i a = _mm_setzero_si128(); |
| 55 | + _mm_maskmoveu_si128(a, _mm_setzero_si128(), 0); |
| 56 | +} |
| 57 | +#endif |
| 58 | + |
| 59 | +#if T(SSE3) |
| 60 | +attribute_target("sse3") void test_sse3() |
| 61 | +{ |
| 62 | + __m128d a = _mm_set1_pd(6.28); |
| 63 | + __m128d b = _mm_set1_pd(3.14); |
| 64 | + __m128d result = _mm_addsub_pd(a, b); |
| 65 | + (void) _mm_movedup_pd(result); |
| 66 | +} |
| 67 | +#endif |
| 68 | + |
| 69 | +#if T(SSSE3) |
| 70 | +attribute_target("ssse3") void test_ssse3() |
| 71 | +{ |
| 72 | + __m128i a = _mm_set1_epi32(42); |
| 73 | + _mm_abs_epi8(a); |
| 74 | + (void) _mm_sign_epi16(a, _mm_set1_epi32(64)); |
| 75 | +} |
| 76 | +#endif |
| 77 | + |
| 78 | +#if T(SSE4_1) |
| 79 | +attribute_target("sse4.1") void test_sse4_1() |
| 80 | +{ |
| 81 | + __m128 a = _mm_setzero_ps(); |
| 82 | + _mm_ceil_ps(a); |
| 83 | + __m128i result = _mm_mullo_epi32(_mm_set1_epi32(42), _mm_set1_epi32(64)); |
| 84 | + (void)result; |
| 85 | +} |
| 86 | +#endif |
| 87 | + |
| 88 | +#if T(SSE4_2) |
| 89 | +attribute_target("sse4.2") void test_sse4_2() |
| 90 | +{ |
| 91 | + __m128i a = _mm_setzero_si128(); |
| 92 | + __m128i b = _mm_set1_epi32(42); |
| 93 | + (void) _mm_cmpestrm(a, 16, b, 16, 0); |
| 94 | +} |
| 95 | +#endif |
| 96 | + |
| 97 | +#if T(AESNI) |
| 98 | +attribute_target("aes,sse4.2") void test_aesni() |
| 99 | +{ |
| 100 | + __m128i a = _mm_setzero_si128(); |
| 101 | + __m128i b = _mm_aesenc_si128(a, a); |
| 102 | + __m128i c = _mm_aesdec_si128(a, b); |
| 103 | + (void)c; |
| 104 | +} |
| 105 | +#endif |
| 106 | + |
| 107 | +#if T(F16C) |
| 108 | +attribute_target("f16c") void test_f16c() |
| 109 | +{ |
| 110 | + __m128i a = _mm_setzero_si128(); |
| 111 | + __m128 b = _mm_cvtph_ps(a); |
| 112 | + __m256 b256 = _mm256_cvtph_ps(a); |
| 113 | + (void) _mm_cvtps_ph(b, 0); |
| 114 | + (void) _mm256_cvtps_ph(b256, 0); |
| 115 | +} |
| 116 | +#endif |
| 117 | + |
| 118 | +#if T(RDRND) |
| 119 | +attribute_target("rdrnd") int test_rdrnd() |
| 120 | +{ |
| 121 | + unsigned short us; |
| 122 | + unsigned int ui; |
| 123 | + if (_rdrand16_step(&us)) |
| 124 | + return 1; |
| 125 | + if (_rdrand32_step(&ui)) |
| 126 | + return 1; |
| 127 | +# if defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) |
| 128 | + unsigned long long ull; |
| 129 | + if (_rdrand64_step(&ull)) |
| 130 | + return 1; |
| 131 | +# endif |
| 132 | +} |
| 133 | +#endif |
| 134 | + |
| 135 | +#if T(SHANI) |
| 136 | +attribute_target("sha") void test_shani() |
| 137 | +{ |
| 138 | + __m128i a = _mm_setzero_si128(); |
| 139 | + __m128i b = _mm_sha1rnds4_epu32(a, a, 0); |
| 140 | + __m128i c = _mm_sha1msg1_epu32(a, b); |
| 141 | + __m128i d = _mm_sha256msg2_epu32(b, c); |
| 142 | + (void)d; |
| 143 | +} |
| 144 | +#endif |
| 145 | + |
| 146 | +#if T(AVX) |
| 147 | +attribute_target("avx") void test_avx() |
| 148 | +{ |
| 149 | + __m256d a = _mm256_setzero_pd(); |
| 150 | + __m256d b = _mm256_set1_pd(42.42); |
| 151 | + (void) _mm256_add_pd(a, b); |
| 152 | +} |
| 153 | +#endif |
| 154 | + |
| 155 | +#if T(AVX2) |
| 156 | +attribute_target("avx2") void test_avx2() |
| 157 | +{ |
| 158 | + _mm256_zeroall(); |
| 159 | + __m256i a = _mm256_setzero_si256(); |
| 160 | + __m256i b = _mm256_and_si256(a, a); |
| 161 | + (void) _mm256_add_epi8(a, b); |
| 162 | +} |
| 163 | +#endif |
| 164 | + |
| 165 | +#if T(AVX512F) |
| 166 | +attribute_target("avx512f") void test_avx512f(char *ptr) |
| 167 | +{ |
| 168 | + /* AVX512 Foundation */ |
| 169 | + __mmask16 m = ~1; |
| 170 | + __m512i i; |
| 171 | + __m512d d; |
| 172 | + __m512 f; |
| 173 | + i = _mm512_maskz_loadu_epi32(0, ptr); |
| 174 | + d = _mm512_loadu_pd((double *)ptr + 64); |
| 175 | + f = _mm512_loadu_ps((float *)ptr + 128); |
| 176 | + _mm512_mask_storeu_epi64(ptr, m, i); |
| 177 | + _mm512_mask_storeu_ps(ptr + 64, m, f); |
| 178 | + _mm512_mask_storeu_pd(ptr + 128, m, d); |
| 179 | +} |
| 180 | +#endif |
| 181 | + |
| 182 | +#if T(AVX512ER) |
| 183 | +attribute_target("avx512er") void test_avx512er() |
| 184 | +{ |
| 185 | + /* AVX512 Exponential and Reciprocal */ |
| 186 | + __m512 f; |
| 187 | + f = _mm512_exp2a23_round_ps(f, 8); |
| 188 | +} |
| 189 | +#endif |
| 190 | + |
| 191 | +#if T(AVX512CD) |
| 192 | +attribute_target("avx512cd") void test_avx512cd() |
| 193 | +{ |
| 194 | + /* AVX512 Conflict Detection */ |
| 195 | + __mmask16 m = ~1; |
| 196 | + __m512i i; |
| 197 | + i = _mm512_maskz_conflict_epi32(m, i); |
| 198 | +} |
| 199 | +#endif |
| 200 | + |
| 201 | +#if T(AVX512PF) |
| 202 | +attribute_target("avx512pf") void test_avx512pf(void *ptr) |
| 203 | +{ |
| 204 | + /* AVX512 Prefetch */ |
| 205 | + __m512i i; |
| 206 | + __mmask16 m = 0xf; |
| 207 | + _mm512_mask_prefetch_i64scatter_pd(ptr, m, i, 2, 2); |
| 208 | +} |
| 209 | +#endif |
| 210 | + |
| 211 | +#if T(AVX512DQ) |
| 212 | +attribute_target("avx512dq") void test_avx512dq() |
| 213 | +{ |
| 214 | + /* AVX512 Doubleword and Quadword support */ |
| 215 | + __m512i i; |
| 216 | + __mmask16 m = ~1; |
| 217 | + m = _mm512_movepi32_mask(i); |
| 218 | +} |
| 219 | +#endif |
| 220 | + |
| 221 | +#if T(AVX512BW) |
| 222 | +attribute_target("avx512bw") void test_avx512bw(char *ptr) |
| 223 | +{ |
| 224 | + /* AVX512 Byte and Word support */ |
| 225 | + __m512i i; |
| 226 | + __mmask16 m = ~1; |
| 227 | + i = _mm512_mask_loadu_epi8(i, m, ptr - 8); |
| 228 | +} |
| 229 | +#endif |
| 230 | + |
| 231 | +#if T(AVX512VL) |
| 232 | +attribute_target("avx512vl") void test_avx512vl(char *ptr) |
| 233 | +{ |
| 234 | + /* AVX512 Vector Length */ |
| 235 | + __mmask16 m = ~1; |
| 236 | + __m256i i2 = _mm256_maskz_loadu_epi32(0, ptr); |
| 237 | + _mm256_mask_storeu_epi32(ptr + 1, m, i2); |
| 238 | +} |
| 239 | +#endif |
| 240 | + |
| 241 | +#if T(AVX512IFMA) |
| 242 | +attribute_target("avx512ifma") void test_avx512ifma() |
| 243 | +{ |
| 244 | + /* AVX512 Integer Fused Multiply-Add */ |
| 245 | + __m512i i; |
| 246 | + i = _mm512_madd52lo_epu64(i, i, i); |
| 247 | +} |
| 248 | +#endif |
| 249 | + |
/*
 * Entry point: does no work and reports success. It does not call any of the
 * test_* functions.
 */
int main()
{
    return 0;
}
0 commit comments