Skip to content

Commit

Permalink
[AVX512] Implement masked and 512-bit pshufd intrinsics directly with…
Browse files Browse the repository at this point in the history
… __builtin_shufflevector and __builtin_ia32_select.

llvm-svn: 272467
  • Loading branch information
topperc committed Jun 11, 2016
1 parent 26d5b87 commit 7cc9263
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 36 deletions.
3 changes: 0 additions & 3 deletions clang/include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -2177,9 +2177,6 @@ TARGET_BUILTIN(__builtin_ia32_movshdup128_mask, "V4fV4fV4fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_movshdup256_mask, "V8fV8fV8fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_movsldup128_mask, "V4fV4fV4fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_movsldup256_mask, "V8fV8fV8fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pshufd512_mask, "V16iV16iIiV16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pshufd256_mask, "V8iV8iIiV8iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pshufd128_mask, "V4iV4iIiV4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc","","avx512f")
Expand Down
33 changes: 24 additions & 9 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -9052,19 +9052,34 @@ _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
}

#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
(__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \
(__v16si)_mm512_undefined_epi32(), \
(__mmask16)-1); })
(__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
(__v16si)_mm512_setzero_si512(), \
0 + (((I) & 0x03) >> 0), \
0 + (((I) & 0x0c) >> 2), \
0 + (((I) & 0x30) >> 4), \
0 + (((I) & 0xc0) >> 6), \
4 + (((I) & 0x03) >> 0), \
4 + (((I) & 0x0c) >> 2), \
4 + (((I) & 0x30) >> 4), \
4 + (((I) & 0xc0) >> 6), \
8 + (((I) & 0x03) >> 0), \
8 + (((I) & 0x0c) >> 2), \
8 + (((I) & 0x30) >> 4), \
8 + (((I) & 0xc0) >> 6), \
12 + (((I) & 0x03) >> 0), \
12 + (((I) & 0x0c) >> 2), \
12 + (((I) & 0x30) >> 4), \
12 + (((I) & 0xc0) >> 6)); })

#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
(__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \
(__v16si)(__m512i)(W), \
(__mmask16)(U)); })
(__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
(__v16si)_mm512_shuffle_epi32((A), (I)), \
(__v16si)(__m512i)(W)); })

#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
(__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \
(__v16si)_mm512_setzero_si512(), \
(__mmask16)(U)); })
(__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
(__v16si)_mm512_shuffle_epi32((A), (I)), \
(__v16si)_mm512_setzero_si512()); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
Expand Down
25 changes: 12 additions & 13 deletions clang/lib/Headers/avx512vlintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -9257,25 +9257,24 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
}

#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\
(__m256i)__builtin_ia32_pshufd256_mask((__v8si)(__m256i)(A), (int)(I), \
(__v8si)(__m256i)(W), \
(__mmask8)(U)); })
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
(__v8si)_mm256_shuffle_epi32((A), (I)), \
(__v8si)(__m256i)(W)); })

#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\
(__m256i)__builtin_ia32_pshufd256_mask((__v8si)(__m256i)(A), (int)(I), \
(__v8si)_mm256_setzero_si256(), \
(__mmask8)(U)); })
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
(__v8si)_mm256_shuffle_epi32((A), (I)), \
(__v8si)_mm256_setzero_si256()); })

#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\
(__m128i)__builtin_ia32_pshufd128_mask((__v4si)(__m128i)(A), (int)(I), \
(__v4si)(__m128i)(W), \
(__mmask8)(U)); })
(__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
(__v4si)_mm_shuffle_epi32((A), (I)), \
(__v4si)(__m128i)(W)); })

#define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\
(__m128i)__builtin_ia32_pshufd128_mask((__v4si)(__m128i)(A), (int)(I), \
(__v4si)_mm_setzero_si128(), \
(__mmask8)(U)); })

(__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
(__v4si)_mm_shuffle_epi32((A), (I)), \
(__v4si)_mm_setzero_si128()); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
Expand Down
3 changes: 0 additions & 3 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1566,9 +1566,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_fpclasspd512_mask:
case X86::BI__builtin_ia32_fpclasssd_mask:
case X86::BI__builtin_ia32_fpclassss_mask:
case X86::BI__builtin_ia32_pshufd512_mask:
case X86::BI__builtin_ia32_pshufd256_mask:
case X86::BI__builtin_ia32_pshufd128_mask:
i = 1; l = 0; u = 255;
break;
case X86::BI__builtin_ia32_palignr:
Expand Down
10 changes: 6 additions & 4 deletions clang/test/CodeGen/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -5956,19 +5956,21 @@ __m512 test_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) {

__m512i test_mm512_shuffle_epi32(__m512i __A) {
// CHECK-LABEL: @test_mm512_shuffle_epi32
// CHECK: @llvm.x86.avx512.mask.pshuf.d.512
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
return _mm512_shuffle_epi32(__A, 1);
}

__m512i test_mm512_mask_shuffle_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_shuffle_epi32
// CHECK: @llvm.x86.avx512.mask.pshuf.d.512
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_shuffle_epi32(__W, __U, __A, 1);
}

__m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_shuffle_epi32
// CHECK: @llvm.x86.avx512.mask.pshuf.d.512
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_shuffle_epi32(__U, __A, 1);
}

Expand Down Expand Up @@ -7433,4 +7435,4 @@ __m512d test_mm512_setzero_pd()
// CHECK-LABEL: @test_mm512_setzero_pd
// CHECK: zeroinitializer
return _mm512_setzero_pd();
}
}
12 changes: 8 additions & 4 deletions clang/test/CodeGen/avx512vl-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -6593,25 +6593,29 @@ __m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) {

__m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_shuffle_epi32
// CHECK: @llvm.x86.avx512.mask.pshuf.d.128
// CHECK: shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_shuffle_epi32(__W, __U, __A, 1);
}

__m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_shuffle_epi32
// CHECK: @llvm.x86.avx512.mask.pshuf.d.128
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_shuffle_epi32(__U, __A, 2);
}

__m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_shuffle_epi32
// CHECK: @llvm.x86.avx512.mask.pshuf.d.256
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4>
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_shuffle_epi32(__W, __U, __A, 2);
}

__m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_shuffle_epi32
// CHECK: @llvm.x86.avx512.mask.pshuf.d.256
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4>
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_shuffle_epi32(__U, __A, 2);
}

Expand Down

0 comments on commit 7cc9263

Please sign in to comment.