Skip to content

Commit a4cf479

Browse files
committed
[X86] shuffle-vs-trunc-128.ll - add BWVL-ONLY/VBMI/VBMI-FAST/VBMI-SLOW check prefixes to recover missing test checks
It is VERY annoying that update_llc_test_checks.py silently drops the checks for a function, instead of warning, when RUN lines that share a check prefix produce different output :(
1 parent 6528f10 commit a4cf479

File tree

1 file changed

+153
-4
lines changed

1 file changed

+153
-4
lines changed

llvm/test/CodeGen/X86/shuffle-vs-trunc-128.ll

Lines changed: 153 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,10 @@
1010
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
1111
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
1212
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
13-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
14-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
15-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
16-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
13+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512BWVL-ONLY
14+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512BWVL-ONLY
15+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512VBMI,AVX512VBMI-FAST
16+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512VBMI,AVX512VBMI-SLOW
1717

1818
; PR31551
1919
; Pairs of shufflevector:trunc functions with functional equivalence.
@@ -870,6 +870,29 @@ define <16 x i8> @oddelts_v32i16_shuffle_v16i16_to_v16i8(<32 x i16> %n2) nounwin
870870
; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
871871
; AVX512BW-NEXT: vzeroupper
872872
; AVX512BW-NEXT: retq
873+
;
874+
; AVX512BWVL-ONLY-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8:
875+
; AVX512BWVL-ONLY: # %bb.0:
876+
; AVX512BWVL-ONLY-NEXT: vextracti64x4 $1, %zmm0, %ymm1
877+
; AVX512BWVL-ONLY-NEXT: vextracti128 $1, %ymm1, %xmm2
878+
; AVX512BWVL-ONLY-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14]
879+
; AVX512BWVL-ONLY-NEXT: vpshufb %xmm3, %xmm2, %xmm2
880+
; AVX512BWVL-ONLY-NEXT: vpshufb %xmm3, %xmm1, %xmm1
881+
; AVX512BWVL-ONLY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
882+
; AVX512BWVL-ONLY-NEXT: vpsrld $16, %ymm0, %ymm0
883+
; AVX512BWVL-ONLY-NEXT: vpmovdb %ymm0, %xmm0
884+
; AVX512BWVL-ONLY-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
885+
; AVX512BWVL-ONLY-NEXT: vzeroupper
886+
; AVX512BWVL-ONLY-NEXT: retq
887+
;
888+
; AVX512VBMI-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8:
889+
; AVX512VBMI: # %bb.0:
890+
; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [2,6,10,14,18,22,26,30,34,38,42,46,50,54,58,62]
891+
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm2
892+
; AVX512VBMI-NEXT: vpermt2b %ymm2, %ymm1, %ymm0
893+
; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
894+
; AVX512VBMI-NEXT: vzeroupper
895+
; AVX512VBMI-NEXT: retq
873896
%n0 = bitcast <32 x i16> %n2 to <64 x i8>
874897
%p = shufflevector <64 x i8> %n0, <64 x i8> poison, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
875898
ret <16 x i8> %p
@@ -1182,6 +1205,71 @@ define <16 x i8> @evenelts_v32i16_trunc_v16i16_to_v16i8(<32 x i16> %n2) nounwind
11821205
; AVX512BW-NEXT: popq %rbp
11831206
; AVX512BW-NEXT: vzeroupper
11841207
; AVX512BW-NEXT: retq
1208+
;
1209+
; AVX512BWVL-ONLY-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8:
1210+
; AVX512BWVL-ONLY: # %bb.0:
1211+
; AVX512BWVL-ONLY-NEXT: pushq %rbp
1212+
; AVX512BWVL-ONLY-NEXT: pushq %r14
1213+
; AVX512BWVL-ONLY-NEXT: pushq %rbx
1214+
; AVX512BWVL-ONLY-NEXT: vextracti32x4 $3, %zmm0, %xmm1
1215+
; AVX512BWVL-ONLY-NEXT: vpextrw $6, %xmm1, %eax
1216+
; AVX512BWVL-ONLY-NEXT: vpextrw $4, %xmm1, %ecx
1217+
; AVX512BWVL-ONLY-NEXT: vpextrw $2, %xmm1, %edx
1218+
; AVX512BWVL-ONLY-NEXT: vmovd %xmm1, %esi
1219+
; AVX512BWVL-ONLY-NEXT: vextracti32x4 $2, %zmm0, %xmm1
1220+
; AVX512BWVL-ONLY-NEXT: vpextrw $6, %xmm1, %edi
1221+
; AVX512BWVL-ONLY-NEXT: vpextrw $4, %xmm1, %r8d
1222+
; AVX512BWVL-ONLY-NEXT: vpextrw $2, %xmm1, %r9d
1223+
; AVX512BWVL-ONLY-NEXT: vmovd %xmm1, %r10d
1224+
; AVX512BWVL-ONLY-NEXT: vextracti128 $1, %ymm0, %xmm1
1225+
; AVX512BWVL-ONLY-NEXT: vpextrw $6, %xmm1, %r11d
1226+
; AVX512BWVL-ONLY-NEXT: vpextrw $4, %xmm1, %ebx
1227+
; AVX512BWVL-ONLY-NEXT: vpextrw $2, %xmm1, %ebp
1228+
; AVX512BWVL-ONLY-NEXT: vmovd %xmm1, %r14d
1229+
; AVX512BWVL-ONLY-NEXT: vpmovdb %xmm0, %xmm0
1230+
; AVX512BWVL-ONLY-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
1231+
; AVX512BWVL-ONLY-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0
1232+
; AVX512BWVL-ONLY-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0
1233+
; AVX512BWVL-ONLY-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0
1234+
; AVX512BWVL-ONLY-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0
1235+
; AVX512BWVL-ONLY-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0
1236+
; AVX512BWVL-ONLY-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0
1237+
; AVX512BWVL-ONLY-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0
1238+
; AVX512BWVL-ONLY-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0
1239+
; AVX512BWVL-ONLY-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
1240+
; AVX512BWVL-ONLY-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
1241+
; AVX512BWVL-ONLY-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
1242+
; AVX512BWVL-ONLY-NEXT: popq %rbx
1243+
; AVX512BWVL-ONLY-NEXT: popq %r14
1244+
; AVX512BWVL-ONLY-NEXT: popq %rbp
1245+
; AVX512BWVL-ONLY-NEXT: vzeroupper
1246+
; AVX512BWVL-ONLY-NEXT: retq
1247+
;
1248+
; AVX512VBMI-FAST-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8:
1249+
; AVX512VBMI-FAST: # %bb.0:
1250+
; AVX512VBMI-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,79]
1251+
; AVX512VBMI-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
1252+
; AVX512VBMI-FAST-NEXT: vpermi2b %zmm2, %zmm0, %zmm1
1253+
; AVX512VBMI-FAST-NEXT: vextracti32x4 $3, %zmm0, %xmm0
1254+
; AVX512VBMI-FAST-NEXT: vpextrw $6, %xmm0, %eax
1255+
; AVX512VBMI-FAST-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
1256+
; AVX512VBMI-FAST-NEXT: vzeroupper
1257+
; AVX512VBMI-FAST-NEXT: retq
1258+
;
1259+
; AVX512VBMI-SLOW-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8:
1260+
; AVX512VBMI-SLOW: # %bb.0:
1261+
; AVX512VBMI-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [0,4,8,12,16,20,24,28,32,36,40,44,48,77,78,79]
1262+
; AVX512VBMI-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
1263+
; AVX512VBMI-SLOW-NEXT: vpermi2b %zmm2, %zmm0, %zmm1
1264+
; AVX512VBMI-SLOW-NEXT: vextracti32x4 $3, %zmm0, %xmm0
1265+
; AVX512VBMI-SLOW-NEXT: vpextrw $6, %xmm0, %eax
1266+
; AVX512VBMI-SLOW-NEXT: vpextrw $4, %xmm0, %ecx
1267+
; AVX512VBMI-SLOW-NEXT: vpextrw $2, %xmm0, %edx
1268+
; AVX512VBMI-SLOW-NEXT: vpinsrb $13, %edx, %xmm1, %xmm0
1269+
; AVX512VBMI-SLOW-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
1270+
; AVX512VBMI-SLOW-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
1271+
; AVX512VBMI-SLOW-NEXT: vzeroupper
1272+
; AVX512VBMI-SLOW-NEXT: retq
11851273
%n0 = shufflevector <32 x i16> %n2, <32 x i16> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
11861274
%n1 = trunc <16 x i16> %n0 to <16 x i8>
11871275
ret <16 x i8> %n1
@@ -1504,6 +1592,67 @@ define <16 x i8> @oddelts_v32i16_trunc_v16i16_to_v16i8(<32 x i16> %n2) nounwind
15041592
; AVX512BW-NEXT: popq %rbp
15051593
; AVX512BW-NEXT: vzeroupper
15061594
; AVX512BW-NEXT: retq
1595+
;
1596+
; AVX512BWVL-ONLY-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8:
1597+
; AVX512BWVL-ONLY: # %bb.0:
1598+
; AVX512BWVL-ONLY-NEXT: pushq %rbp
1599+
; AVX512BWVL-ONLY-NEXT: pushq %r14
1600+
; AVX512BWVL-ONLY-NEXT: pushq %rbx
1601+
; AVX512BWVL-ONLY-NEXT: vextracti32x4 $3, %zmm0, %xmm1
1602+
; AVX512BWVL-ONLY-NEXT: vpextrw $7, %xmm1, %eax
1603+
; AVX512BWVL-ONLY-NEXT: vpextrw $5, %xmm1, %ecx
1604+
; AVX512BWVL-ONLY-NEXT: vpextrw $3, %xmm1, %edx
1605+
; AVX512BWVL-ONLY-NEXT: vpextrw $1, %xmm1, %esi
1606+
; AVX512BWVL-ONLY-NEXT: vextracti32x4 $2, %zmm0, %xmm1
1607+
; AVX512BWVL-ONLY-NEXT: vpextrw $7, %xmm1, %edi
1608+
; AVX512BWVL-ONLY-NEXT: vpextrw $5, %xmm1, %r8d
1609+
; AVX512BWVL-ONLY-NEXT: vpextrw $3, %xmm1, %r9d
1610+
; AVX512BWVL-ONLY-NEXT: vpextrw $1, %xmm1, %r10d
1611+
; AVX512BWVL-ONLY-NEXT: vextracti128 $1, %ymm0, %xmm1
1612+
; AVX512BWVL-ONLY-NEXT: vpextrw $7, %xmm1, %r11d
1613+
; AVX512BWVL-ONLY-NEXT: vpextrw $5, %xmm1, %ebx
1614+
; AVX512BWVL-ONLY-NEXT: vpextrw $3, %xmm1, %ebp
1615+
; AVX512BWVL-ONLY-NEXT: vpextrw $1, %xmm1, %r14d
1616+
; AVX512BWVL-ONLY-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u]
1617+
; AVX512BWVL-ONLY-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
1618+
; AVX512BWVL-ONLY-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0
1619+
; AVX512BWVL-ONLY-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0
1620+
; AVX512BWVL-ONLY-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0
1621+
; AVX512BWVL-ONLY-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0
1622+
; AVX512BWVL-ONLY-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0
1623+
; AVX512BWVL-ONLY-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0
1624+
; AVX512BWVL-ONLY-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0
1625+
; AVX512BWVL-ONLY-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0
1626+
; AVX512BWVL-ONLY-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
1627+
; AVX512BWVL-ONLY-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
1628+
; AVX512BWVL-ONLY-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
1629+
; AVX512BWVL-ONLY-NEXT: popq %rbx
1630+
; AVX512BWVL-ONLY-NEXT: popq %r14
1631+
; AVX512BWVL-ONLY-NEXT: popq %rbp
1632+
; AVX512BWVL-ONLY-NEXT: vzeroupper
1633+
; AVX512BWVL-ONLY-NEXT: retq
1634+
;
1635+
; AVX512VBMI-FAST-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8:
1636+
; AVX512VBMI-FAST: # %bb.0:
1637+
; AVX512VBMI-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [2,6,10,14,18,22,26,30,34,38,42,46,50,54,58,62]
1638+
; AVX512VBMI-FAST-NEXT: vpermb %zmm0, %zmm1, %zmm0
1639+
; AVX512VBMI-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1640+
; AVX512VBMI-FAST-NEXT: vzeroupper
1641+
; AVX512VBMI-FAST-NEXT: retq
1642+
;
1643+
; AVX512VBMI-SLOW-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8:
1644+
; AVX512VBMI-SLOW: # %bb.0:
1645+
; AVX512VBMI-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [2,6,10,14,18,22,26,30,34,38,42,46,50,u,u,u]
1646+
; AVX512VBMI-SLOW-NEXT: vpermb %zmm0, %zmm1, %zmm1
1647+
; AVX512VBMI-SLOW-NEXT: vextracti32x4 $3, %zmm0, %xmm0
1648+
; AVX512VBMI-SLOW-NEXT: vpextrw $7, %xmm0, %eax
1649+
; AVX512VBMI-SLOW-NEXT: vpextrw $5, %xmm0, %ecx
1650+
; AVX512VBMI-SLOW-NEXT: vpextrw $3, %xmm0, %edx
1651+
; AVX512VBMI-SLOW-NEXT: vpinsrb $13, %edx, %xmm1, %xmm0
1652+
; AVX512VBMI-SLOW-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
1653+
; AVX512VBMI-SLOW-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
1654+
; AVX512VBMI-SLOW-NEXT: vzeroupper
1655+
; AVX512VBMI-SLOW-NEXT: retq
15071656
%n0 = shufflevector <32 x i16> %n2, <32 x i16> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
15081657
%n1 = trunc <16 x i16> %n0 to <16 x i8>
15091658
ret <16 x i8> %n1

0 commit comments

Comments
 (0)