Skip to content

Commit ca827d5

Browse files
authored
[X86] Convert logicalshift(x, C) -> and(x, M) iff x is allsignbits (#83596)
If we're logical shifting an all-signbits value, then we can just mask out the shifted bits. This helps remove some unnecessary bitcasted vXi16 shifts used for vXi8 shifts (which SimplifyDemandedBits will struggle to remove through the bitcast), and allows some AVX1 shifts of 256-bit values to stay as a YMM instruction. Noticed in codegen from #82290
1 parent 10f5e98 commit ca827d5

File tree

4 files changed

+52
-92
lines changed

4 files changed

+52
-92
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28981,6 +28981,7 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
2898128981
SDValue R = Op.getOperand(0);
2898228982
SDValue Amt = Op.getOperand(1);
2898328983
unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false);
28984+
unsigned EltSizeInBits = VT.getScalarSizeInBits();
2898428985

2898528986
auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
2898628987
assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
@@ -29027,7 +29028,7 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
2902729028
return SDValue();
2902829029

2902929030
// If the shift amount is out of range, return undef.
29030-
if (APIntShiftAmt.uge(VT.getScalarSizeInBits()))
29031+
if (APIntShiftAmt.uge(EltSizeInBits))
2903129032
return DAG.getUNDEF(VT);
2903229033

2903329034
uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
@@ -29055,6 +29056,15 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
2905529056
Op.getOpcode() == ISD::SRA)
2905629057
return ArithmeticShiftRight64(ShiftAmt);
2905729058

29059+
// If we're logical shifting an all-signbits value then we can just perform as
29060+
// a mask.
29061+
if ((Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL) &&
29062+
DAG.ComputeNumSignBits(R) == EltSizeInBits) {
29063+
SDValue Mask = DAG.getAllOnesConstant(dl, VT);
29064+
Mask = DAG.getNode(Op.getOpcode(), dl, VT, Mask, Amt);
29065+
return DAG.getNode(ISD::AND, dl, VT, R, Mask);
29066+
}
29067+
2905829068
if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
2905929069
(Subtarget.hasBWI() && VT == MVT::v64i8)) {
2906029070
unsigned NumElts = VT.getVectorNumElements();

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll

Lines changed: 37 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,6 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
180180
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
181181
; SSE2-NEXT: pand %xmm1, %xmm0
182182
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
183-
; SSE2-NEXT: psrlw $7, %xmm0
184183
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
185184
; SSE2-NEXT: retq
186185
;
@@ -191,7 +190,6 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
191190
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
192191
; SSSE3-NEXT: pand %xmm1, %xmm0
193192
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
194-
; SSSE3-NEXT: psrlw $7, %xmm0
195193
; SSSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
196194
; SSSE3-NEXT: retq
197195
;
@@ -203,7 +201,6 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
203201
; AVX1-NEXT: # xmm1 = mem[0,0]
204202
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
205203
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
206-
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
207204
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
208205
; AVX1-NEXT: retq
209206
;
@@ -214,7 +211,6 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
214211
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
215212
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
216213
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
217-
; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
218214
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
219215
; AVX2-NEXT: retq
220216
;
@@ -268,11 +264,10 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
268264
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8]
269265
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
270266
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm1
271-
; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
272267
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
273268
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
274-
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
275269
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
270+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
276271
; AVX1-NEXT: retq
277272
;
278273
; AVX2-LABEL: ext_i4_4i64:
@@ -328,11 +323,10 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
328323
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
329324
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
330325
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
331-
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
332326
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
333327
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
334-
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
335328
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
329+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
336330
; AVX1-NEXT: retq
337331
;
338332
; AVX2-LABEL: ext_i8_8i32:
@@ -390,11 +384,10 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) {
390384
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
391385
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
392386
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
393-
; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
394387
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
395388
; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
396-
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
397389
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
390+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
398391
; AVX1-NEXT: retq
399392
;
400393
; AVX2-LABEL: ext_i16_16i16:
@@ -436,14 +429,12 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
436429
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
437430
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
438431
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
439-
; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
440432
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
441433
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
442434
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
443435
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
444436
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
445437
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
446-
; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
447438
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
448439
; SSE2-SSSE3-NEXT: retq
449440
;
@@ -460,13 +451,9 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
460451
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
461452
; AVX1-NEXT: # xmm2 = mem[0,0]
462453
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
463-
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
464-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
465-
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
466454
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
467-
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
468-
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
469455
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
456+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
470457
; AVX1-NEXT: retq
471458
;
472459
; AVX2-LABEL: ext_i32_32i8:
@@ -477,7 +464,6 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
477464
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
478465
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
479466
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
480-
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
481467
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
482468
; AVX2-NEXT: retq
483469
;
@@ -550,19 +536,18 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
550536
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
551537
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
552538
; AVX1-NEXT: vpcmpeqq %xmm0, %xmm2, %xmm0
553-
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
554539
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
555540
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
556-
; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
557541
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
558-
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [16,32,64,128]
559-
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
560-
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm2
561-
; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
542+
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,1,1,1]
543+
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
544+
; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [16,32,64,128]
545+
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
546+
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm3
562547
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
563548
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
564-
; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
565-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
549+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
550+
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
566551
; AVX1-NEXT: retq
567552
;
568553
; AVX2-LABEL: ext_i8_8i64:
@@ -631,19 +616,18 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
631616
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
632617
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
633618
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
634-
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
635619
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
636620
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
637-
; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
638621
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
639-
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
640-
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
641-
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
642-
; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
622+
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
623+
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
624+
; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [256,512,1024,2048,4096,8192,16384,32768]
625+
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
626+
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm3
643627
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
644628
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
645-
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
646-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
629+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
630+
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
647631
; AVX1-NEXT: retq
648632
;
649633
; AVX2-LABEL: ext_i16_16i32:
@@ -712,23 +696,22 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
712696
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
713697
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
714698
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
715-
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm3
716-
; AVX1-NEXT: vpsrlw $15, %xmm3, %xmm3
717-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
699+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
718700
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,512,1024,2048,4096,8192,16384,32768]
719-
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
720-
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
721-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
701+
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
702+
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
703+
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
704+
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
705+
; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
722706
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
723707
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
724708
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
725709
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
726-
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
727-
; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
728-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
729-
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1
730-
; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
731-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
710+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
711+
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4
712+
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
713+
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
714+
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
732715
; AVX1-NEXT: retq
733716
;
734717
; AVX2-LABEL: ext_i32_32i16:
@@ -782,26 +765,22 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
782765
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
783766
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
784767
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
785-
; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
786768
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
787769
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
788770
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
789771
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
790772
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
791773
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
792-
; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
793774
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
794775
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
795776
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
796777
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
797778
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
798-
; SSE2-SSSE3-NEXT: psrlw $7, %xmm2
799779
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2
800780
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
801781
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
802782
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
803783
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
804-
; SSE2-SSSE3-NEXT: psrlw $7, %xmm3
805784
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
806785
; SSE2-SSSE3-NEXT: retq
807786
;
@@ -817,26 +796,20 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
817796
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
818797
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
819798
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
820-
; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
821-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
822-
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
823799
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
824-
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
825-
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
826800
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
827-
; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
801+
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
802+
; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
803+
; AVX1-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
828804
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
829-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
805+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm4, %ymm1
830806
; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
831807
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
832-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
833-
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
834-
; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
835-
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
808+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
809+
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm4
836810
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
837-
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
838-
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
839-
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
811+
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
812+
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
840813
; AVX1-NEXT: retq
841814
;
842815
; AVX2-LABEL: ext_i64_64i8:
@@ -847,13 +820,11 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
847820
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
848821
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
849822
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
850-
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
851823
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
852824
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
853825
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
854826
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
855827
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
856-
; AVX2-NEXT: vpsrlw $7, %ymm1, %ymm1
857828
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
858829
; AVX2-NEXT: retq
859830
;

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
185185
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
186186
; SSE2-NEXT: pand %xmm1, %xmm0
187187
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
188-
; SSE2-NEXT: psrlw $7, %xmm0
189188
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
190189
; SSE2-NEXT: retq
191190
;
@@ -196,7 +195,6 @@ define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
196195
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
197196
; SSSE3-NEXT: pand %xmm1, %xmm0
198197
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
199-
; SSSE3-NEXT: psrlw $7, %xmm0
200198
; SSSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
201199
; SSSE3-NEXT: retq
202200
;
@@ -208,7 +206,6 @@ define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
208206
; AVX1-NEXT: # xmm1 = mem[0,0]
209207
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
210208
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
211-
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
212209
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
213210
; AVX1-NEXT: retq
214211
;
@@ -219,7 +216,6 @@ define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
219216
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
220217
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
221218
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
222-
; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
223219
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
224220
; AVX2-NEXT: retq
225221
;
@@ -252,13 +248,9 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
252248
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
253249
; AVX1-NEXT: # xmm2 = mem[0,0]
254250
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
255-
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
256-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
257-
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
258251
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
259-
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
260-
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
261252
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
253+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
262254
; AVX1-NEXT: retq
263255
;
264256
; AVX2-LABEL: bitcast_i32_32i1:
@@ -269,7 +261,6 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
269261
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
270262
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
271263
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
272-
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
273264
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
274265
; AVX2-NEXT: retq
275266
;

0 commit comments

Comments
 (0)