@@ -619,6 +619,162 @@ define <16 x i1> @shl_to_ror_eq_16xi16_s8_fail_preserve_i16(<16 x i16> %x) {
619
619
ret <16 x i1 > %r
620
620
}
621
621
622
; shl_s3_cmp_v16i8: compares %x and %y for equality per i8 element,
; sign-extends the <16 x i1> result back to <16 x i8>, and shifts it left by
; a splat of 3.
;
; Every CHECK prefix below expects the same three-instruction shape before
; the return: a byte compare (pcmpeqb / vpcmpeqb), a word shift by 3
; (psllw / vpsllw), and an AND against a constant-pool operand. The AVX512
; prefix expects vpandd with a {1to4} broadcast memory operand where the
; other prefixes expect pand / vpand.
;
; NOTE(review): the shift result is named %shr although the instruction is
; shl; consider renaming it to %shl for readability.
+ define <16 x i8 > @shl_s3_cmp_v16i8 (<16 x i8 > %x , <16 x i8 > %y ) {
623
+ ; CHECK-NOBMI-LABEL: shl_s3_cmp_v16i8:
624
+ ; CHECK-NOBMI: # %bb.0:
625
+ ; CHECK-NOBMI-NEXT: pcmpeqb %xmm1, %xmm0
626
+ ; CHECK-NOBMI-NEXT: psllw $3, %xmm0
627
+ ; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
628
+ ; CHECK-NOBMI-NEXT: retq
629
+ ;
630
+ ; CHECK-BMI2-SSE2-LABEL: shl_s3_cmp_v16i8:
631
+ ; CHECK-BMI2-SSE2: # %bb.0:
632
+ ; CHECK-BMI2-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
633
+ ; CHECK-BMI2-SSE2-NEXT: psllw $3, %xmm0
634
+ ; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
635
+ ; CHECK-BMI2-SSE2-NEXT: retq
636
+ ;
637
+ ; CHECK-AVX12-LABEL: shl_s3_cmp_v16i8:
638
+ ; CHECK-AVX12: # %bb.0:
639
+ ; CHECK-AVX12-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
640
+ ; CHECK-AVX12-NEXT: vpsllw $3, %xmm0, %xmm0
641
+ ; CHECK-AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
642
+ ; CHECK-AVX12-NEXT: retq
643
+ ;
644
+ ; CHECK-AVX512-LABEL: shl_s3_cmp_v16i8:
645
+ ; CHECK-AVX512: # %bb.0:
646
+ ; CHECK-AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
647
+ ; CHECK-AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
648
+ ; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
649
+ ; CHECK-AVX512-NEXT: retq
650
+ %cmp = icmp eq <16 x i8 > %x , %y
651
+ %ext = sext <16 x i1 > %cmp to <16 x i8 >
652
+ %shr = shl <16 x i8 > %ext , <i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 , i8 3 >
653
+ ret <16 x i8 > %shr
654
+ }
655
+
656
; shl_s31_cmp_v4f64: ordered-equal (oeq) floating-point compare of two
; <4 x double> vectors, sign-extended from <4 x i1> to <4 x i64>, then
; shifted left by a splat of 31.
;
; Expected output per CHECK prefix:
;   NOBMI / BMI2-SSE2: two cmpeqpd and two psllq $31, all on xmm registers.
;   AVX1:              one ymm vcmpeqpd, then the shift is applied to each
;                      128-bit half (vextractf128 / vpsllq / vinsertf128).
;   AVX2 / AVX512:     one ymm vcmpeqpd followed by one ymm vpsllq $31.
; None of the expected sequences contains a masking AND.
;
; NOTE(review): the shift result is named %shr although the instruction is
; shl; consider renaming it to %shl for readability.
+ define <4 x i64 > @shl_s31_cmp_v4f64 (<4 x double > %x , <4 x double > %y ) {
657
+ ; CHECK-NOBMI-LABEL: shl_s31_cmp_v4f64:
658
+ ; CHECK-NOBMI: # %bb.0:
659
+ ; CHECK-NOBMI-NEXT: cmpeqpd %xmm3, %xmm1
660
+ ; CHECK-NOBMI-NEXT: cmpeqpd %xmm2, %xmm0
661
+ ; CHECK-NOBMI-NEXT: psllq $31, %xmm0
662
+ ; CHECK-NOBMI-NEXT: psllq $31, %xmm1
663
+ ; CHECK-NOBMI-NEXT: retq
664
+ ;
665
+ ; CHECK-BMI2-SSE2-LABEL: shl_s31_cmp_v4f64:
666
+ ; CHECK-BMI2-SSE2: # %bb.0:
667
+ ; CHECK-BMI2-SSE2-NEXT: cmpeqpd %xmm3, %xmm1
668
+ ; CHECK-BMI2-SSE2-NEXT: cmpeqpd %xmm2, %xmm0
669
+ ; CHECK-BMI2-SSE2-NEXT: psllq $31, %xmm0
670
+ ; CHECK-BMI2-SSE2-NEXT: psllq $31, %xmm1
671
+ ; CHECK-BMI2-SSE2-NEXT: retq
672
+ ;
673
+ ; CHECK-AVX1-LABEL: shl_s31_cmp_v4f64:
674
+ ; CHECK-AVX1: # %bb.0:
675
+ ; CHECK-AVX1-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
676
+ ; CHECK-AVX1-NEXT: vpsllq $31, %xmm0, %xmm1
677
+ ; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
678
+ ; CHECK-AVX1-NEXT: vpsllq $31, %xmm0, %xmm0
679
+ ; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
680
+ ; CHECK-AVX1-NEXT: retq
681
+ ;
682
+ ; CHECK-AVX2-LABEL: shl_s31_cmp_v4f64:
683
+ ; CHECK-AVX2: # %bb.0:
684
+ ; CHECK-AVX2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
685
+ ; CHECK-AVX2-NEXT: vpsllq $31, %ymm0, %ymm0
686
+ ; CHECK-AVX2-NEXT: retq
687
+ ;
688
+ ; CHECK-AVX512-LABEL: shl_s31_cmp_v4f64:
689
+ ; CHECK-AVX512: # %bb.0:
690
+ ; CHECK-AVX512-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
691
+ ; CHECK-AVX512-NEXT: vpsllq $31, %ymm0, %ymm0
692
+ ; CHECK-AVX512-NEXT: retq
693
+ %cmp = fcmp oeq <4 x double > %x , %y
694
+ %ext = sext <4 x i1 > %cmp to <4 x i64 >
695
+ %shr = shl <4 x i64 > %ext , <i64 31 , i64 31 , i64 31 , i64 31 >
696
+ ret <4 x i64 > %shr
697
+ }
698
+
699
; shr_s1_cmp_v16i8: compares %x and %y for equality per i8 element,
; sign-extends the <16 x i1> result back to <16 x i8>, and logically shifts
; it right by a splat of 1.
;
; Every CHECK prefix below expects a byte compare (pcmpeqb / vpcmpeqb), a
; word logical right shift by 1 (psrlw / vpsrlw), and an AND against a
; constant-pool operand. The AVX512 prefix expects vpandd with a {1to4}
; broadcast memory operand where the other prefixes expect pand / vpand.
+ define <16 x i8 > @shr_s1_cmp_v16i8 (<16 x i8 > %x , <16 x i8 > %y ) {
700
+ ; CHECK-NOBMI-LABEL: shr_s1_cmp_v16i8:
701
+ ; CHECK-NOBMI: # %bb.0:
702
+ ; CHECK-NOBMI-NEXT: pcmpeqb %xmm1, %xmm0
703
+ ; CHECK-NOBMI-NEXT: psrlw $1, %xmm0
704
+ ; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
705
+ ; CHECK-NOBMI-NEXT: retq
706
+ ;
707
+ ; CHECK-BMI2-SSE2-LABEL: shr_s1_cmp_v16i8:
708
+ ; CHECK-BMI2-SSE2: # %bb.0:
709
+ ; CHECK-BMI2-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
710
+ ; CHECK-BMI2-SSE2-NEXT: psrlw $1, %xmm0
711
+ ; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
712
+ ; CHECK-BMI2-SSE2-NEXT: retq
713
+ ;
714
+ ; CHECK-AVX12-LABEL: shr_s1_cmp_v16i8:
715
+ ; CHECK-AVX12: # %bb.0:
716
+ ; CHECK-AVX12-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
717
+ ; CHECK-AVX12-NEXT: vpsrlw $1, %xmm0, %xmm0
718
+ ; CHECK-AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
719
+ ; CHECK-AVX12-NEXT: retq
720
+ ;
721
+ ; CHECK-AVX512-LABEL: shr_s1_cmp_v16i8:
722
+ ; CHECK-AVX512: # %bb.0:
723
+ ; CHECK-AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
724
+ ; CHECK-AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0
725
+ ; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
726
+ ; CHECK-AVX512-NEXT: retq
727
+ %cmp = icmp eq <16 x i8 > %x , %y
728
+ %ext = sext <16 x i1 > %cmp to <16 x i8 >
729
+ %shr = lshr <16 x i8 > %ext , <i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 >
730
+ ret <16 x i8 > %shr
731
+ }
732
+
733
; shr_s9_cmp_v8i32: signed greater-than compare of two <8 x i32> vectors,
; sign-extended from <8 x i1> to <8 x i32>, then logically shifted right by
; a splat of 9.
;
; Expected output per CHECK prefix:
;   NOBMI / BMI2-SSE2: two pcmpgtd and two psrld $9, all on xmm registers.
;   AVX1:              the upper 128-bit halves of both inputs are extracted
;                      (vextractf128), each half is compared (vpcmpgtd) and
;                      shifted (vpsrld $9) separately, and the halves are
;                      recombined with vinsertf128.
;   AVX2 / AVX512:     one ymm vpcmpgtd followed by one ymm vpsrld $9.
; None of the expected sequences contains a masking AND.
+ define <8 x i32 > @shr_s9_cmp_v8i32 (<8 x i32 > %x , <8 x i32 > %y ) {
734
+ ; CHECK-NOBMI-LABEL: shr_s9_cmp_v8i32:
735
+ ; CHECK-NOBMI: # %bb.0:
736
+ ; CHECK-NOBMI-NEXT: pcmpgtd %xmm3, %xmm1
737
+ ; CHECK-NOBMI-NEXT: pcmpgtd %xmm2, %xmm0
738
+ ; CHECK-NOBMI-NEXT: psrld $9, %xmm0
739
+ ; CHECK-NOBMI-NEXT: psrld $9, %xmm1
740
+ ; CHECK-NOBMI-NEXT: retq
741
+ ;
742
+ ; CHECK-BMI2-SSE2-LABEL: shr_s9_cmp_v8i32:
743
+ ; CHECK-BMI2-SSE2: # %bb.0:
744
+ ; CHECK-BMI2-SSE2-NEXT: pcmpgtd %xmm3, %xmm1
745
+ ; CHECK-BMI2-SSE2-NEXT: pcmpgtd %xmm2, %xmm0
746
+ ; CHECK-BMI2-SSE2-NEXT: psrld $9, %xmm0
747
+ ; CHECK-BMI2-SSE2-NEXT: psrld $9, %xmm1
748
+ ; CHECK-BMI2-SSE2-NEXT: retq
749
+ ;
750
+ ; CHECK-AVX1-LABEL: shr_s9_cmp_v8i32:
751
+ ; CHECK-AVX1: # %bb.0:
752
+ ; CHECK-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
753
+ ; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
754
+ ; CHECK-AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
755
+ ; CHECK-AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
756
+ ; CHECK-AVX1-NEXT: vpsrld $9, %xmm0, %xmm0
757
+ ; CHECK-AVX1-NEXT: vpsrld $9, %xmm2, %xmm1
758
+ ; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
759
+ ; CHECK-AVX1-NEXT: retq
760
+ ;
761
+ ; CHECK-AVX2-LABEL: shr_s9_cmp_v8i32:
762
+ ; CHECK-AVX2: # %bb.0:
763
+ ; CHECK-AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
764
+ ; CHECK-AVX2-NEXT: vpsrld $9, %ymm0, %ymm0
765
+ ; CHECK-AVX2-NEXT: retq
766
+ ;
767
+ ; CHECK-AVX512-LABEL: shr_s9_cmp_v8i32:
768
+ ; CHECK-AVX512: # %bb.0:
769
+ ; CHECK-AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
770
+ ; CHECK-AVX512-NEXT: vpsrld $9, %ymm0, %ymm0
771
+ ; CHECK-AVX512-NEXT: retq
772
+ %cmp = icmp sgt <8 x i32 > %x , %y
773
+ %ext = sext <8 x i1 > %cmp to <8 x i32 >
774
+ %shr = lshr <8 x i32 > %ext , <i32 9 , i32 9 , i32 9 , i32 9 , i32 9 , i32 9 , i32 9 , i32 9 >
775
+ ret <8 x i32 > %shr
776
+ }
777
+
622
778
define i1 @shr_to_shl_eq_i32_s5_fail_doesnt_add_up (i32 %x ) {
623
779
; CHECK-LABEL: shr_to_shl_eq_i32_s5_fail_doesnt_add_up:
624
780
; CHECK: # %bb.0:
0 commit comments