Skip to content

Commit 1e8d3c3

Browse files
committed
[X86] cmp-shiftX-maskX.ll - add additional tests for #83596
Shows cases where logical shifts of allsignbits values can be profitably converted to masks
1 parent 582718f commit 1e8d3c3

File tree

1 file changed

+156
-0
lines changed

1 file changed

+156
-0
lines changed

llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,162 @@ define <16 x i1> @shl_to_ror_eq_16xi16_s8_fail_preserve_i16(<16 x i16> %x) {
619619
ret <16 x i1> %r
620620
}
621621

622+
; Left shift by 3 of a sign-extended <16 x i8> equality-compare result.
; Every lane of %ext is either 0 or -1, so the shift yields 0 or 0xF8 per
; lane, i.e. the same value as ANDing the compare result with 0xF8
; (see #83596). The assertions below currently expect a 16-bit lane shift
; followed by an AND with a constant-pool mask.
define <16 x i8> @shl_s3_cmp_v16i8(<16 x i8> %x, <16 x i8> %y) {
623+
; CHECK-NOBMI-LABEL: shl_s3_cmp_v16i8:
624+
; CHECK-NOBMI: # %bb.0:
625+
; CHECK-NOBMI-NEXT: pcmpeqb %xmm1, %xmm0
626+
; CHECK-NOBMI-NEXT: psllw $3, %xmm0
627+
; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
628+
; CHECK-NOBMI-NEXT: retq
629+
;
630+
; CHECK-BMI2-SSE2-LABEL: shl_s3_cmp_v16i8:
631+
; CHECK-BMI2-SSE2: # %bb.0:
632+
; CHECK-BMI2-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
633+
; CHECK-BMI2-SSE2-NEXT: psllw $3, %xmm0
634+
; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
635+
; CHECK-BMI2-SSE2-NEXT: retq
636+
;
637+
; CHECK-AVX12-LABEL: shl_s3_cmp_v16i8:
638+
; CHECK-AVX12: # %bb.0:
639+
; CHECK-AVX12-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
640+
; CHECK-AVX12-NEXT: vpsllw $3, %xmm0, %xmm0
641+
; CHECK-AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
642+
; CHECK-AVX12-NEXT: retq
643+
;
644+
; CHECK-AVX512-LABEL: shl_s3_cmp_v16i8:
645+
; CHECK-AVX512: # %bb.0:
646+
; CHECK-AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
647+
; CHECK-AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
648+
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
649+
; CHECK-AVX512-NEXT: retq
650+
%cmp = icmp eq <16 x i8> %x, %y
651+
; Sign-extending an i1 lane gives 0 or -1 (all bits equal to the sign bit).
%ext = sext <16 x i1> %cmp to <16 x i8>
652+
; Uniform left shift by 3; named %shl to match the instruction.
%shl = shl <16 x i8> %ext, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
653+
ret <16 x i8> %shl
654+
}
655+
656+
; Left shift by 31 of a sign-extended <4 x double> ordered-equal compare,
; returned as <4 x i64>. Every lane of %ext is either 0 or -1, so the shift
; yields 0 or 0xFFFFFFFF80000000 per lane, i.e. the same value as ANDing the
; compare result with that constant (see #83596). The assertions below
; currently expect a 64-bit lane shift after the compare on every target.
define <4 x i64> @shl_s31_cmp_v4f64(<4 x double> %x, <4 x double> %y) {
657+
; CHECK-NOBMI-LABEL: shl_s31_cmp_v4f64:
658+
; CHECK-NOBMI: # %bb.0:
659+
; CHECK-NOBMI-NEXT: cmpeqpd %xmm3, %xmm1
660+
; CHECK-NOBMI-NEXT: cmpeqpd %xmm2, %xmm0
661+
; CHECK-NOBMI-NEXT: psllq $31, %xmm0
662+
; CHECK-NOBMI-NEXT: psllq $31, %xmm1
663+
; CHECK-NOBMI-NEXT: retq
664+
;
665+
; CHECK-BMI2-SSE2-LABEL: shl_s31_cmp_v4f64:
666+
; CHECK-BMI2-SSE2: # %bb.0:
667+
; CHECK-BMI2-SSE2-NEXT: cmpeqpd %xmm3, %xmm1
668+
; CHECK-BMI2-SSE2-NEXT: cmpeqpd %xmm2, %xmm0
669+
; CHECK-BMI2-SSE2-NEXT: psllq $31, %xmm0
670+
; CHECK-BMI2-SSE2-NEXT: psllq $31, %xmm1
671+
; CHECK-BMI2-SSE2-NEXT: retq
672+
;
673+
; CHECK-AVX1-LABEL: shl_s31_cmp_v4f64:
674+
; CHECK-AVX1: # %bb.0:
675+
; CHECK-AVX1-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
676+
; CHECK-AVX1-NEXT: vpsllq $31, %xmm0, %xmm1
677+
; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
678+
; CHECK-AVX1-NEXT: vpsllq $31, %xmm0, %xmm0
679+
; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
680+
; CHECK-AVX1-NEXT: retq
681+
;
682+
; CHECK-AVX2-LABEL: shl_s31_cmp_v4f64:
683+
; CHECK-AVX2: # %bb.0:
684+
; CHECK-AVX2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
685+
; CHECK-AVX2-NEXT: vpsllq $31, %ymm0, %ymm0
686+
; CHECK-AVX2-NEXT: retq
687+
;
688+
; CHECK-AVX512-LABEL: shl_s31_cmp_v4f64:
689+
; CHECK-AVX512: # %bb.0:
690+
; CHECK-AVX512-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
691+
; CHECK-AVX512-NEXT: vpsllq $31, %ymm0, %ymm0
692+
; CHECK-AVX512-NEXT: retq
693+
%cmp = fcmp oeq <4 x double> %x, %y
694+
; Sign-extending an i1 lane gives 0 or -1 (all bits equal to the sign bit).
%ext = sext <4 x i1> %cmp to <4 x i64>
695+
; Uniform left shift by 31; named %shl to match the instruction.
%shl = shl <4 x i64> %ext, <i64 31, i64 31, i64 31, i64 31>
696+
ret <4 x i64> %shl
697+
}
698+
699+
; Logical right shift by 1 of a sign-extended <16 x i8> equality-compare
; result. Every lane of %ext is either 0 or -1, so the shift yields 0 or
; 0x7F per lane, i.e. the same value as ANDing the compare result with 0x7F
; (see #83596). The assertions below currently expect a 16-bit lane shift
; followed by an AND with a constant-pool mask.
define <16 x i8> @shr_s1_cmp_v16i8(<16 x i8> %x, <16 x i8> %y) {
700+
; CHECK-NOBMI-LABEL: shr_s1_cmp_v16i8:
701+
; CHECK-NOBMI: # %bb.0:
702+
; CHECK-NOBMI-NEXT: pcmpeqb %xmm1, %xmm0
703+
; CHECK-NOBMI-NEXT: psrlw $1, %xmm0
704+
; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
705+
; CHECK-NOBMI-NEXT: retq
706+
;
707+
; CHECK-BMI2-SSE2-LABEL: shr_s1_cmp_v16i8:
708+
; CHECK-BMI2-SSE2: # %bb.0:
709+
; CHECK-BMI2-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
710+
; CHECK-BMI2-SSE2-NEXT: psrlw $1, %xmm0
711+
; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
712+
; CHECK-BMI2-SSE2-NEXT: retq
713+
;
714+
; CHECK-AVX12-LABEL: shr_s1_cmp_v16i8:
715+
; CHECK-AVX12: # %bb.0:
716+
; CHECK-AVX12-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
717+
; CHECK-AVX12-NEXT: vpsrlw $1, %xmm0, %xmm0
718+
; CHECK-AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
719+
; CHECK-AVX12-NEXT: retq
720+
;
721+
; CHECK-AVX512-LABEL: shr_s1_cmp_v16i8:
722+
; CHECK-AVX512: # %bb.0:
723+
; CHECK-AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
724+
; CHECK-AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0
725+
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
726+
; CHECK-AVX512-NEXT: retq
727+
%cmp = icmp eq <16 x i8> %x, %y
728+
; Sign-extending an i1 lane gives 0 or -1 (all bits equal to the sign bit).
%ext = sext <16 x i1> %cmp to <16 x i8>
729+
; Uniform logical (zero-filling) right shift by 1.
%shr = lshr <16 x i8> %ext, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
730+
ret <16 x i8> %shr
731+
}
732+
733+
; Logical right shift by 9 of a sign-extended <8 x i32> signed-greater-than
; compare result. Every lane of %ext is either 0 or -1, so the shift yields
; 0 or 0x007FFFFF per lane, i.e. the same value as ANDing the compare result
; with 0x007FFFFF (see #83596). The assertions below currently expect a
; 32-bit lane shift after the compare on every target.
define <8 x i32> @shr_s9_cmp_v8i32(<8 x i32> %x, <8 x i32> %y) {
734+
; CHECK-NOBMI-LABEL: shr_s9_cmp_v8i32:
735+
; CHECK-NOBMI: # %bb.0:
736+
; CHECK-NOBMI-NEXT: pcmpgtd %xmm3, %xmm1
737+
; CHECK-NOBMI-NEXT: pcmpgtd %xmm2, %xmm0
738+
; CHECK-NOBMI-NEXT: psrld $9, %xmm0
739+
; CHECK-NOBMI-NEXT: psrld $9, %xmm1
740+
; CHECK-NOBMI-NEXT: retq
741+
;
742+
; CHECK-BMI2-SSE2-LABEL: shr_s9_cmp_v8i32:
743+
; CHECK-BMI2-SSE2: # %bb.0:
744+
; CHECK-BMI2-SSE2-NEXT: pcmpgtd %xmm3, %xmm1
745+
; CHECK-BMI2-SSE2-NEXT: pcmpgtd %xmm2, %xmm0
746+
; CHECK-BMI2-SSE2-NEXT: psrld $9, %xmm0
747+
; CHECK-BMI2-SSE2-NEXT: psrld $9, %xmm1
748+
; CHECK-BMI2-SSE2-NEXT: retq
749+
;
750+
; CHECK-AVX1-LABEL: shr_s9_cmp_v8i32:
751+
; CHECK-AVX1: # %bb.0:
752+
; CHECK-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
753+
; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
754+
; CHECK-AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
755+
; CHECK-AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
756+
; CHECK-AVX1-NEXT: vpsrld $9, %xmm0, %xmm0
757+
; CHECK-AVX1-NEXT: vpsrld $9, %xmm2, %xmm1
758+
; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
759+
; CHECK-AVX1-NEXT: retq
760+
;
761+
; CHECK-AVX2-LABEL: shr_s9_cmp_v8i32:
762+
; CHECK-AVX2: # %bb.0:
763+
; CHECK-AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
764+
; CHECK-AVX2-NEXT: vpsrld $9, %ymm0, %ymm0
765+
; CHECK-AVX2-NEXT: retq
766+
;
767+
; CHECK-AVX512-LABEL: shr_s9_cmp_v8i32:
768+
; CHECK-AVX512: # %bb.0:
769+
; CHECK-AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
770+
; CHECK-AVX512-NEXT: vpsrld $9, %ymm0, %ymm0
771+
; CHECK-AVX512-NEXT: retq
772+
%cmp = icmp sgt <8 x i32> %x, %y
773+
; Sign-extending an i1 lane gives 0 or -1 (all bits equal to the sign bit).
%ext = sext <8 x i1> %cmp to <8 x i32>
774+
; Uniform logical (zero-filling) right shift by 9.
%shr = lshr <8 x i32> %ext, <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
775+
ret <8 x i32> %shr
776+
}
777+
622778
define i1 @shr_to_shl_eq_i32_s5_fail_doesnt_add_up(i32 %x) {
623779
; CHECK-LABEL: shr_to_shl_eq_i32_s5_fail_doesnt_add_up:
624780
; CHECK: # %bb.0:

0 commit comments

Comments
 (0)