Commit 9ac29c2

[X86][ARM][AArch64] shouldFoldMaskToVariableShiftPair should be true for scalars up to the biggest legal type
For ARM, AArch64, and X86, we want to do this for scalars up to the biggest legal type.
1 parent 13e540a commit 9ac29c2
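
For reference, the fold this hook gates rewrites a variable mask into a pair of variable shifts: `and (shl -1, y), x` becomes `shl (lshr x, y), y`, which avoids materializing the all-ones constant. Below is a minimal standalone C++ check of that identity; it is purely illustrative (not LLVM code), and the sample value 0xDEADBEEF is arbitrary:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // For 0 <= y < 32, (-1 << y) has its low y bits clear, so ANDing x with it
  // clears the low y bits of x -- exactly what (x >> y) << y computes with
  // logical shifts. The same identity holds at any scalar width.
  const uint32_t x = 0xDEADBEEFu; // arbitrary sample value
  for (unsigned y = 0; y < 32; ++y) {
    uint32_t masked  = x & (~0u << y);   // and (shl -1, y), x
    uint32_t shifted = (x >> y) << y;    // shl (lshr x, y), y
    assert(masked == shifted);
  }
  return 0;
}
```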

File tree

10 files changed: +2044 -473 lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 10 additions & 0 deletions
@@ -300,6 +300,16 @@ class AArch64TargetLowering : public TargetLowering {
   bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                          CombineLevel Level) const override;
 
+  /// Return true if it is profitable to fold a pair of shifts into a mask.
+  bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+    EVT VT = Y.getValueType();
+
+    if (VT.isVector())
+      return false;
+
+    return VT.getScalarSizeInBits() <= 64;
+  }
+
   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
                                             unsigned SelectOpcode, SDValue X,
                                             SDValue Y) const override;

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 10 additions & 0 deletions
@@ -773,6 +773,16 @@ class VectorType;
   bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                          CombineLevel Level) const override;
 
+  /// Return true if it is profitable to fold a pair of shifts into a mask.
+  bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+    EVT VT = Y.getValueType();
+
+    if (VT.isVector())
+      return false;
+
+    return VT.getScalarSizeInBits() <= 32;
+  }
+
   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
                                             unsigned SelectOpcode, SDValue X,
                                             SDValue Y) const override;

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 5 deletions
@@ -3663,11 +3663,8 @@ bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
   if (VT.isVector())
     return false;
 
-  // 64-bit shifts on 32-bit targets produce really bad bloated code.
-  if (VT == MVT::i64 && !Subtarget.is64Bit())
-    return false;
-
-  return true;
+  unsigned MaxWidth = Subtarget.is64Bit() ? 64 : 32;
+  return VT.getScalarSizeInBits() <= MaxWidth;
 }
 
 TargetLowering::ShiftLegalizationStrategy
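
The old and new X86 checks differ only above the biggest legal GPR width: the old code rejected just i64 on 32-bit targets and let wider scalars such as i128 through, while the new code caps at 64 bits on x86-64 and 32 bits otherwise. A small standalone sketch of the two predicates (illustrative only, not LLVM code):

```cpp
#include <cassert>

// Old X86 rule: only reject 64-bit scalars on a 32-bit subtarget.
static bool oldShouldFold(unsigned bits, bool is64Bit) {
  if (bits == 64 && !is64Bit)
    return false;
  return true;
}

// New X86 rule: allow scalars up to the biggest legal GPR width.
static bool newShouldFold(unsigned bits, bool is64Bit) {
  unsigned maxWidth = is64Bit ? 64 : 32;
  return bits <= maxWidth;
}

int main() {
  // Both rules agree on the case the old code singled out.
  assert(!oldShouldFold(64, /*is64Bit=*/false) && !newShouldFold(64, false));
  assert(oldShouldFold(64, /*is64Bit=*/true) && newShouldFold(64, true));
  // The new rule additionally rejects scalars wider than the biggest legal type.
  assert(oldShouldFold(128, /*is64Bit=*/true) && !newShouldFold(128, true));
  return 0;
}
```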

llvm/test/CodeGen/AArch64/and-mask-variable.ll

Lines changed: 64 additions & 14 deletions
@@ -3,28 +3,78 @@
 ; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define i32 @mask_pair(i32 %x, i32 %y) {
-; CHECK-LABEL: mask_pair:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    lsl w8, w8, w1
-; CHECK-NEXT:    and w0, w8, w0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: mask_pair:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    lsr w8, w0, w1
+; CHECK-SD-NEXT:    lsl w0, w8, w1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: mask_pair:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #-1 // =0xffffffff
+; CHECK-GI-NEXT:    lsl w8, w8, w1
+; CHECK-GI-NEXT:    and w0, w8, w0
+; CHECK-GI-NEXT:    ret
   %shl = shl nsw i32 -1, %y
   %and = and i32 %shl, %x
   ret i32 %and
 }
 
 define i64 @mask_pair_64(i64 %x, i64 %y) {
-; CHECK-LABEL: mask_pair_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    lsl x8, x8, x1
-; CHECK-NEXT:    and x0, x8, x0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: mask_pair_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    lsr x8, x0, x1
+; CHECK-SD-NEXT:    lsl x0, x8, x1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: mask_pair_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #-1 // =0xffffffffffffffff
+; CHECK-GI-NEXT:    lsl x8, x8, x1
+; CHECK-GI-NEXT:    and x0, x8, x0
+; CHECK-GI-NEXT:    ret
   %shl = shl nsw i64 -1, %y
   %and = and i64 %shl, %x
   ret i64 %and
 }
+
+define i128 @mask_pair_128(i128 %x, i128 %y) {
+; CHECK-SD-LABEL: mask_pair_128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-NEXT:    mvn w9, w2
+; CHECK-SD-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    lsl x8, x8, x2
+; CHECK-SD-NEXT:    lsr x9, x10, x9
+; CHECK-SD-NEXT:    tst x2, #0x40
+; CHECK-SD-NEXT:    orr x9, x8, x9
+; CHECK-SD-NEXT:    csel x9, x8, x9, ne
+; CHECK-SD-NEXT:    csel x8, xzr, x8, ne
+; CHECK-SD-NEXT:    and x0, x8, x0
+; CHECK-SD-NEXT:    and x1, x9, x1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: mask_pair_128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #64 // =0x40
+; CHECK-GI-NEXT:    mov x9, #-1 // =0xffffffffffffffff
+; CHECK-GI-NEXT:    sub x10, x2, #64
+; CHECK-GI-NEXT:    sub x8, x8, x2
+; CHECK-GI-NEXT:    lsl x11, x9, x2
+; CHECK-GI-NEXT:    cmp x2, #64
+; CHECK-GI-NEXT:    lsr x8, x9, x8
+; CHECK-GI-NEXT:    lsl x9, x9, x10
+; CHECK-GI-NEXT:    csel x10, x11, xzr, lo
+; CHECK-GI-NEXT:    orr x8, x8, x11
+; CHECK-GI-NEXT:    and x0, x10, x0
+; CHECK-GI-NEXT:    csel x8, x8, x9, lo
+; CHECK-GI-NEXT:    cmp x2, #0
+; CHECK-GI-NEXT:    csinv x8, x8, xzr, ne
+; CHECK-GI-NEXT:    and x1, x8, x1
+; CHECK-GI-NEXT:    ret
+  %shl = shl nsw i128 -1, %y
+  %and = and i128 %shl, %x
+  ret i128 %and
+}
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-GI: {{.*}}
-; CHECK-SD: {{.*}}
+; CHECK: {{.*}}
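
The new mask_pair_128 test illustrates why the fold is capped at the biggest legal type: an i128 variable shift must be expanded into 64-bit halves with compares and selects, so replacing the mask with a shift pair is no longer a two-instruction win there. A rough standalone C++ sketch of that expansion (hypothetical helper, assuming 0 <= amt < 128), not the code LLVM emits:

```cpp
#include <cassert>
#include <cstdint>
#include <utility>

// Shift a 128-bit value held as {hi, lo} left by a variable amount.
// The amt >= 64 case and the carry of bits from lo into hi are what force
// the extra compare/select instructions seen in the CHECK lines above.
static std::pair<uint64_t, uint64_t> shl128(uint64_t hi, uint64_t lo,
                                            unsigned amt) {
  if (amt == 0)
    return {hi, lo};
  if (amt < 64)
    return {(hi << amt) | (lo >> (64 - amt)), lo << amt};
  return {lo << (amt - 64), 0}; // low half is fully shifted out
}

int main() {
  // 1 << 64 lands entirely in the high half.
  assert(shl128(0, 1, 64) == std::make_pair(uint64_t{1}, uint64_t{0}));
  // For 0 < amt < 64, bits cross from the low half into the high half.
  assert(shl128(0, uint64_t{1} << 63, 1) ==
         std::make_pair(uint64_t{1}, uint64_t{0}));
  return 0;
}
```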

llvm/test/CodeGen/AArch64/extract-bits.ll

Lines changed: 43 additions & 55 deletions
@@ -532,11 +532,10 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_c0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg w8, w2
-; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
-; CHECK-NEXT:    lsr w10, w0, w1
-; CHECK-NEXT:    lsr w8, w9, w8
-; CHECK-NEXT:    and w0, w8, w10
+; CHECK-NEXT:    lsr w8, w0, w1
+; CHECK-NEXT:    neg w9, w2
+; CHECK-NEXT:    lsl w8, w8, w9
+; CHECK-NEXT:    lsr w0, w8, w9
 ; CHECK-NEXT:    ret
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
@@ -548,12 +547,11 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_c1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32 // =0x20
-; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
-; CHECK-NEXT:    lsr w10, w0, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    lsr w8, w9, w8
-; CHECK-NEXT:    and w0, w8, w10
+; CHECK-NEXT:    lsr w8, w0, w1
+; CHECK-NEXT:    mov w9, #32 // =0x20
+; CHECK-NEXT:    sub w9, w9, w2
+; CHECK-NEXT:    lsl w8, w8, w9
+; CHECK-NEXT:    lsr w0, w8, w9
 ; CHECK-NEXT:    ret
   %skip = zext i8 %numskipbits to i32
   %shifted = lshr i32 %val, %skip
@@ -569,10 +567,9 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    neg w9, w2
-; CHECK-NEXT:    mov w10, #-1 // =0xffffffff
-; CHECK-NEXT:    lsr w9, w10, w9
 ; CHECK-NEXT:    lsr w8, w8, w1
-; CHECK-NEXT:    and w0, w9, w8
+; CHECK-NEXT:    lsl w8, w8, w9
+; CHECK-NEXT:    lsr w0, w8, w9
 ; CHECK-NEXT:    ret
   %val = load i32, ptr %w
   %shifted = lshr i32 %val, %numskipbits
@@ -587,11 +584,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    mov w9, #32 // =0x20
-; CHECK-NEXT:    mov w10, #-1 // =0xffffffff
 ; CHECK-NEXT:    sub w9, w9, w2
 ; CHECK-NEXT:    lsr w8, w8, w1
-; CHECK-NEXT:    lsr w9, w10, w9
-; CHECK-NEXT:    and w0, w9, w8
+; CHECK-NEXT:    lsl w8, w8, w9
+; CHECK-NEXT:    lsr w0, w8, w9
 ; CHECK-NEXT:    ret
   %val = load i32, ptr %w
   %skip = zext i8 %numskipbits to i32
@@ -606,11 +602,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_c4_commutative:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg w8, w2
-; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
-; CHECK-NEXT:    lsr w10, w0, w1
-; CHECK-NEXT:    lsr w8, w9, w8
-; CHECK-NEXT:    and w0, w10, w8
+; CHECK-NEXT:    lsr w8, w0, w1
+; CHECK-NEXT:    neg w9, w2
+; CHECK-NEXT:    lsl w8, w8, w9
+; CHECK-NEXT:    lsr w0, w8, w9
 ; CHECK-NEXT:    ret
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
@@ -624,11 +619,10 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_c0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg x8, x2
-; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    lsr x10, x0, x1
-; CHECK-NEXT:    lsr x8, x9, x8
-; CHECK-NEXT:    and x0, x8, x10
+; CHECK-NEXT:    lsr x8, x0, x1
+; CHECK-NEXT:    neg x9, x2
+; CHECK-NEXT:    lsl x8, x8, x9
+; CHECK-NEXT:    lsr x0, x8, x9
 ; CHECK-NEXT:    ret
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
@@ -640,13 +634,12 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_c1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64 // =0x40
-; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    lsr x10, x0, x1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    lsr x8, x9, x8
-; CHECK-NEXT:    and x0, x8, x10
+; CHECK-NEXT:    lsr x8, x0, x1
+; CHECK-NEXT:    mov w9, #64 // =0x40
+; CHECK-NEXT:    sub w9, w9, w2
+; CHECK-NEXT:    lsl x8, x8, x9
+; CHECK-NEXT:    lsr x0, x8, x9
 ; CHECK-NEXT:    ret
   %skip = zext i8 %numskipbits to i64
   %shifted = lshr i64 %val, %skip
@@ -662,10 +655,9 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
 ; CHECK-NEXT:    neg x9, x2
-; CHECK-NEXT:    mov x10, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    lsr x9, x10, x9
 ; CHECK-NEXT:    lsr x8, x8, x1
-; CHECK-NEXT:    and x0, x9, x8
+; CHECK-NEXT:    lsl x8, x8, x9
+; CHECK-NEXT:    lsr x0, x8, x9
 ; CHECK-NEXT:    ret
   %val = load i64, ptr %w
   %shifted = lshr i64 %val, %numskipbits
@@ -679,13 +671,12 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-LABEL: bextr64_c3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov w9, #64 // =0x40
-; CHECK-NEXT:    mov x10, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    sub w9, w9, w2
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x8, x8, x1
-; CHECK-NEXT:    lsr x9, x10, x9
-; CHECK-NEXT:    and x0, x9, x8
+; CHECK-NEXT:    lsl x8, x8, x9
+; CHECK-NEXT:    lsr x0, x8, x9
 ; CHECK-NEXT:    ret
   %val = load i64, ptr %w
   %skip = zext i8 %numskipbits to i64
@@ -700,11 +691,10 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_c4_commutative:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg x8, x2
-; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    lsr x10, x0, x1
-; CHECK-NEXT:    lsr x8, x9, x8
-; CHECK-NEXT:    and x0, x10, x8
+; CHECK-NEXT:    lsr x8, x0, x1
+; CHECK-NEXT:    neg x9, x2
+; CHECK-NEXT:    lsl x8, x8, x9
+; CHECK-NEXT:    lsr x0, x8, x9
 ; CHECK-NEXT:    ret
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
@@ -737,11 +727,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_c1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg w8, w2
-; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
-; CHECK-NEXT:    lsr x10, x0, x1
-; CHECK-NEXT:    lsr w8, w9, w8
-; CHECK-NEXT:    and w0, w8, w10
+; CHECK-NEXT:    lsr x8, x0, x1
+; CHECK-NEXT:    neg w9, w2
+; CHECK-NEXT:    lsl w8, w8, w9
+; CHECK-NEXT:    lsr w0, w8, w9
 ; CHECK-NEXT:    ret
   %shifted = lshr i64 %val, %numskipbits
   %truncshifted = trunc i64 %shifted to i32
@@ -756,11 +745,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_c2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg w8, w2
-; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
-; CHECK-NEXT:    lsr x10, x0, x1
-; CHECK-NEXT:    lsr w8, w9, w8
-; CHECK-NEXT:    and w0, w8, w10
+; CHECK-NEXT:    lsr x8, x0, x1
+; CHECK-NEXT:    neg w9, w2
+; CHECK-NEXT:    lsl w8, w8, w9
+; CHECK-NEXT:    lsr w0, w8, w9
 ; CHECK-NEXT:    ret
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
