Skip to content

Commit eb703f9

Browse files
davemgreen authored and yuxuanchen1997 committed
[AArch64] Remove superfluous sxtw in peephole opt (#96293)
Summary: Across a basic-block we might have an i32 extract from a value that only operates on upper bits (for example a sxtw). We can replace the COPY with a new version skipping the sxtw. This is a re-commit of 7f2a5df, with a fix for removing all the intermediate COPY nodes (and some extra debug logging). Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251396
1 parent fe2a719 commit eb703f9

File tree

3 files changed

+157
-8
lines changed

3 files changed

+157
-8
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
128128
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
129129
bool visitINSvi64lane(MachineInstr &MI);
130130
bool visitFMOVDr(MachineInstr &MI);
131+
bool visitCopy(MachineInstr &MI);
131132
bool runOnMachineFunction(MachineFunction &MF) override;
132133

133134
StringRef getPassName() const override {
@@ -690,6 +691,40 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
690691
return true;
691692
}
692693

694+
// Across a basic-block we might have an i32 extract from a value that only
695+
// operates on upper bits (for example a sxtw). We can replace the COPY with a
696+
// new version skipping the sxtw.
697+
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
698+
// Only a COPY that reads the sub_32 subregister of a register with exactly
// one non-debug use is a candidate; anything else is left untouched.
Register InputReg = MI.getOperand(1).getReg();
699+
if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
700+
!MRI->hasOneNonDBGUse(InputReg))
701+
return false;
702+
703+
// Walk backwards from the copied register through any chain of full COPYs
// whose source also has a single non-debug use. Every definition visited is
// recorded so the whole chain can be erased if the rewrite succeeds.
// SrcMI may be null here; the null check below returns before DeadInstrs is
// ever iterated, so a recorded null is never dereferenced.
MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
704+
SmallPtrSet<MachineInstr *, 4> DeadInstrs;
705+
DeadInstrs.insert(SrcMI);
706+
while (SrcMI && SrcMI->isFullCopy() &&
707+
MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
708+
SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
709+
DeadInstrs.insert(SrcMI);
710+
}
711+
712+
// The chain must end at an SBFMXri with immediates 0 and 31, i.e. the sxtw
// form this peephole targets. Bail out (changing nothing) otherwise.
if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri ||
713+
SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31)
714+
return false;
715+
716+
// Make MI read sub_32 straight from the sign-extend's input register,
// constraining that register to the class of the register it replaces.
Register SrcReg = SrcMI->getOperand(1).getReg();
717+
MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
718+
LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
719+
MI.getOperand(1).setReg(SrcReg);
720+
LLVM_DEBUG(dbgs() << " to: " << MI);
721+
// Each recorded instruction's only non-debug use was the next link in the
// chain, so after the rewrite the SBFMXri and intermediate COPYs are erased.
for (auto *DeadMI : DeadInstrs) {
722+
LLVM_DEBUG(dbgs() << " Removing: " << *DeadMI);
723+
DeadMI->eraseFromParent();
724+
}
725+
return true;
726+
}
727+
693728
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
694729
if (skipFunction(MF.getFunction()))
695730
return false;
@@ -771,6 +806,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
771806
case AArch64::FMOVDr:
772807
Changed |= visitFMOVDr(MI);
773808
break;
809+
case AArch64::COPY:
810+
Changed |= visitCopy(MI);
811+
break;
774812
}
775813
}
776814
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
3+
4+
---
5+
name: removeSxtw
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0.entry:
9+
liveins: $x0
10+
; CHECK-LABEL: name: removeSxtw
11+
; CHECK: liveins: $x0
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
14+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
15+
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0
16+
; CHECK-NEXT: $w0 = COPY [[ADDWri]]
17+
; CHECK-NEXT: RET_ReallyLR implicit $w0
18+
%0:gpr64 = COPY $x0
19+
%1:gpr64 = SBFMXri %0:gpr64, 0, 31
20+
%2:gpr32sp = COPY %1.sub_32:gpr64
21+
%3:gpr32sp = ADDWri %2:gpr32sp, 1, 0
22+
$w0 = COPY %3:gpr32sp
23+
RET_ReallyLR implicit $w0
24+
...
25+
---
26+
name: extraCopy
27+
tracksRegLiveness: true
28+
body: |
29+
bb.0.entry:
30+
liveins: $x0
31+
; CHECK-LABEL: name: extraCopy
32+
; CHECK: liveins: $x0
33+
; CHECK-NEXT: {{ $}}
34+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
35+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
36+
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0
37+
; CHECK-NEXT: $w0 = COPY [[ADDWri]]
38+
; CHECK-NEXT: RET_ReallyLR implicit $w0
39+
%0:gpr64 = COPY $x0
40+
%1:gpr64 = SBFMXri %0:gpr64, 0, 31
41+
%2:gpr64all = COPY %1:gpr64
42+
%3:gpr32sp = COPY %2.sub_32:gpr64all
43+
%4:gpr32sp = ADDWri %3:gpr32sp, 1, 0
44+
$w0 = COPY %4:gpr32sp
45+
RET_ReallyLR implicit $w0
46+
...
47+
---
48+
name: multipleCopies
49+
tracksRegLiveness: true
50+
body: |
51+
; CHECK-LABEL: name: multipleCopies
52+
; CHECK: bb.0.entry:
53+
; CHECK-NEXT: successors: %bb.1(0x80000000)
54+
; CHECK-NEXT: liveins: $w0
55+
; CHECK-NEXT: {{ $}}
56+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
57+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
58+
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_32
59+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].sub_32
60+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
61+
; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY2]], [[COPY1]]
62+
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[SUBWrr]], %subreg.sub_32
63+
; CHECK-NEXT: {{ $}}
64+
; CHECK-NEXT: bb.1:
65+
; CHECK-NEXT: successors: %bb.2(0x80000000)
66+
; CHECK-NEXT: {{ $}}
67+
; CHECK-NEXT: bb.2:
68+
; CHECK-NEXT: successors: %bb.1(0x04000000), %bb.2(0x7c000000)
69+
; CHECK-NEXT: {{ $}}
70+
; CHECK-NEXT: CBZX [[SUBREG_TO_REG]], %bb.1
71+
; CHECK-NEXT: B %bb.2
72+
bb.0.entry:
73+
successors: %bb.1(0x80000000)
74+
liveins: $w0
75+
76+
%2:gpr32 = COPY $w0
77+
%4:gpr64all = IMPLICIT_DEF
78+
%3:gpr64 = INSERT_SUBREG %4, %2, %subreg.sub_32
79+
%5:gpr64 = SBFMXri killed %3, 0, 31
80+
%0:gpr64all = COPY %5
81+
%6:gpr64all = COPY %0
82+
%7:gpr32 = COPY %6.sub_32
83+
%8:gpr32 = COPY $wzr
84+
%9:gpr32 = SUBWrr %8, %7
85+
%10:gpr32 = ORRWrs $wzr, %9, 0
86+
%1:gpr64 = SUBREG_TO_REG 0, %10, %subreg.sub_32
87+
88+
bb.1:
89+
successors: %bb.2(0x80000000)
90+
91+
bb.2:
92+
successors: %bb.1(0x04000000), %bb.2(0x7c000000)
93+
94+
CBZX %1, %bb.1
95+
B %bb.2
96+
97+
...

llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,7 @@ define i64 @smull_ldrsw_shift(ptr %x0, i64 %x1) {
281281
; CHECK-LABEL: smull_ldrsw_shift:
282282
; CHECK: // %bb.0: // %entry
283283
; CHECK-NEXT: ldrsw x8, [x0]
284-
; CHECK-NEXT: sxtw x9, w1
285-
; CHECK-NEXT: smull x0, w8, w9
284+
; CHECK-NEXT: smull x0, w8, w1
286285
; CHECK-NEXT: ret
287286
entry:
288287
%ext64 = load i32, ptr %x0
@@ -490,8 +489,7 @@ define i64 @smaddl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
490489
; CHECK-LABEL: smaddl_ldrsw_shift:
491490
; CHECK: // %bb.0: // %entry
492491
; CHECK-NEXT: ldrsw x8, [x0]
493-
; CHECK-NEXT: sxtw x9, w1
494-
; CHECK-NEXT: smaddl x0, w8, w9, x2
492+
; CHECK-NEXT: smaddl x0, w8, w1, x2
495493
; CHECK-NEXT: ret
496494
entry:
497495
%ext64 = load i32, ptr %x0
@@ -654,8 +652,7 @@ define i64 @smnegl_ldrsw_shift(ptr %x0, i64 %x1) {
654652
; CHECK-LABEL: smnegl_ldrsw_shift:
655653
; CHECK: // %bb.0: // %entry
656654
; CHECK-NEXT: ldrsw x8, [x0]
657-
; CHECK-NEXT: sxtw x9, w1
658-
; CHECK-NEXT: smnegl x0, w8, w9
655+
; CHECK-NEXT: smnegl x0, w8, w1
659656
; CHECK-NEXT: ret
660657
entry:
661658
%ext64 = load i32, ptr %x0
@@ -818,8 +815,7 @@ define i64 @smsubl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
818815
; CHECK-LABEL: smsubl_ldrsw_shift:
819816
; CHECK: // %bb.0: // %entry
820817
; CHECK-NEXT: ldrsw x8, [x0]
821-
; CHECK-NEXT: sxtw x9, w1
822-
; CHECK-NEXT: smsubl x0, w8, w9, x2
818+
; CHECK-NEXT: smsubl x0, w8, w1, x2
823819
; CHECK-NEXT: ret
824820
entry:
825821
%ext64 = load i32, ptr %x0
@@ -1451,3 +1447,21 @@ define i64 @umaddl_and_and(i64 %x, i64 %y, i64 %a) {
14511447
%add = add i64 %a, %mul
14521448
ret i64 %add
14531449
}
1450+
1451+
; Check a case that can contain multiple copies, all of which should be removed.
1452+
define i32 @f(i32 %0) {
1453+
entry:
1454+
%1 = sext i32 %0 to i64
1455+
br label %A
1456+
1457+
A:
1458+
%2 = trunc i64 %1 to i32
1459+
%a69.us = sub i32 0, %2
1460+
%a69.us.fr = freeze i32 %a69.us
1461+
%3 = zext i32 %a69.us.fr to i64
1462+
br label %B
1463+
1464+
B:
1465+
%t = icmp eq i64 0, %3
1466+
br i1 %t, label %A, label %B
1467+
}

0 commit comments

Comments
 (0)