Skip to content

Commit 29763aa

Browse files
authored
[AArch64] Peephole optimization to remove redundant csel instructions (#101483)
Given a sequence such as

    %8:gpr64 = COPY $xzr
    %10:gpr64 = COPY $xzr
    %11:gpr64 = CSELXr %8:gpr64, %10:gpr64, 0, implicit $nzcv

`PeepholeOptimizer::foldRedundantCopy` led to the creation of select instructions where both inputs were the same register:

    %11:gpr64 = CSELXr %8:gpr64, %8:gpr64, 0, implicit $nzcv

This change adds a later peephole optimization that replaces such selects with unconditional moves.
1 parent 69fe7da commit 29763aa

File tree

3 files changed

+31
-3
lines changed

3 files changed

+31
-3
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@
6161
// %6:fpr128 = IMPLICIT_DEF
6262
// %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
6363
//
64+
// 8. Remove redundant CSELs that select between identical registers, by
65+
// replacing them with unconditional moves.
66+
//
6467
//===----------------------------------------------------------------------===//
6568

6669
#include "AArch64ExpandImm.h"
@@ -124,6 +127,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
124127
template <typename T>
125128
bool visitAND(unsigned Opc, MachineInstr &MI);
126129
bool visitORR(MachineInstr &MI);
130+
bool visitCSEL(MachineInstr &MI);
127131
bool visitINSERT(MachineInstr &MI);
128132
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
129133
bool visitINSvi64lane(MachineInstr &MI);
@@ -283,6 +287,26 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
283287
return true;
284288
}
285289

290+
/// Replace a CSELWr/CSELXr whose two source operands are the same register
/// with an unconditional move, emitted as an ORR of the zero register with
/// that source and a shift immediate of 0.
///
/// When both inputs are identical the selected value does not depend on the
/// condition, so the select can be dropped.
///
/// \param MI the CSELWr or CSELXr instruction to inspect.
/// \returns true if \p MI was replaced and erased, false if it was left
/// untouched.
bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
  const MachineOperand &TrueOp = MI.getOperand(1);
  const MachineOperand &FalseOp = MI.getOperand(2);

  // Only fold when both inputs are the same register.
  if (TrueOp.getReg() != FalseOp.getReg())
    return false;

  // The replacement below is built from the bare register number. An operand
  // carrying a sub-register index refers to a different value than the bare
  // register (and two operands may name different sub-registers of the same
  // register), so conservatively leave such selects alone.
  if (TrueOp.getSubReg() || FalseOp.getSubReg())
    return false;

  const bool Is64Bit = MI.getOpcode() == AArch64::CSELXr;
  auto ZeroReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  auto OrOpcode = Is64Bit ? AArch64::ORRXrs : AArch64::ORRWrs;

  // Insert the move immediately before the select so it takes its place.
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
      .addReg(MI.getOperand(0).getReg(), RegState::Define)
      .addReg(ZeroReg)
      .addReg(TrueOp.getReg())
      .addImm(0);

  // TrueOp/FalseOp refer into MI and must not be used past this point.
  MI.eraseFromParent();
  return true;
}
309+
286310
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
287311
// Check this INSERT_SUBREG comes from below zero-extend pattern.
288312
//
@@ -788,6 +812,10 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
788812
visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
789813
{AArch64::ADDXri, AArch64::ADDSXri}, MI);
790814
break;
815+
case AArch64::CSELWr:
816+
case AArch64::CSELXr:
817+
Changed |= visitCSEL(MI);
818+
break;
791819
case AArch64::INSvi64gpr:
792820
Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
793821
break;

llvm/test/CodeGen/AArch64/peephole-csel.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
66
; CHECK: // %bb.0: // %entry
77
; CHECK-NEXT: tst w2, #0x1
88
; CHECK-NEXT: mov w8, #1 // =0x1
9-
; CHECK-NEXT: csel x9, xzr, xzr, eq
9+
; CHECK-NEXT: mov x9, xzr
1010
; CHECK-NEXT: tst w1, #0x1
1111
; CHECK-NEXT: csel x8, x8, x9, eq
1212
; CHECK-NEXT: str x8, [x0]

llvm/test/CodeGen/AArch64/peephole-csel.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ body: |
1919
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
2020
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
2121
; CHECK-NEXT: $xzr = ANDSXri [[COPY]], 0, implicit-def $nzcv
22-
; CHECK-NEXT: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[COPY1]], [[COPY1]], 0, implicit $nzcv
22+
; CHECK-NEXT: [[ORRXrs:%[0-9]+]]:gpr64 = ORRXrs $xzr, [[COPY1]], 0
2323
; CHECK-NEXT: RET_ReallyLR
2424
%3:gpr64 = COPY $x1
2525
%4:gpr64 = COPY $x0
@@ -46,7 +46,7 @@ body: |
4646
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
4747
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
4848
; CHECK-NEXT: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
49-
; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY1]], [[COPY1]], 0, implicit $nzcv
49+
; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[COPY1]], 0
5050
; CHECK-NEXT: RET_ReallyLR
5151
%3:gpr32 = COPY $w1
5252
%4:gpr32 = COPY $w0

0 commit comments

Comments
 (0)