Skip to content

[AArch64] Fold swapped sub/SUBS conditions #121412

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25026,6 +25026,30 @@ static SDValue performCSELCombine(SDNode *N,
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
return Folded;

// CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
// if SUB(y, x) already exists and we can produce a swapped predicate for cc.
SDValue Cond = N->getOperand(3);
if (DCI.isAfterLegalizeDAG() && Cond.getOpcode() == AArch64ISD::SUBS &&
Cond.hasOneUse() && Cond->hasNUsesOfValue(0, 0) &&
DAG.doesNodeExist(ISD::SUB, N->getVTList(),
{Cond.getOperand(1), Cond.getOperand(0)}) &&
!DAG.doesNodeExist(ISD::SUB, N->getVTList(),
{Cond.getOperand(0), Cond.getOperand(1)}) &&
!isNullConstant(Cond.getOperand(1))) {
AArch64CC::CondCode OldCond =
static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
AArch64CC::CondCode NewCond = getSwappedCondition(OldCond);
if (NewCond != AArch64CC::AL) {
SDLoc DL(N);
SDValue Sub = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
Cond.getOperand(1), Cond.getOperand(0));
return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0),
N->getOperand(1),
DAG.getConstant(NewCond, DL, MVT::i32),
Sub.getValue(1));
}
}

return performCONDCombine(N, DCI, DAG, 2, 3);
}

Expand Down
30 changes: 30 additions & 0 deletions llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,36 @@ inline static CondCode getInvertedCondCode(CondCode Code) {
return static_cast<CondCode>(static_cast<unsigned>(Code) ^ 0x1);
}

/// Map a condition code across an operand swap of the flag-setting
/// instruction: given \p CC tested against flags produced by MI(a, b), return
/// the code to test once the flags are produced by MI(b, a) instead. Codes
/// without an explicit entry below yield AL.
inline static CondCode getSwappedCondition(CondCode CC) {
  // AL is the answer for every code that is not listed explicitly.
  CondCode Swapped = AL;
  switch (CC) {
  // EQ and NE map to themselves.
  case EQ:
  case NE:
    Swapped = CC;
    break;
  // HS <-> LS and LO <-> HI.
  case HS:
    Swapped = LS;
    break;
  case LS:
    Swapped = HS;
    break;
  case LO:
    Swapped = HI;
    break;
  case HI:
    Swapped = LO;
    break;
  // GE <-> LE and LT <-> GT.
  case GE:
    Swapped = LE;
    break;
  case LE:
    Swapped = GE;
    break;
  case LT:
    Swapped = GT;
    break;
  case GT:
    Swapped = LT;
    break;
  default:
    break;
  }
  return Swapped;
}

/// Given a condition code, return NZCV flags that would satisfy that condition.
/// The flag bits are in the format expected by the ccmp instructions.
/// Note that many different flag settings can satisfy a given condition code,
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AArch64/adds_cmn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,8 @@ entry:
define { i32, i32 } @subs_cmp_c(i32 noundef %x, i32 noundef %y) {
; CHECK-LABEL: subs_cmp_c:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: sub w1, w1, w0
; CHECK-NEXT: cset w8, hs
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: subs w1, w1, w0
; CHECK-NEXT: cset w0, ls
; CHECK-NEXT: ret
entry:
%0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
Expand Down
52 changes: 20 additions & 32 deletions llvm/test/CodeGen/AArch64/csel-subs-swapped.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ define i32 @eq_i32(i32 %x) {
; CHECK-LABEL: eq_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
; CHECK-NEXT: sub w8, w8, w0
; CHECK-NEXT: subs w8, w8, w0
; CHECK-NEXT: csel w0, w0, w8, eq
; CHECK-NEXT: ret
%cmp = icmp eq i32 %x, -2097152
Expand All @@ -19,8 +18,7 @@ define i32 @ne_i32(i32 %x) {
; CHECK-LABEL: ne_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
; CHECK-NEXT: sub w8, w8, w0
; CHECK-NEXT: subs w8, w8, w0
; CHECK-NEXT: csel w0, w0, w8, ne
; CHECK-NEXT: ret
%cmp = icmp ne i32 %x, -2097152
Expand All @@ -33,9 +31,8 @@ define i32 @sgt_i32(i32 %x) {
; CHECK-LABEL: sgt_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
; CHECK-NEXT: sub w8, w8, w0
; CHECK-NEXT: csel w0, w0, w8, gt
; CHECK-NEXT: subs w8, w8, w0
; CHECK-NEXT: csel w0, w0, w8, lt
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %x, -2097152
%sub = sub i32 -2097152, %x
Expand All @@ -62,9 +59,8 @@ define i32 @slt_i32(i32 %x) {
; CHECK-LABEL: slt_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
; CHECK-NEXT: sub w8, w8, w0
; CHECK-NEXT: csel w0, w0, w8, lt
; CHECK-NEXT: subs w8, w8, w0
; CHECK-NEXT: csel w0, w0, w8, gt
; CHECK-NEXT: ret
%cmp = icmp slt i32 %x, -2097152
%sub = sub i32 -2097152, %x
Expand All @@ -91,9 +87,8 @@ define i32 @ugt_i32(i32 %x) {
; CHECK-LABEL: ugt_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
; CHECK-NEXT: sub w8, w8, w0
; CHECK-NEXT: csel w0, w0, w8, hi
; CHECK-NEXT: subs w8, w8, w0
; CHECK-NEXT: csel w0, w0, w8, lo
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %x, -2097152
%sub = sub i32 -2097152, %x
Expand All @@ -120,9 +115,8 @@ define i32 @ult_i32(i32 %x) {
; CHECK-LABEL: ult_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
; CHECK-NEXT: sub w8, w8, w0
; CHECK-NEXT: csel w0, w0, w8, lo
; CHECK-NEXT: subs w8, w8, w0
; CHECK-NEXT: csel w0, w0, w8, hi
; CHECK-NEXT: ret
%cmp = icmp ult i32 %x, -2097152
%sub = sub i32 -2097152, %x
Expand Down Expand Up @@ -150,8 +144,7 @@ define i64 @eq_i64(i64 %x) {
; CHECK-LABEL: eq_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #100 // =0x64
; CHECK-NEXT: cmp x0, #100
; CHECK-NEXT: sub x8, x8, x0
; CHECK-NEXT: subs x8, x8, x0
; CHECK-NEXT: csel x0, x0, x8, eq
; CHECK-NEXT: ret
%cmp = icmp eq i64 %x, 100
Expand All @@ -164,8 +157,7 @@ define i64 @ne_i64(i64 %x) {
; CHECK-LABEL: ne_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #100 // =0x64
; CHECK-NEXT: cmp x0, #100
; CHECK-NEXT: sub x8, x8, x0
; CHECK-NEXT: subs x8, x8, x0
; CHECK-NEXT: csel x0, x0, x8, ne
; CHECK-NEXT: ret
%cmp = icmp ne i64 %x, 100
Expand All @@ -178,9 +170,8 @@ define i64 @sgt_i64(i64 %x) {
; CHECK-LABEL: sgt_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #100 // =0x64
; CHECK-NEXT: cmp x0, #100
; CHECK-NEXT: sub x8, x8, x0
; CHECK-NEXT: csel x0, x0, x8, gt
; CHECK-NEXT: subs x8, x8, x0
; CHECK-NEXT: csel x0, x0, x8, lt
; CHECK-NEXT: ret
%cmp = icmp sgt i64 %x, 100
%sub = sub i64 100, %x
Expand All @@ -206,9 +197,8 @@ define i64 @slt_i64(i64 %x) {
; CHECK-LABEL: slt_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #100 // =0x64
; CHECK-NEXT: cmp x0, #100
; CHECK-NEXT: sub x8, x8, x0
; CHECK-NEXT: csel x0, x0, x8, lt
; CHECK-NEXT: subs x8, x8, x0
; CHECK-NEXT: csel x0, x0, x8, gt
; CHECK-NEXT: ret
%cmp = icmp slt i64 %x, 100
%sub = sub i64 100, %x
Expand All @@ -234,9 +224,8 @@ define i64 @ugt_i64(i64 %x) {
; CHECK-LABEL: ugt_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #100 // =0x64
; CHECK-NEXT: cmp x0, #100
; CHECK-NEXT: sub x8, x8, x0
; CHECK-NEXT: csel x0, x0, x8, hi
; CHECK-NEXT: subs x8, x8, x0
; CHECK-NEXT: csel x0, x0, x8, lo
; CHECK-NEXT: ret
%cmp = icmp ugt i64 %x, 100
%sub = sub i64 100, %x
Expand All @@ -262,9 +251,8 @@ define i64 @ult_i64(i64 %x) {
; CHECK-LABEL: ult_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #100 // =0x64
; CHECK-NEXT: cmp x0, #100
; CHECK-NEXT: sub x8, x8, x0
; CHECK-NEXT: csel x0, x0, x8, lo
; CHECK-NEXT: subs x8, x8, x0
; CHECK-NEXT: csel x0, x0, x8, hi
; CHECK-NEXT: ret
%cmp = icmp ult i64 %x, 100
%sub = sub i64 100, %x
Expand Down
Loading