Skip to content
This repository was archived by the owner on Mar 22, 2019. It is now read-only.

Commit 7e06b42

Browse files
committed
[InstCombine] reduce even more unsigned saturated add with 'not' op
We want to use the sum in the icmp to allow matching with
m_UAddWithOverflow and eliminate the 'not'. This is discussed in D51929
and is another step towards solving PR14613:
https://bugs.llvm.org/show_bug.cgi?id=14613

Name: uaddsat, -1 fval
  %notx = xor i32 %x, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %notx, %y
  %r = select i1 %c, i32 %a, i32 -1
=>
  %a = add i32 %x, %y
  %c2 = icmp ugt i32 %y, %a
  %r = select i1 %c2, i32 -1, i32 %a

Name: uaddsat, -1 fval + ult
  %notx = xor i32 %x, -1
  %a = add i32 %x, %y
  %c = icmp ult i32 %y, %notx
  %r = select i1 %c, i32 %a, i32 -1
=>
  %a = add i32 %x, %y
  %c2 = icmp ugt i32 %y, %a
  %r = select i1 %c2, i32 -1, i32 %a

https://rise4fun.com/Alive/nTp

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354276 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 6280180 commit 7e06b42

File tree

2 files changed

+42
-35
lines changed

2 files changed

+42
-35
lines changed

lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -680,42 +680,53 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
680680
if (!Cmp->hasOneUse())
681681
return nullptr;
682682

683-
// Canonicalize to 'ULT' to simplify matching below.
684683
Value *Cmp0 = Cmp->getOperand(0);
685684
Value *Cmp1 = Cmp->getOperand(1);
685+
686+
// Match unsigned saturated add with constant.
687+
Value *X;
688+
const APInt *C, *CmpC;
689+
if (match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
690+
match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
691+
// Commute compare predicate and select operands. The backend is expecting
692+
// this form (-1 is true value). If this changes, the backend must be
693+
// updated too:
694+
// (X u< ~C) ? (X + C) : -1 --> (X u> ~C) ? -1 : (X + C)
695+
Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Cmp1);
696+
return Builder.CreateSelect(NewCmp, FVal, TVal);
697+
}
698+
699+
// Match unsigned saturated add of 2 variables with an unnecessary 'not'.
700+
// There are 8 commuted variants.
701+
// Canonicalize -1 (saturated result) to true value of the select.
702+
if (match(FVal, m_AllOnes())) {
703+
std::swap(TVal, FVal);
704+
std::swap(Cmp0, Cmp1);
705+
}
706+
if (!match(TVal, m_AllOnes()))
707+
return nullptr;
708+
709+
// Canonicalize predicate to 'ULT'.
686710
ICmpInst::Predicate Pred = Cmp->getPredicate();
687711
if (Pred == ICmpInst::ICMP_UGT) {
688712
Pred = ICmpInst::ICMP_ULT;
689713
std::swap(Cmp0, Cmp1);
690714
}
691-
692715
if (Pred != ICmpInst::ICMP_ULT)
693716
return nullptr;
694717

695-
// Match unsigned saturated add of 2 variables with an unnecessary 'not'.
696-
// TODO: There are more variations of this pattern.
697-
Value *X, *Y;
698-
if (match(TVal, m_AllOnes()) && match(Cmp0, m_Not(m_Value(X))) &&
718+
Value *Y;
719+
if (match(Cmp0, m_Not(m_Value(X))) &&
699720
match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
700721
// Change the comparison to use the sum (false value of the select). That is
701-
// the canonical pattern match form for uadd.with.overflow and eliminates a
722+
// a canonical pattern match form for uadd.with.overflow and eliminates a
702723
// use of the 'not' op:
703724
// (~X u< Y) ? -1 : (X + Y) --> ((X + Y) u< Y) ? -1 : (X + Y)
704725
// (~X u< Y) ? -1 : (Y + X) --> ((Y + X) u< Y) ? -1 : (Y + X)
705726
Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y);
706727
return Builder.CreateSelect(NewCmp, TVal, FVal);
707728
}
708729

709-
// Match unsigned saturated add with constant.
710-
const APInt *C, *CmpC;
711-
if (match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
712-
match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
713-
// Commute compare predicate and select operands:
714-
// (X u< ~C) ? (X + C) : -1 --> (X u> ~C) ? -1 : (X + C)
715-
Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Cmp1);
716-
return Builder.CreateSelect(NewCmp, FVal, TVal);
717-
}
718-
719730
return nullptr;
720731
}
721732

test/Transforms/InstCombine/saturating-add-sub.ll

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -706,11 +706,10 @@ define <2 x i32> @uadd_sat_ugt_commute_add(<2 x i32> %xp, <2 x i32> %yp) {
706706
define i32 @uadd_sat_commute_select(i32 %x, i32 %yp) {
707707
; CHECK-LABEL: @uadd_sat_commute_select(
708708
; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
709-
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
710-
; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X]]
711-
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[Y]], [[NOTX]]
712-
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
713-
; CHECK-NEXT: ret i32 [[R]]
709+
; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
710+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
711+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
712+
; CHECK-NEXT: ret i32 [[TMP2]]
714713
;
715714
%y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
716715
%notx = xor i32 %x, -1
@@ -724,11 +723,10 @@ define i32 @uadd_sat_commute_select_commute_add(i32 %xp, i32 %yp) {
724723
; CHECK-LABEL: @uadd_sat_commute_select_commute_add(
725724
; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[XP:%.*]]
726725
; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
727-
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X]], -1
728726
; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X]], [[Y]]
729-
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[Y]], [[NOTX]]
730-
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
731-
; CHECK-NEXT: ret i32 [[R]]
727+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
728+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
729+
; CHECK-NEXT: ret i32 [[TMP2]]
732730
;
733731
%x = urem i32 42, %xp ; thwart complexity-based-canonicalization
734732
%y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
@@ -741,11 +739,10 @@ define i32 @uadd_sat_commute_select_commute_add(i32 %xp, i32 %yp) {
741739

742740
define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
743741
; CHECK-LABEL: @uadd_sat_commute_select_ugt(
744-
; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
745-
; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y:%.*]], [[X]]
746-
; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[NOTX]], [[Y]]
747-
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[A]], <2 x i32> <i32 -1, i32 -1>
748-
; CHECK-NEXT: ret <2 x i32> [[R]]
742+
; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
743+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
744+
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
745+
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
749746
;
750747
%notx = xor <2 x i32> %x, <i32 -1, i32 -1>
751748
%a = add <2 x i32> %y, %x
@@ -757,11 +754,10 @@ define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
757754
define i32 @uadd_sat_commute_select_ugt_commute_add(i32 %xp, i32 %y) {
758755
; CHECK-LABEL: @uadd_sat_commute_select_ugt_commute_add(
759756
; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[XP:%.*]]
760-
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X]], -1
761757
; CHECK-NEXT: [[A:%.*]] = add i32 [[X]], [[Y:%.*]]
762-
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTX]], [[Y]]
763-
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
764-
; CHECK-NEXT: ret i32 [[R]]
758+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
759+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
760+
; CHECK-NEXT: ret i32 [[TMP2]]
765761
;
766762
%x = srem i32 42, %xp ; thwart complexity-based-canonicalization
767763
%notx = xor i32 %x, -1

0 commit comments

Comments
 (0)