[InstCombine] Detect uadd with overflow idiom

AZero13 · AZero13 · commit 29aed2ca2aa3 · 2025-05-17T02:45:41.000-04:00
Generalize processUMulZExtIdiom (renamed to processUZExtIdiom) to also handle adds, since the overflow-check idiom is the same except that it uses add instead of mul. Alive2: https://alive2.llvm.org/ce/z/SsB4AK
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6515,72 +6515,75 @@ bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp,
   llvm_unreachable("Unexpected overflow result");
 }
 
-/// Recognize and process idiom involving test for multiplication
+/// Recognize and process idiom involving test for unsigned
 /// overflow.
 ///
 /// The caller has matched a pattern of the form:
+///   I = cmp u (add(zext A, zext B), V
 ///   I = cmp u (mul(zext A, zext B), V
 /// The function checks if this is a test for overflow and if so replaces
-/// multiplication with call to 'mul.with.overflow' intrinsic.
+/// the add/mul with a call to the matching '*.with.overflow' intrinsic.
 ///
 /// \param I Compare instruction.
-/// \param MulVal Result of 'mult' instruction.  It is one of the arguments of
+/// \param Val Result of the add or mul.  It is one of the arguments of
 ///               the compare instruction.  Must be of integer type.
 /// \param OtherVal The other argument of compare instruction.
 /// \returns Instruction which must replace the compare instruction, NULL if no
 ///          replacement required.
-static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
-                                         const APInt *OtherVal,
-                                         InstCombinerImpl &IC) {
+static Instruction *processUZExtIdiom(ICmpInst &I, Value *Val,
+                                      const APInt *OtherVal,
+                                      InstCombinerImpl &IC) {
   // Don't bother doing this transformation for pointers, don't do it for
   // vectors.
-  if (!isa<IntegerType>(MulVal->getType()))
+  if (!isa<IntegerType>(Val->getType()))
     return nullptr;
 
-  auto *MulInstr = dyn_cast<Instruction>(MulVal);
-  if (!MulInstr)
+  auto *Instr = dyn_cast<Instruction>(Val);
+  if (!Instr)
     return nullptr;
-  assert(MulInstr->getOpcode() == Instruction::Mul);
 
-  auto *LHS = cast<ZExtInst>(MulInstr->getOperand(0)),
-       *RHS = cast<ZExtInst>(MulInstr->getOperand(1));
+  unsigned Opcode = Instr->getOpcode();
+  assert(Opcode == Instruction::Add || Opcode == Instruction::Mul);
+
+  auto *LHS = cast<ZExtInst>(Instr->getOperand(0)),
+       *RHS = cast<ZExtInst>(Instr->getOperand(1));
   assert(LHS->getOpcode() == Instruction::ZExt);
   assert(RHS->getOpcode() == Instruction::ZExt);
   Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
 
-  // Calculate type and width of the result produced by mul.with.overflow.
+  // Calculate type and width of the result produced by add/mul.with.overflow.
   Type *TyA = A->getType(), *TyB = B->getType();
   unsigned WidthA = TyA->getPrimitiveSizeInBits(),
            WidthB = TyB->getPrimitiveSizeInBits();
-  unsigned MulWidth;
-  Type *MulType;
+  unsigned ResultWidth;
+  Type *ResultType;
   if (WidthB > WidthA) {
-    MulWidth = WidthB;
-    MulType = TyB;
+    ResultWidth = WidthB;
+    ResultType = TyB;
   } else {
-    MulWidth = WidthA;
-    MulType = TyA;
+    ResultWidth = WidthA;
+    ResultType = TyA;
   }
 
-  // In order to replace the original mul with a narrower mul.with.overflow,
-  // all uses must ignore upper bits of the product.  The number of used low
-  // bits must be not greater than the width of mul.with.overflow.
-  if (MulVal->hasNUsesOrMore(2))
-    for (User *U : MulVal->users()) {
+  // In order to replace the original result with an add/mul.with.overflow
+  // intrinsic, all uses must ignore upper bits of the result.  The number of
+  // used low bits must be not greater than the width of add/mul.with.overflow.
+  if (Val->hasNUsesOrMore(2))
+    for (User *U : Val->users()) {
       if (U == &I)
         continue;
       if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
-        // Check if truncation ignores bits above MulWidth.
+        // Check if truncation ignores bits above ResultWidth.
         unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
-        if (TruncWidth > MulWidth)
+        if (TruncWidth > ResultWidth)
           return nullptr;
       } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
-        // Check if AND ignores bits above MulWidth.
+        // Check if AND ignores bits above ResultWidth.
         if (BO->getOpcode() != Instruction::And)
           return nullptr;
         if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
           const APInt &CVal = CI->getValue();
-          if (CVal.getBitWidth() - CVal.countl_zero() > MulWidth)
+          if (CVal.getBitWidth() - CVal.countl_zero() > ResultWidth)
             return nullptr;
         } else {
           // In this case we could have the operand of the binary operation
@@ -6598,9 +6601,9 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
   switch (I.getPredicate()) {
   case ICmpInst::ICMP_UGT: {
     // Recognize pattern:
-    //   mulval = mul(zext A, zext B)
-    //   cmp ugt mulval, max
-    APInt MaxVal = APInt::getMaxValue(MulWidth);
+    //   val = add/mul(zext A, zext B)
+    //   cmp ugt val, max
+    APInt MaxVal = APInt::getMaxValue(ResultWidth);
     MaxVal = MaxVal.zext(OtherVal->getBitWidth());
     if (MaxVal.eq(*OtherVal))
       break; // Recognized
@@ -6609,9 +6612,9 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
 
   case ICmpInst::ICMP_ULT: {
     // Recognize pattern:
-    //   mulval = mul(zext A, zext B)
-    //   cmp ule mulval, max + 1
-    APInt MaxVal = APInt::getOneBitSet(OtherVal->getBitWidth(), MulWidth);
+    //   val = add/mul(zext A, zext B)
+    //   cmp ult val, max + 1
+    APInt MaxVal = APInt::getOneBitSet(OtherVal->getBitWidth(), ResultWidth);
     if (MaxVal.eq(*OtherVal))
       break; // Recognized
     return nullptr;
@@ -6622,38 +6625,42 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
   }
 
   InstCombiner::BuilderTy &Builder = IC.Builder;
-  Builder.SetInsertPoint(MulInstr);
-
-  // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
-  Value *MulA = A, *MulB = B;
-  if (WidthA < MulWidth)
-    MulA = Builder.CreateZExt(A, MulType);
-  if (WidthB < MulWidth)
-    MulB = Builder.CreateZExt(B, MulType);
-  CallInst *Call =
-      Builder.CreateIntrinsic(Intrinsic::umul_with_overflow, MulType,
-                              {MulA, MulB}, /*FMFSource=*/nullptr, "umul");
-  IC.addToWorklist(MulInstr);
-
-  // If there are uses of mul result other than the comparison, we know that
+  Builder.SetInsertPoint(Instr);
+
+  // Replace: add/mul(zext A, zext B) --> add/mul.with.overflow(A, B)
+  Value *ResultA = A, *ResultB = B;
+  if (WidthA < ResultWidth)
+    ResultA = Builder.CreateZExt(A, ResultType);
+  if (WidthB < ResultWidth)
+    ResultB = Builder.CreateZExt(B, ResultType);
+  CallInst *Call = Builder.CreateIntrinsic(
+      Opcode == Instruction::Add ? Intrinsic::uadd_with_overflow
+                                 : Intrinsic::umul_with_overflow,
+      ResultType, {ResultA, ResultB}, /*FMFSource=*/nullptr,
+      Opcode == Instruction::Add ? "uadd" : "umul");
+  IC.addToWorklist(Instr);
+
+  // If there are uses of the result other than the comparison, we know that
   // they are truncation or binary AND. Change them to use result of
-  // mul.with.overflow and adjust properly mask/size.
-  if (MulVal->hasNUsesOrMore(2)) {
-    Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
-    for (User *U : make_early_inc_range(MulVal->users())) {
+  // add/mul.with.overflow and adjust properly mask/size.
+  if (Val->hasNUsesOrMore(2)) {
+    Value *Extract = Builder.CreateExtractValue(
+        Call, 0, Opcode == Instruction::Add ? "uadd.value" : "umul.value");
+    for (User *U : make_early_inc_range(Val->users())) {
       if (U == &I)
         continue;
       if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
-        if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
-          IC.replaceInstUsesWith(*TI, Mul);
+        if (TI->getType()->getPrimitiveSizeInBits() == ResultWidth)
+          IC.replaceInstUsesWith(*TI, Extract);
         else
-          TI->setOperand(0, Mul);
+          TI->setOperand(0, Extract);
       } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
         assert(BO->getOpcode() == Instruction::And);
-        // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
+        // Replace (add/mul & mask) -->
+        //   zext (add/mul.with.overflow & short_mask)
         ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
-        APInt ShortMask = CI->getValue().trunc(MulWidth);
-        Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
+        APInt ShortMask = CI->getValue().trunc(ResultWidth);
+        Value *ShortAnd = Builder.CreateAnd(Extract, ShortMask);
         Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType());
         IC.replaceInstUsesWith(*BO, Zext);
       } else {
@@ -7078,7 +7085,7 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
         // icmp eq X, (zext (icmp ne X, 0)) --> X == 0 || X == 1
         // icmp ne X, (zext (icmp ne X, 0)) --> X != 0 && X != 1
         // icmp eq X, (sext (icmp ne X, 0)) --> X == 0 || X == -1
-        // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X == -1
+        // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X != -1
         return CreateRangeCheck();
       }
     } else if (IsSExt ? C->isAllOnes() : C->isOne()) {
@@ -7791,10 +7798,12 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
       }
     }
 
+    // (zext X) + (zext Y)  --> llvm.uadd.with.overflow.
     // (zext X) * (zext Y)  --> llvm.umul.with.overflow.
-    if (match(Op0, m_NUWMul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
+    if ((match(Op0, m_NUWAdd(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) ||
+         match(Op0, m_NUWMul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y))))) &&
         match(Op1, m_APInt(C))) {
-      if (Instruction *R = processUMulZExtIdiom(I, Op0, C, *this))
+      if (Instruction *R = processUZExtIdiom(I, Op0, C, *this))
         return R;
     }
 
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -2352,11 +2352,7 @@ define i8 @fold_add_umax_to_usub_multiuse(i8 %a) {
 
 define i32 @uadd_with_zext(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_with_zext(
-; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[X:%.*]] to i64
-; CHECK-NEXT:    [[CONV1:%.*]] = zext i32 [[Y:%.*]] to i64
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[CONV]], [[CONV1]]
-; CHECK-NEXT:    [[COND1:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
-; CHECK-NEXT:    [[COND:%.*]] = trunc nuw i64 [[COND1]] to i32
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
   %conv = zext i32 %x to i64
@@ -2370,13 +2366,9 @@ define i32 @uadd_with_zext(i32 %x, i32 %y) {
 
 define i32 @uadd_with_zext_multi_use(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_with_zext_multi_use(
-; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[X:%.*]] to i64
-; CHECK-NEXT:    [[CONV1:%.*]] = zext i32 [[Y:%.*]] to i64
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[CONV]], [[CONV1]]
-; CHECK-NEXT:    [[TRUNCADD:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT:    [[TRUNCADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    call void @usei32(i32 [[TRUNCADD]])
-; CHECK-NEXT:    [[COND1:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
-; CHECK-NEXT:    [[COND:%.*]] = trunc nuw i64 [[COND1]] to i32
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y]])
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
   %conv = zext i32 %x to i64
diff --git a/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll
@@ -150,10 +150,8 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant_with_or_fail(<2 x i32
 
 define i32 @uadd_with_zext(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_with_zext(
-; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[X:%.*]] to i64
-; CHECK-NEXT:    [[CONV1:%.*]] = zext i32 [[Y:%.*]] to i64
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[CONV]], [[CONV1]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp samesign ugt i64 [[ADD]], 4294967295
+; CHECK-NEXT:    [[UADD:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = extractvalue { i32, i1 } [[UADD]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = zext i1 [[CMP]] to i32
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
@@ -167,10 +165,9 @@ define i32 @uadd_with_zext(i32 %x, i32 %y) {
 
 define i32 @uadd_with_zext_inverse(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_with_zext_inverse(
-; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[X:%.*]] to i64
-; CHECK-NEXT:    [[CONV1:%.*]] = zext i32 [[Y:%.*]] to i64
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[CONV]], [[CONV1]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp samesign ult i64 [[ADD]], 4294967296
+; CHECK-NEXT:    [[UADD:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i1 } [[UADD]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = xor i1 [[TMP1]], true
 ; CHECK-NEXT:    [[COND:%.*]] = zext i1 [[CMP]] to i32
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;