[InstCombine] Enable fold select into operand for FAdd, FMul, FSub and FDiv.

huihzhang · huihzhang · commit 9cd7c534e27c · 2021-11-22T15:10:10.000-08:00
For FAdd, FMul, FSub and FDiv, fold select into one of the operands to enable further optimizations, i.e., floating-point reduction detection. Turn code: %C = fadd %A, %B %D = select %cond, %C, %A into: %C = select %cond, %B, -0.000000e+00 %D = fadd %A, %C Alive2 verification (with --disable-undef-input), timed out otherwise. FAdd - https://alive2.llvm.org/ce/z/eUxN4Y FMul - https://alive2.llvm.org/ce/z/5SWZz4 FSub - https://alive2.llvm.org/ce/z/Dhj8dU FDiv - https://alive2.llvm.org/ce/z/Yj_NA2 Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D113442
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -246,12 +246,16 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
 static unsigned getSelectFoldableOperands(BinaryOperator *I) {
   switch (I->getOpcode()) {
   case Instruction::Add:
+  case Instruction::FAdd:
   case Instruction::Mul:
+  case Instruction::FMul:
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
     return 3;              // Can fold through either operand.
   case Instruction::Sub:   // Can only fold on the amount subtracted.
+  case Instruction::FSub:
+  case Instruction::FDiv:  // Can only fold on the divisor amount.
   case Instruction::Shl:   // Can only fold on the shift amount.
   case Instruction::LShr:
   case Instruction::AShr:
diff --git a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
@@ -3,8 +3,8 @@
 
 define float @select_fadd(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fadd(
-; CHECK-NEXT:    [[C:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
+; CHECK-NEXT:    [[D:%.*]] = fadd float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd float %A, %B
@@ -14,8 +14,8 @@ define float @select_fadd(i1 %cond, float %A, float %B) {
 
 define float @select_fadd_swapped(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fadd_swapped(
-; CHECK-NEXT:    [[C:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fadd float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd float %A, %B
@@ -25,8 +25,8 @@ define float @select_fadd_swapped(i1 %cond, float %A, float %B) {
 
 define float @select_fadd_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fadd_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = fadd fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
+; CHECK-NEXT:    [[D:%.*]] = fadd fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd fast float %A, %B
@@ -36,8 +36,8 @@ define float @select_fadd_fast_math(i1 %cond, float %A, float %B) {
 
 define float @select_fadd_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fadd_swapped_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = fadd fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fadd fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd fast float %A, %B
@@ -47,8 +47,8 @@ define float @select_fadd_swapped_fast_math(i1 %cond, float %A, float %B) {
 
 define float @select_fmul(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fmul(
-; CHECK-NEXT:    [[C:%.*]] = fmul float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[D:%.*]] = fmul float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul float %A, %B
@@ -58,8 +58,8 @@ define float @select_fmul(i1 %cond, float %A, float %B) {
 
 define float @select_fmul_swapped(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fmul_swapped(
-; CHECK-NEXT:    [[C:%.*]] = fmul float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fmul float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul float %A, %B
@@ -69,8 +69,8 @@ define float @select_fmul_swapped(i1 %cond, float %A, float %B) {
 
 define float @select_fmul_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fmul_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[D:%.*]] = fmul fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul fast float %A, %B
@@ -80,8 +80,8 @@ define float @select_fmul_fast_math(i1 %cond, float %A, float %B) {
 
 define float @select_fmul_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fmul_swapped_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fmul fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul fast float %A, %B
@@ -91,8 +91,8 @@ define float @select_fmul_swapped_fast_math(i1 %cond, float %A, float %B) {
 
 define float @select_fsub(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fsub(
-; CHECK-NEXT:    [[C:%.*]] = fsub float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00
+; CHECK-NEXT:    [[D:%.*]] = fsub float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub float %A, %B
@@ -102,8 +102,8 @@ define float @select_fsub(i1 %cond, float %A, float %B) {
 
 define float @select_fsub_swapped(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fsub_swapped(
-; CHECK-NEXT:    [[C:%.*]] = fsub float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fsub float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub float %A, %B
@@ -113,8 +113,8 @@ define float @select_fsub_swapped(i1 %cond, float %A, float %B) {
 
 define float @select_fsub_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fsub_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = fsub fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00
+; CHECK-NEXT:    [[D:%.*]] = fsub fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub fast float %A, %B
@@ -124,8 +124,8 @@ define float @select_fsub_fast_math(i1 %cond, float %A, float %B) {
 
 define float @select_fsub_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fsub_swapped_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = fsub fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fsub fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub fast float %A, %B
@@ -147,8 +147,8 @@ define float @select_fsub_invalid(i1 %cond, float %A, float %B) {
 
 define float @select_fdiv(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fdiv(
-; CHECK-NEXT:    [[C:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[D:%.*]] = fdiv float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv float %A, %B
@@ -158,8 +158,8 @@ define float @select_fdiv(i1 %cond, float %A, float %B) {
 
 define float @select_fdiv_swapped(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fdiv_swapped(
-; CHECK-NEXT:    [[C:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fdiv float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv float %A, %B
@@ -169,8 +169,8 @@ define float @select_fdiv_swapped(i1 %cond, float %A, float %B) {
 
 define float @select_fdiv_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fdiv_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = fdiv fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[D:%.*]] = fdiv fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv fast float %A, %B
@@ -180,8 +180,8 @@ define float @select_fdiv_fast_math(i1 %cond, float %A, float %B) {
 
 define float @select_fdiv_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_fdiv_swapped_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = fdiv fast float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[A]], float [[C]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fdiv fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv fast float %A, %B
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
@@ -43,8 +43,8 @@ define float @test(float* nocapture readonly %pA, float* nocapture readonly %pB,
 ; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP8]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = fdiv fast <4 x float> [[TMP9]], [[TMP7]]
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast <4 x float> [[TMP10]], [[VEC_PHI]]
-; CHECK-NEXT:    [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP11]]
+; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP4]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[TMP10]]
+; CHECK-NEXT:    [[PREDPHI]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP11]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -959,7 +959,7 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
@@ -1019,7 +1019,7 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
 ; CHECK-NEXT:    [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> zeroinitializer
 ; CHECK-NEXT:    [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[TMP40]])
 ; CHECK-NEXT:    [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer
-; CHECK-NEXT:    [[TMP43:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]])
+; CHECK-NEXT:    [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
@@ -1366,9 +1366,9 @@ define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP11]]
-; CHECK-NEXT:    [[PREDPHI3]] = select <4 x i1> [[TMP13]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
+; CHECK-NEXT:    [[PREDPHI2:%.*]] = select <4 x i1> [[TMP13]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[PREDPHI_V]]
+; CHECK-NEXT:    [[PREDPHI3]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI2]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll