Skip to content

Commit 8e50ec5

Browse files
committed
!fixup also rewrite SCEVUMaxExpr, use getConstantMultiple.
1 parent 23d922e commit 8e50ec5

File tree

2 files changed

+50
-17
lines changed

2 files changed

+50
-17
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16002,16 +16002,16 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
1600216002

1600316003
// Try to strengthen divisibility of SMax/UMax expressions coming from >=
1600416004
// 1 conditions.
16005-
if (auto *SMax = dyn_cast<SCEVSMaxExpr>(Rewritten)) {
16006-
unsigned MinTrailingZeros = SE.getMinTrailingZeros(SMax->getOperand(1));
16007-
for (const SCEV *Op : drop_begin(SMax->operands(), 2))
16008-
MinTrailingZeros =
16009-
std::min(MinTrailingZeros, SE.getMinTrailingZeros(Op));
16010-
if (MinTrailingZeros != 0)
16011-
Rewritten = SE.getSMaxExpr(
16012-
SE.getConstant(APInt(SMax->getType()->getScalarSizeInBits(), 1)
16013-
.shl(MinTrailingZeros)),
16014-
SMax);
16005+
auto *Max = dyn_cast<SCEVMinMaxExpr>(Rewritten);
16006+
if (Max && isa<SCEVSMaxExpr, SCEVUMaxExpr>(Rewritten) &&
16007+
Rewritten->getType()->isIntegerTy() && Max->getOperand(0)->isOne()) {
16008+
APInt CommonMultiple = SE.getConstantMultiple(Max->getOperand(1));
16009+
for (const SCEV *Op : drop_begin(Max->operands(), 2)) {
16010+
CommonMultiple = APIntOps::GreatestCommonDivisor(
16011+
CommonMultiple, SE.getConstantMultiple(Op));
16012+
}
16013+
SmallVector<const SCEV *> Ops = {SE.getConstant(CommonMultiple), Max};
16014+
Rewritten = SE.getMinMaxExpr(Max->getSCEVType(), Ops);
1601516015
}
1601616016
Guards.RewriteMap.insert({Expr, Rewritten});
1601716017
}

llvm/test/Transforms/LoopVectorize/single_early_exit.ll

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -546,19 +546,50 @@ define i64 @loop_guards_needed_to_prove_deref_multiple(i32 %x, i1 %c, ptr derefe
546546
; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_2]])
547547
; CHECK-NEXT: [[N:%.*]] = add i32 [[SEL]], -1
548548
; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
549+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SEL]], -2
550+
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
551+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 2
552+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
553+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
554+
; CHECK: vector.ph:
555+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
556+
; CHECK-NEXT: [[IV_NEXT:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
549557
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
558+
; CHECK: vector.body:
559+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER]] ]
560+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
561+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
562+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
563+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
564+
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
565+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
566+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]]
567+
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
568+
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
569+
; CHECK: middle.split:
570+
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_LATCH:%.*]]
571+
; CHECK: middle.block:
572+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[IV_NEXT]]
573+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
574+
; CHECK: vector.early.exit:
575+
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
576+
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]]
577+
; CHECK-NEXT: br label [[EXIT_LOOPEXIT]]
578+
; CHECK: scalar.ph:
579+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[LOOP_LATCH]] ], [ 0, [[PH]] ]
580+
; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]]
550581
; CHECK: loop.header:
551-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[PH]] ]
552-
; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]]
582+
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT1:%.*]], [[LOOP_LATCH1:%.*]] ], [ [[IV]], [[SCALAR_PH]] ]
583+
; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV1]]
553584
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I]], align 1
554585
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0
555-
; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_LATCH]]
586+
; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH1]]
556587
; CHECK: loop.latch:
557-
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
558-
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]]
559-
; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]]
588+
; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1
589+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV1]], [[N_EXT]]
590+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP12:![0-9]+]]
560591
; CHECK: exit.loopexit:
561-
; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV]], [[LOOP_HEADER]] ], [ 0, [[LOOP_LATCH]] ]
592+
; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV1]], [[LOOP_HEADER1]] ], [ 0, [[LOOP_LATCH1]] ], [ 0, [[LOOP_LATCH]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ]
562593
; CHECK-NEXT: br label [[EXIT]]
563594
; CHECK: exit:
564595
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ -1, [[ENTRY:%.*]] ], [ -2, [[THEN]] ], [ [[RES_PH]], [[EXIT_LOOPEXIT]] ]
@@ -609,4 +640,6 @@ exit:
609640
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
610641
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
611642
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]}
643+
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
644+
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
612645
;.

0 commit comments

Comments
 (0)