Skip to content

Commit 8d4a5e6

Browse files
committed
[SCEV] Rewrite more SCEVAddExpr when applying guards.
When re-writing SCEVAddExprs to apply information from guards, check if we have information for the expression itself. If so, apply it. When we have an expression of the form (Const + A), check if we have guard info for (Const + 1 + A) and use it. This is needed to avoid regressions in a few cases where we have BTCs with a subtracted constant. Rewriting expressions could cause regressions, e.g. when comparing 2 SCEV expressions where we are only able to rewrite one side, but I could not find any cases where this happens more with this patch in practice. Depends on llvm#160012 (included in PR). Proofs for some of the test changes: https://alive2.llvm.org/ce/z/RPX6t_
1 parent 6674962 commit 8d4a5e6

File tree

7 files changed

+41
-32
lines changed

7 files changed

+41
-32
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16079,16 +16079,32 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
1607916079
}
1608016080

1608116081
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
16082-
// Trip count expressions sometimes consist of adding 3 operands, i.e.
16083-
// (Const + A + B). There may be guard info for A + B, and if so, apply
16084-
// it.
16085-
// TODO: Could more generally apply guards to Add sub-expressions.
16086-
if (isa<SCEVConstant>(Expr->getOperand(0)) &&
16087-
Expr->getNumOperands() == 3) {
16088-
if (const SCEV *S = Map.lookup(
16089-
SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
16090-
return SE.getAddExpr(Expr->getOperand(0), S);
16082+
if (const SCEV *S = Map.lookup(Expr))
16083+
return S;
16084+
if (isa<SCEVConstant>(Expr->getOperand(0))) {
16085+
// Trip count expressions sometimes consist of adding 3 operands, i.e.
16086+
// (Const + A + B). There may be guard info for A + B, and if so, apply
16087+
// it.
16088+
// TODO: Could more generally apply guards to Add sub-expressions.
16089+
if (Expr->getNumOperands() == 3) {
16090+
if (const SCEV *S = Map.lookup(
16091+
SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
16092+
return SE.getAddExpr(Expr->getOperand(0), S);
16093+
}
16094+
16095+
// For expressions of the form (Const + A), check if we have guard info
16096+
// for (Const + 1 + A), and rewrite to ((Const + 1 + A) - 1). This makes
16097+
// sure we don't lose information when rewriting expressions based on
16098+
// back-edge taken counts in some cases.
16099+
if (Expr->getNumOperands() == 2) {
16100+
auto *NewC =
16101+
SE.getAddExpr(Expr->getOperand(0), SE.getOne(Expr->getType()));
16102+
if (const SCEV *S =
16103+
Map.lookup(SE.getAddExpr(NewC, Expr->getOperand(1))))
16104+
return SE.getMinusSCEV(S, SE.getOne(Expr->getType()));
16105+
}
1609116106
}
16107+
1609216108
SmallVector<const SCEV *, 2> Operands;
1609316109
bool Changed = false;
1609416110
for (const auto *Op : Expr->operands()) {

llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ declare void @clobber()
3333
define void @test_add_sub_1_guard(ptr %src, i32 %n) {
3434
; CHECK-LABEL: 'test_add_sub_1_guard'
3535
; CHECK-NEXT: Determining loop execution counts for: @test_add_sub_1_guard
36-
; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
37-
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
38-
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
36+
; CHECK-NEXT: Loop %loop: backedge-taken count is i64 0
37+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 0
38+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i64 0
3939
; CHECK-NEXT: Loop %loop: Trip multiple is 1
4040
;
4141
entry:

llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,12 @@ define void @umax(i32 noundef %a, i32 noundef %b) {
102102
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
103103
; CHECK-NEXT: --> ((2 * %a) umax (4 * %b)) U: [0,-1) S: [-2147483648,2147483647)
104104
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
105-
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
105+
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
106106
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
107-
; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
107+
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
108108
; CHECK-NEXT: Determining loop execution counts for: @umax
109109
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
110-
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -3
110+
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
111111
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
112112
; CHECK-NEXT: Loop %for.body: Trip multiple is 2
113113
;
@@ -197,12 +197,12 @@ define void @smax(i32 noundef %a, i32 noundef %b) {
197197
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
198198
; CHECK-NEXT: --> ((2 * %a)<nsw> smax (4 * %b)<nsw>) U: [0,-1) S: [-2147483648,2147483647)
199199
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
200-
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
200+
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
201201
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
202-
; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
202+
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
203203
; CHECK-NEXT: Determining loop execution counts for: @smax
204204
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
205-
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -3
205+
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
206206
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
207207
; CHECK-NEXT: Loop %for.body: Trip multiple is 2
208208
;

llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -343,14 +343,13 @@ define void @slt_no_smax_needed(i64 %n, ptr %dst) {
343343
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
344344
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
345345
; CHECK: loop.preheader:
346-
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SHR]], i32 1)
347346
; CHECK-NEXT: br label [[LOOP:%.*]]
348347
; CHECK: loop:
349348
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
350349
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
351350
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
352351
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
353-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
352+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
354353
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
355354
; CHECK: exit.loopexit:
356355
; CHECK-NEXT: br label [[EXIT]]
@@ -385,14 +384,13 @@ define void @ult_no_umax_needed(i64 %n, ptr %dst) {
385384
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
386385
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
387386
; CHECK: loop.preheader:
388-
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHR]], i32 1)
389387
; CHECK-NEXT: br label [[LOOP:%.*]]
390388
; CHECK: loop:
391389
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
392390
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
393391
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
394392
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
395-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[UMAX]]
393+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
396394
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
397395
; CHECK: exit.loopexit:
398396
; CHECK-NEXT: br label [[EXIT]]

llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ define void @test_memset_size_can_use_info_from_guards(i32 %x, ptr %dst) {
6161
; CHECK: [[LOOP1_BACKEDGE]]:
6262
; CHECK-NEXT: br label %[[LOOP1]]
6363
; CHECK: [[LOOP2_PREHEADER]]:
64-
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SUB]] to i64
65-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
66-
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
64+
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SHR]], -1
65+
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
66+
; CHECK-NEXT: [[UMAX:%.*]] = add nuw nsw i64 [[TMP1]], 1
6767
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[DST]], i8 0, i64 [[UMAX]], i1 false)
6868
; CHECK-NEXT: br label %[[LOOP2:.*]]
6969
; CHECK: [[LOOP2]]:

llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
193193
; CHECK-NEXT: store i32 13, ptr [[TMP12]], align 1
194194
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
195195
; CHECK: pred.store.continue6:
196-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
196+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
197197
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
198198
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
199199
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]

llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -465,12 +465,7 @@ define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) {
465465
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
466466
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
467467
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]]
468-
; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
469-
; CHECK: [[VECTOR_MEMCHECK]]:
470-
; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64
471-
; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2
472-
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP17]], 16
473-
; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH1:label %.*]]
468+
; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]]
474469
;
475470
entry:
476471
%offset = sub i32 %x, %y

0 commit comments

Comments
 (0)