Skip to content
This repository was archived by the owner on Jan 7, 2023. It is now read-only.

Commit fd6a6c9

Browse files
aus-intelgfxbot
authored and committed
Add patch to prohibit use of SCEV expander in LFTR if possible
Change-Id: Ib9286c8ac98497c89311b70da8404753473922f1
1 parent 8af4449 commit fd6a6c9

File tree

3 files changed

+340
-0
lines changed

3 files changed

+340
-0
lines changed
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
From cbf2de408fa9a89ee446d0159ecd8bb81340f0b4 Mon Sep 17 00:00:00 2001
2+
From: Aleksander Us <aleksander.us@intel.com>
3+
Date: Mon, 26 Aug 2019 15:45:47 +0300
4+
Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in
5+
LFTR when possible.
6+
7+
SCEV analysis cannot properly cache instructions with poison flags
8+
(for example, add nsw outside of loop will not be reused by expander).
9+
This can lead to the SCEV expander generating additional instructions.
10+
11+
Example IR:
12+
13+
...
14+
%maxval = add nuw nsw i32 %a1, %a2
15+
...
16+
for.body:
17+
...
18+
%cmp22 = icmp ult i32 %ivadd, %maxval
19+
br i1 %cmp22, label %for.body, label %for.end
20+
...
21+
22+
SCEV expander will generate a copy of %maxval in preheader but without
23+
nuw/nsw flags. This can be avoided by explicit check that iv count
24+
value gives the same SCEV expressions as calculated by LFTR.
25+
---
26+
lib/Transforms/Scalar/IndVarSimplify.cpp | 13 ++++++++++++-
27+
test/Transforms/IndVarSimplify/add_nsw.ll | 23 +++++++++++++++++++++++
28+
test/Transforms/IndVarSimplify/udiv.ll | 1 +
29+
3 files changed, 36 insertions(+), 1 deletion(-)
30+
create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll
31+
32+
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
33+
index 8656e88b79c..1744b424722 100644
34+
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
35+
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
36+
@@ -2174,8 +2174,19 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
37+
38+
IVLimit = SE->getAddExpr(IVInit, IVCount);
39+
}
40+
- // Expand the code for the iteration count.
41+
+
42+
+ // If computed limit is equal to old limit then do not use SCEV expander
43+
+ // because it can lose NUW/NSW flags and create extra instructions.
44+
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
45+
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) {
46+
+ Value *Limit = Cmp->getOperand(0);
47+
+ if (!L->isLoopInvariant(Limit))
48+
+ Limit = Cmp->getOperand(1);
49+
+ if (SE->getSCEV(Limit) == IVLimit)
50+
+ return Limit;
51+
+ }
52+
+
53+
+ // Expand the code for the iteration count.
54+
IRBuilder<> Builder(BI);
55+
assert(SE->isLoopInvariant(IVLimit, L) &&
56+
"Computed iteration count is not loop invariant!");
57+
diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll
58+
new file mode 100644
59+
index 00000000000..abd1cbb6c51
60+
--- /dev/null
61+
+++ b/test/Transforms/IndVarSimplify/add_nsw.ll
62+
@@ -0,0 +1,23 @@
63+
+; RUN: opt -indvars -S %s | FileCheck %s
64+
+
65+
+target datalayout = "e-p:32:32-i64:64-n8:16:32"
66+
+
67+
+; CHECK: for.body.preheader:
68+
+; CHECK-NOT: add
69+
+; CHECK: for.body:
70+
+
71+
+define void @foo(i32 %a1, i32 %a2) {
72+
+entry:
73+
+ %maxval = add nuw nsw i32 %a1, %a2
74+
+ %cmp = icmp slt i32 %maxval, 1
75+
+ br i1 %cmp, label %for.end, label %for.body
76+
+
77+
+for.body: ; preds = %entry, %for.body
78+
+ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ]
79+
+ %add31 = add nuw nsw i32 %j.02, 1
80+
+ %cmp22 = icmp slt i32 %add31, %maxval
81+
+ br i1 %cmp22, label %for.body, label %for.end
82+
+
83+
+for.end: ; preds = %for.body
84+
+ ret void
85+
+}
86+
diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
87+
index b3f2c2a6a66..3530343ef4a 100644
88+
--- a/test/Transforms/IndVarSimplify/udiv.ll
89+
+++ b/test/Transforms/IndVarSimplify/udiv.ll
90+
@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
91+
; CHECK-LABEL: @foo(
92+
; CHECK: for.body.preheader:
93+
; CHECK-NOT: udiv
94+
+; CHECK: for.body:
95+
96+
define void @foo(double* %p, i64 %n) nounwind {
97+
entry:
98+
--
99+
2.18.0
100+
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
From a64f085d0f1ce0725d2ca896e32ad213515658a0 Mon Sep 17 00:00:00 2001
2+
From: Aleksander Us <aleksander.us@intel.com>
3+
Date: Mon, 26 Aug 2019 15:45:47 +0300
4+
Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in
5+
LFTR when possible.
6+
7+
SCEV analysis cannot properly cache instructions with poison flags
8+
(for example, add nsw outside of loop will not be reused by expander).
9+
This can lead to the SCEV expander generating additional instructions.
10+
11+
Example IR:
12+
13+
...
14+
%maxval = add nuw nsw i32 %a1, %a2
15+
...
16+
for.body:
17+
...
18+
%cmp22 = icmp ult i32 %ivadd, %maxval
19+
br i1 %cmp22, label %for.body, label %for.end
20+
...
21+
22+
SCEV expander will generate copy of %maxval in preheader but without
23+
nuw/nsw flags. This can be avoided by explicit check that iv count
24+
value gives the same SCEV expressions as calculated by LFTR.
25+
---
26+
lib/Transforms/Scalar/IndVarSimplify.cpp | 13 ++++++++++++-
27+
test/Transforms/IndVarSimplify/add_nsw.ll | 23 +++++++++++++++++++++++
28+
test/Transforms/IndVarSimplify/udiv.ll | 1 +
29+
3 files changed, 36 insertions(+), 1 deletion(-)
30+
create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll
31+
32+
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
33+
index 48d8e457ba7..4c7b6b4bbf4 100644
34+
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
35+
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
36+
@@ -2318,8 +2318,19 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
37+
38+
IVLimit = SE->getAddExpr(IVInit, IVCount);
39+
}
40+
- // Expand the code for the iteration count.
41+
+
42+
+ // If computed limit is equal to old limit then do not use SCEV expander
43+
+ // because it can lose NUW/NSW flags and create extra instructions.
44+
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
45+
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) {
46+
+ Value *Limit = Cmp->getOperand(0);
47+
+ if (!L->isLoopInvariant(Limit))
48+
+ Limit = Cmp->getOperand(1);
49+
+ if (SE->getSCEV(Limit) == IVLimit)
50+
+ return Limit;
51+
+ }
52+
+
53+
+ // Expand the code for the iteration count.
54+
IRBuilder<> Builder(BI);
55+
assert(SE->isLoopInvariant(IVLimit, L) &&
56+
"Computed iteration count is not loop invariant!");
57+
diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll
58+
new file mode 100644
59+
index 00000000000..abd1cbb6c51
60+
--- /dev/null
61+
+++ b/test/Transforms/IndVarSimplify/add_nsw.ll
62+
@@ -0,0 +1,23 @@
63+
+; RUN: opt -indvars -S %s | FileCheck %s
64+
+
65+
+target datalayout = "e-p:32:32-i64:64-n8:16:32"
66+
+
67+
+; CHECK: for.body.preheader:
68+
+; CHECK-NOT: add
69+
+; CHECK: for.body:
70+
+
71+
+define void @foo(i32 %a1, i32 %a2) {
72+
+entry:
73+
+ %maxval = add nuw nsw i32 %a1, %a2
74+
+ %cmp = icmp slt i32 %maxval, 1
75+
+ br i1 %cmp, label %for.end, label %for.body
76+
+
77+
+for.body: ; preds = %entry, %for.body
78+
+ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ]
79+
+ %add31 = add nuw nsw i32 %j.02, 1
80+
+ %cmp22 = icmp slt i32 %add31, %maxval
81+
+ br i1 %cmp22, label %for.body, label %for.end
82+
+
83+
+for.end: ; preds = %for.body
84+
+ ret void
85+
+}
86+
diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
87+
index b3f2c2a6a66..3530343ef4a 100644
88+
--- a/test/Transforms/IndVarSimplify/udiv.ll
89+
+++ b/test/Transforms/IndVarSimplify/udiv.ll
90+
@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
91+
; CHECK-LABEL: @foo(
92+
; CHECK: for.body.preheader:
93+
; CHECK-NOT: udiv
94+
+; CHECK: for.body:
95+
96+
define void @foo(double* %p, i64 %n) nounwind {
97+
entry:
98+
--
99+
2.18.0
100+
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001
2+
From: Aleksander Us <aleksander.us@intel.com>
3+
Date: Mon, 26 Aug 2019 15:45:47 +0300
4+
Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in
5+
LFTR when possible.
6+
7+
SCEV analysis cannot properly cache instructions with poison flags
8+
(for example, add nsw outside of loop will not be reused by expander).
9+
This can lead to the SCEV expander generating additional instructions.
10+
11+
Example IR:
12+
13+
...
14+
%maxval = add nuw nsw i32 %a1, %a2
15+
...
16+
for.body:
17+
...
18+
%cmp22 = icmp ult i32 %ivadd, %maxval
19+
br i1 %cmp22, label %for.body, label %for.end
20+
...
21+
22+
SCEV expander will generate copy of %maxval in preheader but without
23+
nuw/nsw flags. This can be avoided by explicit check that iv count
24+
value gives the same SCEV expressions as calculated by LFTR.
25+
---
26+
lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++-
27+
test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++
28+
test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++-----
29+
test/Transforms/IndVarSimplify/udiv.ll | 1 +
30+
4 files changed, 38 insertions(+), 7 deletions(-)
31+
create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll
32+
33+
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
34+
index f9fc698a4a9..5e04dac8aa6 100644
35+
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
36+
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
37+
@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
38+
if (UsePostInc)
39+
IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
40+
41+
+ // If computed limit is equal to old limit then do not use SCEV expander
42+
+ // because it can lose NUW/NSW flags and create extra instructions.
43+
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
44+
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) {
45+
+ Value *Limit = Cmp->getOperand(0);
46+
+ if (!L->isLoopInvariant(Limit))
47+
+ Limit = Cmp->getOperand(1);
48+
+ if (SE->getSCEV(Limit) == IVLimit)
49+
+ return Limit;
50+
+ }
51+
+
52+
// Expand the code for the iteration count.
53+
assert(SE->isLoopInvariant(IVLimit, L) &&
54+
"Computed iteration count is not loop invariant!");
55+
@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
56+
// SCEV expression (IVInit) for a pointer type IV value (IndVar).
57+
Type *LimitTy = ExitCount->getType()->isPointerTy() ?
58+
IndVar->getType() : ExitCount->getType();
59+
- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
60+
return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
61+
}
62+
}
63+
diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll
64+
new file mode 100644
65+
index 00000000000..abd1cbb6c51
66+
--- /dev/null
67+
+++ b/test/Transforms/IndVarSimplify/add_nsw.ll
68+
@@ -0,0 +1,23 @@
69+
+; RUN: opt -indvars -S %s | FileCheck %s
70+
+
71+
+target datalayout = "e-p:32:32-i64:64-n8:16:32"
72+
+
73+
+; CHECK: for.body.preheader:
74+
+; CHECK-NOT: add
75+
+; CHECK: for.body:
76+
+
77+
+define void @foo(i32 %a1, i32 %a2) {
78+
+entry:
79+
+ %maxval = add nuw nsw i32 %a1, %a2
80+
+ %cmp = icmp slt i32 %maxval, 1
81+
+ br i1 %cmp, label %for.end, label %for.body
82+
+
83+
+for.body: ; preds = %entry, %for.body
84+
+ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ]
85+
+ %add31 = add nuw nsw i32 %j.02, 1
86+
+ %cmp22 = icmp slt i32 %add31, %maxval
87+
+ br i1 %cmp22, label %for.body, label %for.end
88+
+
89+
+for.end: ; preds = %for.body
90+
+ ret void
91+
+}
92+
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
93+
index 14ae9738696..509d662b767 100644
94+
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
95+
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
96+
@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
97+
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
98+
; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
99+
; CHECK: outer.preheader:
100+
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1
101+
; CHECK-NEXT: br label [[OUTER:%.*]]
102+
; CHECK: outer:
103+
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ]
104+
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ]
105+
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
106+
; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
107+
; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
108+
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
109+
@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
110+
; CHECK: inner:
111+
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ]
112+
; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1
113+
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]]
114+
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]]
115+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
116+
; CHECK: outer.inc.loopexit:
117+
; CHECK-NEXT: br label [[OUTER_INC]]
118+
; CHECK: outer.inc:
119+
; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1
120+
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1
121+
-; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]]
122+
+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]]
123+
; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
124+
; CHECK: exit.loopexit:
125+
; CHECK-NEXT: br label [[EXIT]]
126+
diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
127+
index b3f2c2a6a66..3530343ef4a 100644
128+
--- a/test/Transforms/IndVarSimplify/udiv.ll
129+
+++ b/test/Transforms/IndVarSimplify/udiv.ll
130+
@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
131+
; CHECK-LABEL: @foo(
132+
; CHECK: for.body.preheader:
133+
; CHECK-NOT: udiv
134+
+; CHECK: for.body:
135+
136+
define void @foo(double* %p, i64 %n) nounwind {
137+
entry:
138+
--
139+
2.18.0
140+

0 commit comments

Comments
 (0)