; REQUIRES: asserts

; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s

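; Check the initial VPlans built for loops that combine two loaded values with
; the llvm.smax, llvm.smin, llvm.umax, and llvm.umin intrinsics when tail
; folding by EVL: each call is widened into a WIDEN-INTRINSIC recipe, and the
; surrounding loads and stores become EVL-predicated vp.load/vp.store recipes.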
define void @vp_smax(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count

; IF-EVL: vector.ph:
; IF-EVL-NEXT: Successor(s): vector loop

; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.smax(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx3, align 4
  %. = tail call i32 @llvm.smax.i32(i32 %0, i32 %1)
  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %., ptr %arrayidx11, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}

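; As above, but taking the signed minimum of the two loaded values.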
define void @vp_smin(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count

; IF-EVL: vector.ph:
; IF-EVL-NEXT: Successor(s): vector loop

; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.smin(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx3, align 4
  %. = tail call i32 @llvm.smin.i32(i32 %0, i32 %1)
  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %., ptr %arrayidx11, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}

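; As above, but taking the unsigned maximum of the two loaded values.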
define void @vp_umax(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count

; IF-EVL: vector.ph:
; IF-EVL-NEXT: Successor(s): vector loop

; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.umax(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx3, align 4
  %. = tail call i32 @llvm.umax.i32(i32 %0, i32 %1)
  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %., ptr %arrayidx11, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}

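; As above, but taking the unsigned minimum of the two loaded values.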
define void @vp_umin(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count

; IF-EVL: vector.ph:
; IF-EVL-NEXT: Successor(s): vector loop

; IF-EVL: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.umin(ir<[[LD1]]>, ir<[[LD2]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx3, align 4
  %. = tail call i32 @llvm.umin.i32(i32 %0, i32 %1)
  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  store i32 %., ptr %arrayidx11, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret void
}

declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)