@@ -144,9 +144,9 @@ define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture
144
144
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
145
145
; CHECK-NEXT: .LBB3_1: # %vector.body
146
146
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
147
- ; CHECK-NEXT: lbu a3, 0(a1)
148
- ; CHECK-NEXT: vle8.v v8, (a0)
149
- ; CHECK-NEXT: vadd.vx v8, v8, a3
147
+ ; CHECK-NEXT: vlse8.v v8, (a1), zero
148
+ ; CHECK-NEXT: vle8.v v9, (a0)
149
+ ; CHECK-NEXT: vadd.vv v8, v9, v8
150
150
; CHECK-NEXT: vse8.v v8, (a0)
151
151
; CHECK-NEXT: addi a0, a0, 32
152
152
; CHECK-NEXT: addi a1, a1, 160
@@ -182,9 +182,9 @@ define void @gather_zero_stride_i32(ptr noalias nocapture %A, ptr noalias nocapt
182
182
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
183
183
; CHECK-NEXT: .LBB4_1: # %vector.body
184
184
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
185
- ; CHECK-NEXT: lw a3, 0(a1)
186
- ; CHECK-NEXT: vle32.v v8, (a0)
187
- ; CHECK-NEXT: vadd.vx v8, v8, a3
185
+ ; CHECK-NEXT: vlse32.v v8, (a1), zero
186
+ ; CHECK-NEXT: vle32.v v9, (a0)
187
+ ; CHECK-NEXT: vadd.vv v8, v9, v8
188
188
; CHECK-NEXT: vse32.v v8, (a0)
189
189
; CHECK-NEXT: addi a0, a0, 8
190
190
; CHECK-NEXT: addi a1, a1, 160
@@ -214,57 +214,22 @@ for.cond.cleanup: ; preds = %vector.body
214
214
}
215
215
216
216
define void @gather_zero_stride_unfold (ptr noalias nocapture %A , ptr noalias nocapture readonly %B ) {
217
- ; V-LABEL: gather_zero_stride_unfold:
218
- ; V: # %bb.0: # %entry
219
- ; V-NEXT: addi a2, a0, 1024
220
- ; V-NEXT: li a3, 32
221
- ; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
222
- ; V-NEXT: .LBB5_1: # %vector.body
223
- ; V-NEXT: # =>This Inner Loop Header: Depth=1
224
- ; V-NEXT: vlse8.v v8, (a1), zero
225
- ; V-NEXT: vle8.v v9, (a0)
226
- ; V-NEXT: vdivu.vv v8, v8, v9
227
- ; V-NEXT: vse8.v v8, (a0)
228
- ; V-NEXT: addi a0, a0, 32
229
- ; V-NEXT: addi a1, a1, 160
230
- ; V-NEXT: bne a0, a2, .LBB5_1
231
- ; V-NEXT: # %bb.2: # %for.cond.cleanup
232
- ; V-NEXT: ret
233
- ;
234
- ; ZVE32F-LABEL: gather_zero_stride_unfold:
235
- ; ZVE32F: # %bb.0: # %entry
236
- ; ZVE32F-NEXT: addi a2, a0, 1024
237
- ; ZVE32F-NEXT: li a3, 32
238
- ; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
239
- ; ZVE32F-NEXT: .LBB5_1: # %vector.body
240
- ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
241
- ; ZVE32F-NEXT: vlse8.v v8, (a1), zero
242
- ; ZVE32F-NEXT: vle8.v v9, (a0)
243
- ; ZVE32F-NEXT: vdivu.vv v8, v8, v9
244
- ; ZVE32F-NEXT: vse8.v v8, (a0)
245
- ; ZVE32F-NEXT: addi a0, a0, 32
246
- ; ZVE32F-NEXT: addi a1, a1, 160
247
- ; ZVE32F-NEXT: bne a0, a2, .LBB5_1
248
- ; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
249
- ; ZVE32F-NEXT: ret
250
- ;
251
- ; NOT-OPTIMIZED-LABEL: gather_zero_stride_unfold:
252
- ; NOT-OPTIMIZED: # %bb.0: # %entry
253
- ; NOT-OPTIMIZED-NEXT: addi a2, a0, 1024
254
- ; NOT-OPTIMIZED-NEXT: li a3, 32
255
- ; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma
256
- ; NOT-OPTIMIZED-NEXT: .LBB5_1: # %vector.body
257
- ; NOT-OPTIMIZED-NEXT: # =>This Inner Loop Header: Depth=1
258
- ; NOT-OPTIMIZED-NEXT: lbu a3, 0(a1)
259
- ; NOT-OPTIMIZED-NEXT: vle8.v v8, (a0)
260
- ; NOT-OPTIMIZED-NEXT: vmv.v.x v9, a3
261
- ; NOT-OPTIMIZED-NEXT: vdivu.vv v8, v9, v8
262
- ; NOT-OPTIMIZED-NEXT: vse8.v v8, (a0)
263
- ; NOT-OPTIMIZED-NEXT: addi a0, a0, 32
264
- ; NOT-OPTIMIZED-NEXT: addi a1, a1, 160
265
- ; NOT-OPTIMIZED-NEXT: bne a0, a2, .LBB5_1
266
- ; NOT-OPTIMIZED-NEXT: # %bb.2: # %for.cond.cleanup
267
- ; NOT-OPTIMIZED-NEXT: ret
217
+ ; CHECK-LABEL: gather_zero_stride_unfold:
218
+ ; CHECK: # %bb.0: # %entry
219
+ ; CHECK-NEXT: addi a2, a0, 1024
220
+ ; CHECK-NEXT: li a3, 32
221
+ ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
222
+ ; CHECK-NEXT: .LBB5_1: # %vector.body
223
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
224
+ ; CHECK-NEXT: vlse8.v v8, (a1), zero
225
+ ; CHECK-NEXT: vle8.v v9, (a0)
226
+ ; CHECK-NEXT: vdivu.vv v8, v8, v9
227
+ ; CHECK-NEXT: vse8.v v8, (a0)
228
+ ; CHECK-NEXT: addi a0, a0, 32
229
+ ; CHECK-NEXT: addi a1, a1, 160
230
+ ; CHECK-NEXT: bne a0, a2, .LBB5_1
231
+ ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
232
+ ; CHECK-NEXT: ret
268
233
entry:
269
234
br label %vector.body
270
235
@@ -962,9 +927,9 @@ define void @gather_zero_stride_fp(ptr noalias nocapture %A, ptr noalias nocaptu
962
927
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
963
928
; CHECK-NEXT: .LBB16_1: # %vector.body
964
929
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
965
- ; CHECK-NEXT: flw fa5, 0(a1)
966
- ; CHECK-NEXT: vle32.v v8, (a0)
967
- ; CHECK-NEXT: vfadd.vf v8, v8, fa5
930
+ ; CHECK-NEXT: vlse32.v v8, (a1), zero
931
+ ; CHECK-NEXT: vle32.v v9, (a0)
932
+ ; CHECK-NEXT: vfadd.vv v8, v9, v8
968
933
; CHECK-NEXT: vse32.v v8, (a0)
969
934
; CHECK-NEXT: addi a0, a0, 128
970
935
; CHECK-NEXT: addi a1, a1, 640
@@ -992,3 +957,5 @@ vector.body: ; preds = %vector.body, %entry
992
957
for.cond.cleanup: ; preds = %vector.body
993
958
ret void
994
959
}
960
+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
961
+ ; NOT-OPTIMIZED: {{.*}}
0 commit comments