|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 | 2 | ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefixes=CHECK,V
|
3 | 3 | ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zve32f -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefixes=CHECK,ZVE32F
|
| 4 | +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+no-optimized-zero-stride-load -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefixes=CHECK,NOT-OPTIMIZED |
| 5 | +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zve32f,+no-optimized-zero-stride-load -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefixes=CHECK,NOT-OPTIMIZED |
4 | 6 |
|
5 | 7 | %struct.foo = type { i32, i32, i32, i32 }
|
6 | 8 |
|
@@ -176,24 +178,62 @@ for.cond.cleanup: ; preds = %vector.body
|
176 | 178 |
|
177 | 179 | define void @gather_zero_stride(i8* noalias nocapture %A, i8* noalias nocapture readonly %B) {
|
178 | 180 | ;
|
179 |
| -; CHECK-LABEL: gather_zero_stride: |
180 |
| -; CHECK: # %bb.0: # %entry |
181 |
| -; CHECK-NEXT: li a2, 0 |
182 |
| -; CHECK-NEXT: li a3, 32 |
183 |
| -; CHECK-NEXT: li a4, 1024 |
184 |
| -; CHECK-NEXT: .LBB3_1: # %vector.body |
185 |
| -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
186 |
| -; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma |
187 |
| -; CHECK-NEXT: vlse8.v v8, (a1), zero |
188 |
| -; CHECK-NEXT: add a5, a0, a2 |
189 |
| -; CHECK-NEXT: vle8.v v9, (a5) |
190 |
| -; CHECK-NEXT: vadd.vv v8, v9, v8 |
191 |
| -; CHECK-NEXT: vse8.v v8, (a5) |
192 |
| -; CHECK-NEXT: addi a2, a2, 32 |
193 |
| -; CHECK-NEXT: addi a1, a1, 160 |
194 |
| -; CHECK-NEXT: bne a2, a4, .LBB3_1 |
195 |
| -; CHECK-NEXT: # %bb.2: # %for.cond.cleanup |
196 |
| -; CHECK-NEXT: ret |
| 181 | +; V-LABEL: gather_zero_stride: |
| 182 | +; V: # %bb.0: # %entry |
| 183 | +; V-NEXT: li a2, 0 |
| 184 | +; V-NEXT: li a3, 32 |
| 185 | +; V-NEXT: li a4, 1024 |
| 186 | +; V-NEXT: .LBB3_1: # %vector.body |
| 187 | +; V-NEXT: # =>This Inner Loop Header: Depth=1 |
| 188 | +; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma |
| 189 | +; V-NEXT: vlse8.v v8, (a1), zero |
| 190 | +; V-NEXT: add a5, a0, a2 |
| 191 | +; V-NEXT: vle8.v v9, (a5) |
| 192 | +; V-NEXT: vadd.vv v8, v9, v8 |
| 193 | +; V-NEXT: vse8.v v8, (a5) |
| 194 | +; V-NEXT: addi a2, a2, 32 |
| 195 | +; V-NEXT: addi a1, a1, 160 |
| 196 | +; V-NEXT: bne a2, a4, .LBB3_1 |
| 197 | +; V-NEXT: # %bb.2: # %for.cond.cleanup |
| 198 | +; V-NEXT: ret |
| 199 | +; |
| 200 | +; ZVE32F-LABEL: gather_zero_stride: |
| 201 | +; ZVE32F: # %bb.0: # %entry |
| 202 | +; ZVE32F-NEXT: li a2, 0 |
| 203 | +; ZVE32F-NEXT: li a3, 32 |
| 204 | +; ZVE32F-NEXT: li a4, 1024 |
| 205 | +; ZVE32F-NEXT: .LBB3_1: # %vector.body |
| 206 | +; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 |
| 207 | +; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma |
| 208 | +; ZVE32F-NEXT: vlse8.v v8, (a1), zero |
| 209 | +; ZVE32F-NEXT: add a5, a0, a2 |
| 210 | +; ZVE32F-NEXT: vle8.v v9, (a5) |
| 211 | +; ZVE32F-NEXT: vadd.vv v8, v9, v8 |
| 212 | +; ZVE32F-NEXT: vse8.v v8, (a5) |
| 213 | +; ZVE32F-NEXT: addi a2, a2, 32 |
| 214 | +; ZVE32F-NEXT: addi a1, a1, 160 |
| 215 | +; ZVE32F-NEXT: bne a2, a4, .LBB3_1 |
| 216 | +; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup |
| 217 | +; ZVE32F-NEXT: ret |
| 218 | +; |
| 219 | +; NOT-OPTIMIZED-LABEL: gather_zero_stride: |
| 220 | +; NOT-OPTIMIZED: # %bb.0: # %entry |
| 221 | +; NOT-OPTIMIZED-NEXT: li a2, 0 |
| 222 | +; NOT-OPTIMIZED-NEXT: li a3, 32 |
| 223 | +; NOT-OPTIMIZED-NEXT: li a4, 1024 |
| 224 | +; NOT-OPTIMIZED-NEXT: .LBB3_1: # %vector.body |
| 225 | +; NOT-OPTIMIZED-NEXT: # =>This Inner Loop Header: Depth=1 |
| 226 | +; NOT-OPTIMIZED-NEXT: lbu a5, 0(a1) |
| 227 | +; NOT-OPTIMIZED-NEXT: add a6, a0, a2 |
| 228 | +; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma |
| 229 | +; NOT-OPTIMIZED-NEXT: vle8.v v8, (a6) |
| 230 | +; NOT-OPTIMIZED-NEXT: vadd.vx v8, v8, a5 |
| 231 | +; NOT-OPTIMIZED-NEXT: vse8.v v8, (a6) |
| 232 | +; NOT-OPTIMIZED-NEXT: addi a2, a2, 32 |
| 233 | +; NOT-OPTIMIZED-NEXT: addi a1, a1, 160 |
| 234 | +; NOT-OPTIMIZED-NEXT: bne a2, a4, .LBB3_1 |
| 235 | +; NOT-OPTIMIZED-NEXT: # %bb.2: # %for.cond.cleanup |
| 236 | +; NOT-OPTIMIZED-NEXT: ret |
197 | 237 | entry:
|
198 | 238 | br label %vector.body
|
199 | 239 |
|
|
0 commit comments