Skip to content

Commit d0bbe4f

Browse files
committed
[RISCV] Improve interleave load coverage (NF7, NF8, and one hot)
NF7 and NF8 were simply missing from the test coverage. The cases with only one active lane are currently lowered to a segment load (vlseg); they should be lowered to a strided load instead.
1 parent dd2c0b1 commit d0bbe4f

File tree

1 file changed

+161
-32
lines changed

1 file changed

+161
-32
lines changed

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 161 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,56 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_
126126
ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5
127127
}
128128

129+
; Factor-7 (NF7) interleaved load: a <14 x i16> wide load is split into seven
; <2 x i16> fields by stride-7 shuffles, and the CHECK lines expect a single
; vlseg7e16.v. The result aggregate is built on a poison base (every field is
; overwritten before the return), matching the poison operands of the shuffles.
define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor7(ptr %ptr) {
130+
; CHECK-LABEL: load_factor7:
131+
; CHECK: # %bb.0:
132+
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
133+
; CHECK-NEXT: vlseg7e16.v v8, (a0)
134+
; CHECK-NEXT: ret
135+
%interleaved.vec = load <14 x i16>, ptr %ptr
136+
%v0 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 0, i32 7>
137+
%v1 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 1, i32 8>
138+
%v2 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 2, i32 9>
139+
%v3 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 3, i32 10>
140+
%v4 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 4, i32 11>
141+
%v5 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 5, i32 12>
142+
%v6 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 6, i32 13>
143+
%res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} poison, <2 x i16> %v0, 0
144+
%res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
145+
%res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
146+
%res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
147+
%res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
148+
%res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
149+
%res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
150+
ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6
151+
}
152+
153+
; Factor-8 (NF8) interleaved load: a <16 x i16> wide load is split into eight
; <2 x i16> fields by stride-8 shuffles, and the CHECK lines expect a single
; vlseg8e16.v. The result aggregate is built on a poison base (every field is
; overwritten before the return), matching the poison operands of the shuffles.
define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor8(ptr %ptr) {
154+
; CHECK-LABEL: load_factor8:
155+
; CHECK: # %bb.0:
156+
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
157+
; CHECK-NEXT: vlseg8e16.v v8, (a0)
158+
; CHECK-NEXT: ret
159+
%interleaved.vec = load <16 x i16>, ptr %ptr
160+
%v0 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 0, i32 8>
161+
%v1 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 1, i32 9>
162+
%v2 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 2, i32 10>
163+
%v3 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 3, i32 11>
164+
%v4 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 4, i32 12>
165+
%v5 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 5, i32 13>
166+
%v6 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 6, i32 14>
167+
%v7 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 7, i32 15>
168+
%res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} poison, <2 x i16> %v0, 0
169+
%res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
170+
%res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
171+
%res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
172+
%res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
173+
%res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
174+
%res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
175+
%res7 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6, <2 x i16> %v7, 7
176+
ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res7
177+
}
178+
129179
; LMUL * NF is > 8 here and so shouldn't be lowered to a vlseg
130180
define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_factor6_too_big(ptr %ptr) {
131181
; RV32-LABEL: load_factor6_too_big:
@@ -174,12 +224,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
174224
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
175225
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
176226
; RV32-NEXT: vslideup.vi v4, v8, 10, v0.t
177-
; RV32-NEXT: lui a4, %hi(.LCPI6_0)
178-
; RV32-NEXT: addi a4, a4, %lo(.LCPI6_0)
227+
; RV32-NEXT: lui a4, %hi(.LCPI8_0)
228+
; RV32-NEXT: addi a4, a4, %lo(.LCPI8_0)
179229
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
180230
; RV32-NEXT: vle16.v v0, (a4)
181-
; RV32-NEXT: lui a4, %hi(.LCPI6_1)
182-
; RV32-NEXT: addi a4, a4, %lo(.LCPI6_1)
231+
; RV32-NEXT: lui a4, %hi(.LCPI8_1)
232+
; RV32-NEXT: addi a4, a4, %lo(.LCPI8_1)
183233
; RV32-NEXT: lui a5, 1
184234
; RV32-NEXT: vle16.v v8, (a4)
185235
; RV32-NEXT: csrr a4, vlenb
@@ -260,10 +310,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
260310
; RV32-NEXT: add a1, sp, a1
261311
; RV32-NEXT: addi a1, a1, 16
262312
; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
263-
; RV32-NEXT: lui a1, %hi(.LCPI6_2)
264-
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2)
265-
; RV32-NEXT: lui a3, %hi(.LCPI6_3)
266-
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3)
313+
; RV32-NEXT: lui a1, %hi(.LCPI8_2)
314+
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_2)
315+
; RV32-NEXT: lui a3, %hi(.LCPI8_3)
316+
; RV32-NEXT: addi a3, a3, %lo(.LCPI8_3)
267317
; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
268318
; RV32-NEXT: vle16.v v12, (a1)
269319
; RV32-NEXT: vle16.v v8, (a3)
@@ -273,8 +323,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
273323
; RV32-NEXT: add a1, sp, a1
274324
; RV32-NEXT: addi a1, a1, 16
275325
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
276-
; RV32-NEXT: lui a1, %hi(.LCPI6_4)
277-
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4)
326+
; RV32-NEXT: lui a1, %hi(.LCPI8_4)
327+
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_4)
278328
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
279329
; RV32-NEXT: vle16.v v2, (a1)
280330
; RV32-NEXT: csrr a1, vlenb
@@ -340,10 +390,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
340390
; RV32-NEXT: add a1, sp, a1
341391
; RV32-NEXT: addi a1, a1, 16
342392
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
343-
; RV32-NEXT: lui a1, %hi(.LCPI6_5)
344-
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5)
345-
; RV32-NEXT: lui a3, %hi(.LCPI6_6)
346-
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6)
393+
; RV32-NEXT: lui a1, %hi(.LCPI8_5)
394+
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_5)
395+
; RV32-NEXT: lui a3, %hi(.LCPI8_6)
396+
; RV32-NEXT: addi a3, a3, %lo(.LCPI8_6)
347397
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
348398
; RV32-NEXT: vle16.v v24, (a1)
349399
; RV32-NEXT: vle16.v v4, (a3)
@@ -368,14 +418,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
368418
; RV32-NEXT: add a1, sp, a1
369419
; RV32-NEXT: addi a1, a1, 16
370420
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
371-
; RV32-NEXT: lui a1, %hi(.LCPI6_7)
372-
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7)
373-
; RV32-NEXT: lui a3, %hi(.LCPI6_8)
374-
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_8)
421+
; RV32-NEXT: lui a1, %hi(.LCPI8_7)
422+
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7)
423+
; RV32-NEXT: lui a3, %hi(.LCPI8_8)
424+
; RV32-NEXT: addi a3, a3, %lo(.LCPI8_8)
375425
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
376426
; RV32-NEXT: vle16.v v16, (a1)
377-
; RV32-NEXT: lui a1, %hi(.LCPI6_9)
378-
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9)
427+
; RV32-NEXT: lui a1, %hi(.LCPI8_9)
428+
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9)
379429
; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
380430
; RV32-NEXT: vle16.v v8, (a3)
381431
; RV32-NEXT: csrr a3, vlenb
@@ -440,8 +490,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
440490
; RV32-NEXT: add a1, sp, a1
441491
; RV32-NEXT: addi a1, a1, 16
442492
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
443-
; RV32-NEXT: lui a1, %hi(.LCPI6_10)
444-
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10)
493+
; RV32-NEXT: lui a1, %hi(.LCPI8_10)
494+
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_10)
445495
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
446496
; RV32-NEXT: vle16.v v12, (a1)
447497
; RV32-NEXT: lui a1, 15
@@ -462,10 +512,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
462512
; RV32-NEXT: addi a1, a1, 16
463513
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
464514
; RV32-NEXT: vmv4r.v v24, v16
465-
; RV32-NEXT: lui a1, %hi(.LCPI6_11)
466-
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11)
467-
; RV32-NEXT: lui a3, %hi(.LCPI6_12)
468-
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12)
515+
; RV32-NEXT: lui a1, %hi(.LCPI8_11)
516+
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_11)
517+
; RV32-NEXT: lui a3, %hi(.LCPI8_12)
518+
; RV32-NEXT: addi a3, a3, %lo(.LCPI8_12)
469519
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
470520
; RV32-NEXT: vle16.v v28, (a1)
471521
; RV32-NEXT: vle16.v v4, (a3)
@@ -495,14 +545,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
495545
; RV32-NEXT: add a1, sp, a1
496546
; RV32-NEXT: addi a1, a1, 16
497547
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
498-
; RV32-NEXT: lui a1, %hi(.LCPI6_13)
499-
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13)
500-
; RV32-NEXT: lui a3, %hi(.LCPI6_14)
501-
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_14)
548+
; RV32-NEXT: lui a1, %hi(.LCPI8_13)
549+
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_13)
550+
; RV32-NEXT: lui a3, %hi(.LCPI8_14)
551+
; RV32-NEXT: addi a3, a3, %lo(.LCPI8_14)
502552
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
503553
; RV32-NEXT: vle16.v v8, (a1)
504-
; RV32-NEXT: lui a1, %hi(.LCPI6_15)
505-
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_15)
554+
; RV32-NEXT: lui a1, %hi(.LCPI8_15)
555+
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_15)
506556
; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
507557
; RV32-NEXT: vle16.v v28, (a3)
508558
; RV32-NEXT: vle16.v v12, (a1)
@@ -1131,3 +1181,82 @@ define void @store_factor6(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2
11311181
store <12 x i16> %interleaved.vec, ptr %ptr
11321182
ret void
11331183
}
1184+
1185+
1186+
; Factor-2 de-interleave where only field 0 is used: a single shufflevector
; with mask <0, 2, 4, 6> over a <8 x i32> wide load.
; The CHECK lines record the current lowering, a full vlseg2e32.v.
; TODO: With only one active field this should be a strided load instead.
define <4 x i32> @load_factor2_one_active(ptr %ptr) {
1187+
; CHECK-LABEL: load_factor2_one_active:
1188+
; CHECK: # %bb.0:
1189+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1190+
; CHECK-NEXT: vlseg2e32.v v8, (a0)
1191+
; CHECK-NEXT: ret
1192+
%interleaved.vec = load <8 x i32>, ptr %ptr
1193+
%v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1194+
ret <4 x i32> %v0
1195+
}
1196+
1197+
1198+
; Factor-3 de-interleave where only field 0 is used: a single shufflevector
; with mask <0, 3, 6, 9> over a <12 x i32> wide load.
; The CHECK lines record the current lowering, a full vlseg3e32.v.
; TODO: With only one active field this should be a strided load instead.
define <4 x i32> @load_factor3_one_active(ptr %ptr) {
1199+
; CHECK-LABEL: load_factor3_one_active:
1200+
; CHECK: # %bb.0:
1201+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1202+
; CHECK-NEXT: vlseg3e32.v v8, (a0)
1203+
; CHECK-NEXT: ret
1204+
%interleaved.vec = load <12 x i32>, ptr %ptr
1205+
%v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
1206+
ret <4 x i32> %v0
1207+
}
1208+
1209+
; Factor-4 de-interleave where only field 0 is used: a single shufflevector
; with mask <0, 4, 8, 12> over a <16 x i32> wide load.
; The CHECK lines record the current lowering, a full vlseg4e32.v.
; TODO: With only one active field this should be a strided load instead.
define <4 x i32> @load_factor4_one_active(ptr %ptr) {
1210+
; CHECK-LABEL: load_factor4_one_active:
1211+
; CHECK: # %bb.0:
1212+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1213+
; CHECK-NEXT: vlseg4e32.v v8, (a0)
1214+
; CHECK-NEXT: ret
1215+
%interleaved.vec = load <16 x i32>, ptr %ptr
1216+
%v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
1217+
ret <4 x i32> %v0
1218+
}
1219+
1220+
; Factor-5 de-interleave where only field 0 is used: a single shufflevector
; with mask <0, 5, 10, 15> over a <20 x i32> wide load.
; The CHECK lines record the current lowering, a full vlseg5e32.v.
; TODO: With only one active field this should be a strided load instead.
define <4 x i32> @load_factor5_one_active(ptr %ptr) {
1221+
; CHECK-LABEL: load_factor5_one_active:
1222+
; CHECK: # %bb.0:
1223+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1224+
; CHECK-NEXT: vlseg5e32.v v8, (a0)
1225+
; CHECK-NEXT: ret
1226+
%interleaved.vec = load <20 x i32>, ptr %ptr
1227+
%v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
1228+
ret <4 x i32> %v0
1229+
}
1230+
1231+
; Factor-6 de-interleave where only field 0 is used: a single shufflevector
; with mask <0, 6> over a <12 x i16> wide load.
; The CHECK lines record the current lowering, a full vlseg6e16.v.
; TODO: With only one active field this should be a strided load instead.
define <2 x i16> @load_factor6_one_active(ptr %ptr) {
1232+
; CHECK-LABEL: load_factor6_one_active:
1233+
; CHECK: # %bb.0:
1234+
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1235+
; CHECK-NEXT: vlseg6e16.v v8, (a0)
1236+
; CHECK-NEXT: ret
1237+
%interleaved.vec = load <12 x i16>, ptr %ptr
1238+
%v0 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 0, i32 6>
1239+
ret <2 x i16> %v0
1240+
}
1241+
1242+
; Factor-7 de-interleave where only field 0 is used: a single shufflevector
; with mask <0, 7, 14, 21> over a <32 x i8> wide load. Note the wide load has
; 32 elements, more than the 4 * 7 = 28 elements spanned by the segment group.
; The CHECK lines record the current lowering, a full vlseg7e8.v.
; TODO: With only one active field this should be a strided load instead.
define <4 x i8> @load_factor7_one_active(ptr %ptr) vscale_range(8,1024) {
1243+
; CHECK-LABEL: load_factor7_one_active:
1244+
; CHECK: # %bb.0:
1245+
; CHECK-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
1246+
; CHECK-NEXT: vlseg7e8.v v8, (a0)
1247+
; CHECK-NEXT: ret
1248+
%interleaved.vec = load <32 x i8>, ptr %ptr
1249+
%v0 = shufflevector <32 x i8> %interleaved.vec, <32 x i8> poison, <4 x i32> <i32 0, i32 7, i32 14, i32 21>
1250+
ret <4 x i8> %v0
1251+
}
1252+
1253+
; Factor-8 de-interleave where only field 0 is used: a single shufflevector
; with mask <0, 8, 16, 24> over a <32 x i8> wide load.
; The CHECK lines record the current lowering, a full vlseg8e8.v.
; TODO: With only one active field this should be a strided load instead.
define <4 x i8> @load_factor8_one_active(ptr %ptr) vscale_range(8,1024) {
1254+
; CHECK-LABEL: load_factor8_one_active:
1255+
; CHECK: # %bb.0:
1256+
; CHECK-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
1257+
; CHECK-NEXT: vlseg8e8.v v8, (a0)
1258+
; CHECK-NEXT: ret
1259+
%interleaved.vec = load <32 x i8>, ptr %ptr
1260+
%v0 = shufflevector <32 x i8> %interleaved.vec, <32 x i8> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
1261+
ret <4 x i8> %v0
1262+
}

0 commit comments

Comments
 (0)