
Commit a70190c

[RISCV] Always expand zero strided vp.strided.loads
This patch makes zero strided VP loads always be expanded to a scalar load and splat, even if +optimized-zero-stride-load is present. Expanding them allows more .vx splat patterns to be matched, which is needed to prevent regressions in llvm#98111. If the feature is present, RISCVISelDAGToDAG will combine the expanded form back to a zero strided load. The RV32 test diff also shows how we need to emit a zero strided load either way after expanding an SEW=64 strided load, since a 64-bit scalar does not fit in a single GPR on RV32. We could maybe fix this in a later patch by not doing the expand if SEW > XLEN.
1 parent: 3a189cd
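For illustration, here is a minimal IR sketch (not taken from this commit) of the rewrite that expandVPStrideLoad now always applies: a zero strided vp.strided.load becomes a scalar load plus a splat, which the .vx patterns can then fold. The @before/@after names, the %evl argument, and the fixed <4 x i64> type are made up for the example; the real pass performs additional legality checks (for example on the mask and EVL) and may build the splat differently than the plain insertelement/shufflevector shown here.

declare <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr, i32, <4 x i1>, i32)

; Before: a vp strided load with stride 0 reads the same scalar for every lane.
define <4 x i64> @before(ptr %ptr, i32 %evl) {
  %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %load
}

; After: one scalar load splatted across the vector (sketched here with
; insertelement + shufflevector).
define <4 x i64> @after(ptr %ptr, i32 %evl) {
  %scalar = load i64, ptr %ptr
  %head = insertelement <4 x i64> poison, i64 %scalar, i64 0
  %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer
  ret <4 x i64> %splat
}

With this form, a later add of %splat can be selected as vadd.vx on RV64, as the updated tests below show; on RV32 the i64 scalar still ends up being splatted through a zero strided vlse64.v.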

3 files changed: +47 -17 lines changed

llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp

Lines changed: 3 additions & 3 deletions
@@ -163,10 +163,10 @@ bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
   return true;
 }
 
+// Always expand zero strided loads so we match more .vx splat patterns, even if
+// we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert
+// it back to a strided load if it's optimized.
 bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
-  if (ST->hasOptimizedZeroStrideLoad())
-    return false;
-
   Value *BasePtr, *VL;
 
   using namespace PatternMatch;
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll

Lines changed: 23 additions & 8 deletions
@@ -638,7 +638,7 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64,
 define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
+; CHECK-OPT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-OPT-NEXT: vlse8.v v8, (a0), zero
 ; CHECK-OPT-NEXT: ret
 ;
@@ -657,7 +657,7 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-OPT-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero
 ; CHECK-OPT-NEXT: ret
 ;
@@ -672,12 +672,27 @@ define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 }
 
 define <4 x i64> @zero_strided_vadd.vx(<4 x i64> %v, ptr %ptr) {
-; CHECK-OPT-LABEL: zero_strided_vadd.vx:
-; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-OPT-NEXT: vlse64.v v10, (a0), zero
-; CHECK-OPT-NEXT: vadd.vv v8, v8, v10
-; CHECK-OPT-NEXT: ret
+; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: addi sp, sp, -16
+; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT: lw a1, 4(a0)
+; CHECK-RV32-NEXT: lw a0, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
+; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: addi a0, sp, 8
+; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero
+; CHECK-RV32-NEXT: vadd.vv v8, v8, v10
+; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: ld a0, 0(a0)
+; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-RV64-NEXT: vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT: ret
   %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 4)
   %w = add <4 x i64> %v, %load
   ret <4 x i64> %w
llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll

Lines changed: 21 additions & 6 deletions
@@ -824,12 +824,27 @@ define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
 }
 
 define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr) {
-; CHECK-OPT-LABEL: zero_strided_vadd.vx:
-; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-OPT-NEXT: vlse64.v v9, (a0), zero
-; CHECK-OPT-NEXT: vadd.vv v8, v8, v9
-; CHECK-OPT-NEXT: ret
+; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: addi sp, sp, -16
+; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT: lw a1, 4(a0)
+; CHECK-RV32-NEXT: lw a0, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
+; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: addi a0, sp, 8
+; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
+; CHECK-RV32-NEXT: vadd.vv v8, v8, v9
+; CHECK-RV32-NEXT: addi sp, sp, 16
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: ld a0, 0(a0)
+; CHECK-RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-RV64-NEXT: vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT: ret
   %vscale = call i32 @llvm.vscale()
   %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 %vscale)
   %w = add <vscale x 1 x i64> %v, %load