 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-RV64

 declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, <vscale x 1 x i1>, i32)

@@ -823,15 +823,15 @@ define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
   ret <vscale x 1 x half> %load
 }

-define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr) {
-; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+define <vscale x 1 x i64> @zero_strided_vadd_nxv1i64(<vscale x 1 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd_nxv1i64:
 ; CHECK-RV32:       # %bb.0:
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
 ; CHECK-RV32-NEXT:    vlse64.v v9, (a0), zero
 ; CHECK-RV32-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-RV32-NEXT:    ret
 ;
-; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64-LABEL: zero_strided_vadd_nxv1i64:
 ; CHECK-RV64:       # %bb.0:
 ; CHECK-RV64-NEXT:    ld a0, 0(a0)
 ; CHECK-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
@@ -842,3 +842,76 @@ define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr)
   %w = add <vscale x 1 x i64> %v, %load
   ret <vscale x 1 x i64> %w
 }
+
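+; Splitting case: nxv16i64 does not fit in a single LMUL=8 register group, so
+; the zero-strided VP load is split in two. RV32 keeps both halves as vlse64
+; with zero stride, while RV64 folds the 64-bit element load into a scalar ld
+; feeding vadd.vx on both halves.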
+define <vscale x 16 x i64> @zero_strided_vadd_nxv16i64(<vscale x 16 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd_nxv16i64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    csrr a1, vlenb
+; CHECK-RV32-NEXT:    srli a2, a1, 3
+; CHECK-RV32-NEXT:    sub a3, a2, a1
+; CHECK-RV32-NEXT:    sltu a4, a2, a3
+; CHECK-RV32-NEXT:    addi a4, a4, -1
+; CHECK-RV32-NEXT:    and a3, a4, a3
+; CHECK-RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v24, (a0), zero
+; CHECK-RV32-NEXT:    bltu a2, a1, .LBB55_2
+; CHECK-RV32-NEXT:  # %bb.1:
+; CHECK-RV32-NEXT:    mv a2, a1
+; CHECK-RV32-NEXT:  .LBB55_2:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v0, (a0), zero
+; CHECK-RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vadd.vv v16, v16, v24
+; CHECK-RV32-NEXT:    vadd.vv v8, v8, v0
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    ld a0, 0(a0)
+; CHECK-RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-RV64-NEXT:    vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT:    vadd.vx v16, v16, a0
+; CHECK-RV64-NEXT:    ret
+  %vscale = call i32 @llvm.vscale()
+  %load = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i32(ptr %ptr, i32 0, <vscale x 16 x i1> splat (i1 true), i32 %vscale)
+  %w = add <vscale x 16 x i64> %v, %load
+  ret <vscale x 16 x i64> %w
+}
+
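+; Pointer elements are XLEN-sized (e32 on RV32, e64 on RV64). With
+; +optimized-zero-stride-load the load stays a zero-strided vlse; without it,
+; the scalar load is broadcast with vmv.v.x.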
+define <vscale x 1 x ptr> @zero_strided_vadd_nxv1p0(<vscale x 1 x ptr> %v, ptr %ptr) {
+; CHECK-OPT-RV32-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-OPT-RV32:       # %bb.0:
+; CHECK-OPT-RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-OPT-RV32-NEXT:    vlse32.v v8, (a0), zero
+; CHECK-OPT-RV32-NEXT:    ret
+;
+; CHECK-OPT-RV64-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-OPT-RV64:       # %bb.0:
+; CHECK-OPT-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-OPT-RV64-NEXT:    vlse64.v v8, (a0), zero
+; CHECK-OPT-RV64-NEXT:    ret
+;
+; CHECK-NO-OPT-RV32-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-NO-OPT-RV32:       # %bb.0:
+; CHECK-NO-OPT-RV32-NEXT:    lw a0, 0(a0)
+; CHECK-NO-OPT-RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NO-OPT-RV32-NEXT:    vmv.v.x v8, a0
+; CHECK-NO-OPT-RV32-NEXT:    ret
+;
+; CHECK-NO-OPT-RV64-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-NO-OPT-RV64:       # %bb.0:
+; CHECK-NO-OPT-RV64-NEXT:    ld a0, 0(a0)
+; CHECK-NO-OPT-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NO-OPT-RV64-NEXT:    vmv.v.x v8, a0
+; CHECK-NO-OPT-RV64-NEXT:    ret
+  %vscale = call i32 @llvm.vscale()
+  %load = call <vscale x 1 x ptr> @llvm.experimental.vp.strided.load.nxv1p0.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 %vscale)
+  ret <vscale x 1 x ptr> %load
+}