You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[RISCV] Expand zvqdotq partial.reduce test variants
Make sure to cover all the scalable types which are legal, plus
splitting. Make sure to cover all instructions. Not duplicating
vx testing at this time.
%a.sext = sext <vscale x 128 x i8> %a to <vscale x 128 x i32>
755
+
%b.sext = sext <vscale x 128 x i8> %b to <vscale x 128 x i32>
756
+
%mul = mul nuw nsw <vscale x 128 x i32> %a.sext, %b.sext
757
+
%res = call <vscale x 32 x i32> @llvm.experimental.vector.partial.reduce.add(<vscale x 32 x i32> zeroinitializer, <vscale x 128 x i32> %mul)
758
+
ret <vscale x 32 x i32> %res
759
+
}
760
+
761
+
define <vscale x 4 x i32> @partial_reduce_accum(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 4 x i32> %accum) {
762
+
; CHECK-LABEL: partial_reduce_accum:
548
763
; CHECK: # %bb.0: # %entry
549
764
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
550
765
; CHECK-NEXT: vsext.vf2 v24, v8
@@ -564,8 +779,8 @@ entry:
564
779
ret <vscale x 4 x i32> %res
565
780
}
566
781
567
-
define <vscale x 16 x i32> @vqdot_vv_partial_reduce3(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
568
-
; CHECK-LABEL: vqdot_vv_partial_reduce3:
782
+
define <vscale x 16 x i32> @partial_reduce_via_accum(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
783
+
; CHECK-LABEL: partial_reduce_via_accum:
569
784
; CHECK: # %bb.0: # %entry
570
785
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
571
786
; CHECK-NEXT: vsext.vf2 v16, v8
@@ -579,3 +794,53 @@ entry:
579
794
%res = call <vscale x 16 x i32> @llvm.experimental.vector.partial.reduce.add.nvx16i32.nvx16i32(<vscale x 16 x i32> %mul, <vscale x 16 x i32> zeroinitializer)
580
795
ret <vscale x 16 x i32> %res
581
796
}
797
+
798
+
; Unsigned variant: both operands are zero-extended, so this should be a
; candidate for vqdotu once codegen support lands; current lowering goes
; through vwmulu + vzext + slide/add reduction.
define <vscale x 1 x i32> @partial_reduce_vqdotu(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b) {
; CHECK-LABEL: partial_reduce_vqdotu:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vwmulu.vv v10, v8, v9
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v10
; CHECK-NEXT:    srli a0, a0, 3
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v10, v9, a0
; CHECK-NEXT:    vslidedown.vx v11, v8, a0
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vadd.vv v9, v11, v9
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  %a.sext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
  %b.sext = zext <vscale x 4 x i8> %b to <vscale x 4 x i32>
  %mul = mul nuw nsw <vscale x 4 x i32> %a.sext, %b.sext
  %res = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add(<vscale x 1 x i32> zeroinitializer, <vscale x 4 x i32> %mul)
  ret <vscale x 1 x i32> %res
}
822
+
823
+
define <vscale x 1 x i32> @partial_reduce_vqdotsu(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b) {
824
+
; CHECK-LABEL: partial_reduce_vqdotsu:
825
+
; CHECK: # %bb.0: # %entry
826
+
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
827
+
; CHECK-NEXT: vsext.vf2 v10, v8
828
+
; CHECK-NEXT: vzext.vf2 v11, v9
829
+
; CHECK-NEXT: csrr a0, vlenb
830
+
; CHECK-NEXT: vwmulsu.vv v8, v10, v11
831
+
; CHECK-NEXT: srli a0, a0, 3
832
+
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
833
+
; CHECK-NEXT: vslidedown.vx v10, v9, a0
834
+
; CHECK-NEXT: vslidedown.vx v11, v8, a0
835
+
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
836
+
; CHECK-NEXT: vadd.vv v8, v10, v8
837
+
; CHECK-NEXT: vadd.vv v9, v11, v9
838
+
; CHECK-NEXT: vadd.vv v8, v9, v8
839
+
; CHECK-NEXT: ret
840
+
entry:
841
+
%a.sext = sext <vscale x 4 x i8> %a to <vscale x 4 x i32>
842
+
%b.sext = zext <vscale x 4 x i8> %b to <vscale x 4 x i32>
843
+
%mul = mul nuw nsw <vscale x 4 x i32> %a.sext, %b.sext
844
+
%res = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add(<vscale x 1 x i32> zeroinitializer, <vscale x 4 x i32> %mul)
0 commit comments