You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[DAGCombiner] Add DAG combine for PARTIAL_REDUCE_MLA when no mul op
Generic DAG combine for ISD::PARTIAL_REDUCE_U/SMLA to convert:
PARTIAL_REDUCE_*MLA(Acc, ZEXT(UnextOp1), Splat(1)) into
PARTIAL_REDUCE_UMLA(Acc, UnextOp1, TRUNC(Splat(1)))
and
PARTIAL_REDUCE_*MLA(Acc, SEXT(UnextOp1), Splat(1)) into
PARTIAL_REDUCE_SMLA(Acc, UnextOp1, TRUNC(Splat(1))).
Copy file name to clipboardExpand all lines: llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll
+20-82Lines changed: 20 additions & 82 deletions
Original file line number
Diff line number
Diff line change
@@ -662,16 +662,8 @@ define <vscale x 4 x i32> @udot_no_bin_op(<vscale x 4 x i32> %acc, <vscale x 16
662
662
;
663
663
; CHECK-NEWLOWERING-LABEL: udot_no_bin_op:
664
664
; CHECK-NEWLOWERING: // %bb.0:
665
-
; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z1.b
666
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b
667
-
; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z2.h
668
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z1.h
669
-
; CHECK-NEWLOWERING-NEXT: uunpklo z1.s, z1.h
670
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
671
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z3.s
672
-
; CHECK-NEWLOWERING-NEXT: add z1.s, z2.s, z1.s
673
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z4.s, z0.s
674
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
665
+
; CHECK-NEWLOWERING-NEXT: mov z2.b, #1 // =0x1
666
+
; CHECK-NEWLOWERING-NEXT: udot z0.s, z1.b, z2.b
675
667
; CHECK-NEWLOWERING-NEXT: ret
676
668
%a.ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
677
669
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %a.ext)
@@ -687,16 +679,8 @@ define <vscale x 4 x i32> @sdot_no_bin_op(<vscale x 4 x i32> %acc, <vscale x 16
687
679
;
688
680
; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op:
689
681
; CHECK-NEWLOWERING: // %bb.0:
690
-
; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z1.b
691
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z1.h, z1.b
692
-
; CHECK-NEWLOWERING-NEXT: sunpklo z3.s, z2.h
693
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z1.h
694
-
; CHECK-NEWLOWERING-NEXT: sunpklo z1.s, z1.h
695
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
696
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z3.s
697
-
; CHECK-NEWLOWERING-NEXT: add z1.s, z2.s, z1.s
698
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z4.s, z0.s
699
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
682
+
; CHECK-NEWLOWERING-NEXT: mov z2.b, #1 // =0x1
683
+
; CHECK-NEWLOWERING-NEXT: sdot z0.s, z1.b, z2.b
700
684
; CHECK-NEWLOWERING-NEXT: ret
701
685
%a.ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
702
686
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %a.ext)
@@ -712,16 +696,8 @@ define <vscale x 2 x i64> @udot_no_bin_op_wide(<vscale x 2 x i64> %acc, <vscale
712
696
;
713
697
; CHECK-NEWLOWERING-LABEL: udot_no_bin_op_wide:
714
698
; CHECK-NEWLOWERING: // %bb.0: // %entry
715
-
; CHECK-NEWLOWERING-NEXT: uunpklo z2.s, z1.h
716
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
717
-
; CHECK-NEWLOWERING-NEXT: uunpklo z3.d, z2.s
718
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z1.s
719
-
; CHECK-NEWLOWERING-NEXT: uunpklo z1.d, z1.s
720
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
721
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z3.d
722
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
723
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
724
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z1.d, z0.d
699
+
; CHECK-NEWLOWERING-NEXT: mov z2.h, #1 // =0x1
700
+
; CHECK-NEWLOWERING-NEXT: udot z0.d, z1.h, z2.h
725
701
; CHECK-NEWLOWERING-NEXT: ret
726
702
entry:
727
703
%a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
@@ -738,16 +714,8 @@ define <vscale x 2 x i64> @sdot_no_bin_op_wide(<vscale x 2 x i64> %acc, <vscale
738
714
;
739
715
; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op_wide:
740
716
; CHECK-NEWLOWERING: // %bb.0: // %entry
741
-
; CHECK-NEWLOWERING-NEXT: sunpklo z2.s, z1.h
742
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
743
-
; CHECK-NEWLOWERING-NEXT: sunpklo z3.d, z2.s
744
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z1.s
745
-
; CHECK-NEWLOWERING-NEXT: sunpklo z1.d, z1.s
746
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
747
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z3.d
748
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
749
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
750
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z1.d, z0.d
717
+
; CHECK-NEWLOWERING-NEXT: mov z2.h, #1 // =0x1
718
+
; CHECK-NEWLOWERING-NEXT: sdot z0.d, z1.h, z2.h
751
719
; CHECK-NEWLOWERING-NEXT: ret
752
720
entry:
753
721
%a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
@@ -769,28 +737,13 @@ define <vscale x 4 x i64> @udot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale
769
737
;
770
738
; CHECK-NEWLOWERING-LABEL: udot_no_bin_op_8to64:
771
739
; CHECK-NEWLOWERING: // %bb.0:
772
-
; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z2.b
740
+
; CHECK-NEWLOWERING-NEXT: mov z3.b, #1 // =0x1
741
+
; CHECK-NEWLOWERING-NEXT: uunpklo z5.h, z2.b
773
742
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
774
-
; CHECK-NEWLOWERING-NEXT: uunpklo z4.s, z3.h
775
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z2.h
776
-
; CHECK-NEWLOWERING-NEXT: uunpklo z2.s, z2.h
777
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
778
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z6.d, z4.s
779
-
; CHECK-NEWLOWERING-NEXT: uunpklo z4.d, z4.s
780
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z5.s
781
-
; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z2.s
782
-
; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z3.s
783
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
784
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
785
-
; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z5.s
786
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z4.d
787
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z6.d
788
-
; CHECK-NEWLOWERING-NEXT: add z4.d, z25.d, z24.d
789
-
; CHECK-NEWLOWERING-NEXT: add z2.d, z3.d, z2.d
790
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z5.d, z0.d
791
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z7.d, z1.d
792
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
793
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
743
+
; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z3.b
744
+
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z3.b
745
+
; CHECK-NEWLOWERING-NEXT: udot z0.d, z5.h, z4.h
746
+
; CHECK-NEWLOWERING-NEXT: udot z1.d, z2.h, z3.h
794
747
; CHECK-NEWLOWERING-NEXT: ret
795
748
%a.ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
796
749
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64(<vscale x 4 x i64> %acc, <vscale x 16 x i64> %a.ext)
@@ -811,28 +764,13 @@ define <vscale x 4 x i64> @sdot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale
811
764
;
812
765
; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op_8to64:
813
766
; CHECK-NEWLOWERING: // %bb.0:
814
-
; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z2.b
767
+
; CHECK-NEWLOWERING-NEXT: mov z3.b, #1 // =0x1
768
+
; CHECK-NEWLOWERING-NEXT: sunpklo z5.h, z2.b
815
769
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
816
-
; CHECK-NEWLOWERING-NEXT: sunpklo z4.s, z3.h
817
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z2.h
818
-
; CHECK-NEWLOWERING-NEXT: sunpklo z2.s, z2.h
819
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
820
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z6.d, z4.s
821
-
; CHECK-NEWLOWERING-NEXT: sunpklo z4.d, z4.s
822
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z5.s
823
-
; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z2.s
824
-
; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z3.s
825
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
826
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
827
-
; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z5.s
828
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z4.d
829
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z6.d
830
-
; CHECK-NEWLOWERING-NEXT: add z4.d, z25.d, z24.d
831
-
; CHECK-NEWLOWERING-NEXT: add z2.d, z3.d, z2.d
832
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z5.d, z0.d
833
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z7.d, z1.d
834
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
835
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
770
+
; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z3.b
771
+
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z3.b
772
+
; CHECK-NEWLOWERING-NEXT: sdot z0.d, z5.h, z4.h
773
+
; CHECK-NEWLOWERING-NEXT: sdot z1.d, z2.h, z3.h
836
774
; CHECK-NEWLOWERING-NEXT: ret
837
775
%a.ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
838
776
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64(<vscale x 4 x i64> %acc, <vscale x 16 x i64> %a.ext)
0 commit comments