You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[DAGCombiner] Add DAG combine for PARTIAL_REDUCE_MLA when no mul op
Generic DAG combine for ISD::PARTIAL_REDUCE_U/SMLA to convert:
PARTIAL_REDUCE_*MLA(Acc, ZEXT(UnextOp1), Splat(1)) into
PARTIAL_REDUCE_UMLA(Acc, UnextOp1, TRUNC(Splat(1)))
and
PARTIAL_REDUCE_*MLA(Acc, SEXT(UnextOp1), Splat(1)) into
PARTIAL_REDUCE_SMLA(Acc, UnextOp1, TRUNC(Splat(1))).
Copy file name to clipboardExpand all lines: llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll
+20-82Lines changed: 20 additions & 82 deletions
Original file line number
Diff line number
Diff line change
@@ -594,16 +594,8 @@ define <vscale x 4 x i32> @udot_no_bin_op(<vscale x 4 x i32> %acc, <vscale x 16
594
594
;
595
595
; CHECK-NEWLOWERING-LABEL: udot_no_bin_op:
596
596
; CHECK-NEWLOWERING: // %bb.0:
597
-
; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z1.b
598
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b
599
-
; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z2.h
600
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z1.h
601
-
; CHECK-NEWLOWERING-NEXT: uunpklo z1.s, z1.h
602
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
603
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z3.s
604
-
; CHECK-NEWLOWERING-NEXT: add z1.s, z2.s, z1.s
605
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z4.s, z0.s
606
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
597
+
; CHECK-NEWLOWERING-NEXT: mov z2.b, #1 // =0x1
598
+
; CHECK-NEWLOWERING-NEXT: udot z0.s, z1.b, z2.b
607
599
; CHECK-NEWLOWERING-NEXT: ret
608
600
%a.ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
609
601
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %a.ext)
@@ -619,16 +611,8 @@ define <vscale x 4 x i32> @sdot_no_bin_op(<vscale x 4 x i32> %acc, <vscale x 16
619
611
;
620
612
; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op:
621
613
; CHECK-NEWLOWERING: // %bb.0:
622
-
; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z1.b
623
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z1.h, z1.b
624
-
; CHECK-NEWLOWERING-NEXT: sunpklo z3.s, z2.h
625
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z1.h
626
-
; CHECK-NEWLOWERING-NEXT: sunpklo z1.s, z1.h
627
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
628
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z3.s
629
-
; CHECK-NEWLOWERING-NEXT: add z1.s, z2.s, z1.s
630
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z4.s, z0.s
631
-
; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
614
+
; CHECK-NEWLOWERING-NEXT: mov z2.b, #1 // =0x1
615
+
; CHECK-NEWLOWERING-NEXT: sdot z0.s, z1.b, z2.b
632
616
; CHECK-NEWLOWERING-NEXT: ret
633
617
%a.ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
634
618
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %a.ext)
@@ -644,16 +628,8 @@ define <vscale x 2 x i64> @udot_no_bin_op_wide(<vscale x 2 x i64> %acc, <vscale
644
628
;
645
629
; CHECK-NEWLOWERING-LABEL: udot_no_bin_op_wide:
646
630
; CHECK-NEWLOWERING: // %bb.0: // %entry
647
-
; CHECK-NEWLOWERING-NEXT: uunpklo z2.s, z1.h
648
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
649
-
; CHECK-NEWLOWERING-NEXT: uunpklo z3.d, z2.s
650
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z1.s
651
-
; CHECK-NEWLOWERING-NEXT: uunpklo z1.d, z1.s
652
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
653
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z3.d
654
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
655
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
656
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z1.d, z0.d
631
+
; CHECK-NEWLOWERING-NEXT: mov z2.h, #1 // =0x1
632
+
; CHECK-NEWLOWERING-NEXT: udot z0.d, z1.h, z2.h
657
633
; CHECK-NEWLOWERING-NEXT: ret
658
634
entry:
659
635
%a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
@@ -670,16 +646,8 @@ define <vscale x 2 x i64> @sdot_no_bin_op_wide(<vscale x 2 x i64> %acc, <vscale
670
646
;
671
647
; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op_wide:
672
648
; CHECK-NEWLOWERING: // %bb.0: // %entry
673
-
; CHECK-NEWLOWERING-NEXT: sunpklo z2.s, z1.h
674
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
675
-
; CHECK-NEWLOWERING-NEXT: sunpklo z3.d, z2.s
676
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z1.s
677
-
; CHECK-NEWLOWERING-NEXT: sunpklo z1.d, z1.s
678
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
679
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z3.d
680
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
681
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
682
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z1.d, z0.d
649
+
; CHECK-NEWLOWERING-NEXT: mov z2.h, #1 // =0x1
650
+
; CHECK-NEWLOWERING-NEXT: sdot z0.d, z1.h, z2.h
683
651
; CHECK-NEWLOWERING-NEXT: ret
684
652
entry:
685
653
%a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
@@ -701,28 +669,13 @@ define <vscale x 4 x i64> @udot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale
701
669
;
702
670
; CHECK-NEWLOWERING-LABEL: udot_no_bin_op_8to64:
703
671
; CHECK-NEWLOWERING: // %bb.0:
704
-
; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z2.b
672
+
; CHECK-NEWLOWERING-NEXT: mov z3.b, #1 // =0x1
673
+
; CHECK-NEWLOWERING-NEXT: uunpklo z5.h, z2.b
705
674
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
706
-
; CHECK-NEWLOWERING-NEXT: uunpklo z4.s, z3.h
707
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z2.h
708
-
; CHECK-NEWLOWERING-NEXT: uunpklo z2.s, z2.h
709
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
710
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z6.d, z4.s
711
-
; CHECK-NEWLOWERING-NEXT: uunpklo z4.d, z4.s
712
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z5.s
713
-
; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z2.s
714
-
; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z3.s
715
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
716
-
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
717
-
; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z5.s
718
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z4.d
719
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z6.d
720
-
; CHECK-NEWLOWERING-NEXT: add z4.d, z25.d, z24.d
721
-
; CHECK-NEWLOWERING-NEXT: add z2.d, z3.d, z2.d
722
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z5.d, z0.d
723
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z7.d, z1.d
724
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
725
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
675
+
; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z3.b
676
+
; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z3.b
677
+
; CHECK-NEWLOWERING-NEXT: udot z0.d, z5.h, z4.h
678
+
; CHECK-NEWLOWERING-NEXT: udot z1.d, z2.h, z3.h
726
679
; CHECK-NEWLOWERING-NEXT: ret
727
680
%a.ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
728
681
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64(<vscale x 4 x i64> %acc, <vscale x 16 x i64> %a.ext)
@@ -743,28 +696,13 @@ define <vscale x 4 x i64> @sdot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale
743
696
;
744
697
; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op_8to64:
745
698
; CHECK-NEWLOWERING: // %bb.0:
746
-
; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z2.b
699
+
; CHECK-NEWLOWERING-NEXT: mov z3.b, #1 // =0x1
700
+
; CHECK-NEWLOWERING-NEXT: sunpklo z5.h, z2.b
747
701
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
748
-
; CHECK-NEWLOWERING-NEXT: sunpklo z4.s, z3.h
749
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z2.h
750
-
; CHECK-NEWLOWERING-NEXT: sunpklo z2.s, z2.h
751
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
752
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z6.d, z4.s
753
-
; CHECK-NEWLOWERING-NEXT: sunpklo z4.d, z4.s
754
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z5.s
755
-
; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z2.s
756
-
; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z3.s
757
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
758
-
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
759
-
; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z5.s
760
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z4.d
761
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z6.d
762
-
; CHECK-NEWLOWERING-NEXT: add z4.d, z25.d, z24.d
763
-
; CHECK-NEWLOWERING-NEXT: add z2.d, z3.d, z2.d
764
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z5.d, z0.d
765
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z7.d, z1.d
766
-
; CHECK-NEWLOWERING-NEXT: add z0.d, z4.d, z0.d
767
-
; CHECK-NEWLOWERING-NEXT: add z1.d, z2.d, z1.d
702
+
; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z3.b
703
+
; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z3.b
704
+
; CHECK-NEWLOWERING-NEXT: sdot z0.d, z5.h, z4.h
705
+
; CHECK-NEWLOWERING-NEXT: sdot z1.d, z2.h, z3.h
768
706
; CHECK-NEWLOWERING-NEXT: ret
769
707
%a.ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
770
708
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64(<vscale x 4 x i64> %acc, <vscale x 16 x i64> %a.ext)
0 commit comments