Skip to content

Commit e564852

Browse files
committed
[AArch64] Add patterns for fadd(uzp1(x,y), uzp2(x, y)) -> faddp.
Similar to f7018ba, this adds patterns that select a floating-point faddp from an fadd of uzp1/uzp2 shuffles of the same two operands (zip1/zip2 for v2f64).
1 parent adcf33f commit e564852

File tree

2 files changed

+17
-15
lines changed

2 files changed

+17
-15
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9419,6 +9419,17 @@ def : Pat<(v16i8 (add (AArch64uzp1 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)),
94199419
(AArch64uzp2 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)))),
94209420
(v16i8 (ADDPv16i8 $Rn, $Rm))>;
94219421

9422+
def : Pat<(v2f64 (fadd (AArch64zip1 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)),
9423+
(AArch64zip2 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)))),
9424+
(v2f64 (FADDPv2f64 $Rn, $Rm))>;
9425+
def : Pat<(v4f32 (fadd (AArch64uzp1 (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm)),
9426+
(AArch64uzp2 (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm)))),
9427+
(v4f32 (FADDPv4f32 $Rn, $Rm))>;
9428+
let Predicates = [HasFullFP16] in
9429+
def : Pat<(v8f16 (fadd (AArch64uzp1 (v8f16 FPR128:$Rn), (v8f16 FPR128:$Rm)),
9430+
(AArch64uzp2 (v8f16 FPR128:$Rn), (v8f16 FPR128:$Rm)))),
9431+
(v8f16 (FADDPv8f16 $Rn, $Rm))>;
9432+
94229433
// Scalar 64-bit shifts in FPR64 registers.
94239434
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
94249435
(SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

llvm/test/CodeGen/AArch64/addp-shuffle.ll

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,7 @@ define <4 x i64> @deinterleave_shuffle_v8i64(<8 x i64> %a) {
7676
define <4 x float> @deinterleave_shuffle_v8f32(<8 x float> %a) {
7777
; CHECK-LABEL: deinterleave_shuffle_v8f32:
7878
; CHECK: // %bb.0:
79-
; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
80-
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
81-
; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
79+
; CHECK-NEXT: faddp v0.4s, v0.4s, v1.4s
8280
; CHECK-NEXT: ret
8381
%r0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
8482
%r1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -89,9 +87,7 @@ define <4 x float> @deinterleave_shuffle_v8f32(<8 x float> %a) {
8987
define <4 x float> @deinterleave_shuffle_v8f32_c(<8 x float> %a) {
9088
; CHECK-LABEL: deinterleave_shuffle_v8f32_c:
9189
; CHECK: // %bb.0:
92-
; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
93-
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
94-
; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
90+
; CHECK-NEXT: faddp v0.4s, v0.4s, v1.4s
9591
; CHECK-NEXT: ret
9692
%r0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
9793
%r1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -116,9 +112,7 @@ define <8 x half> @deinterleave_shuffle_v16f16(<16 x half> %a) {
116112
;
117113
; CHECK-FP16-LABEL: deinterleave_shuffle_v16f16:
118114
; CHECK-FP16: // %bb.0:
119-
; CHECK-FP16-NEXT: uzp1 v2.8h, v0.8h, v1.8h
120-
; CHECK-FP16-NEXT: uzp2 v0.8h, v0.8h, v1.8h
121-
; CHECK-FP16-NEXT: fadd v0.8h, v2.8h, v0.8h
115+
; CHECK-FP16-NEXT: faddp v0.8h, v0.8h, v1.8h
122116
; CHECK-FP16-NEXT: ret
123117
%r0 = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
124118
%r1 = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -129,12 +123,9 @@ define <8 x half> @deinterleave_shuffle_v16f16(<16 x half> %a) {
129123
define <4 x double> @deinterleave_shuffle_v8f64(<8 x double> %a) {
130124
; CHECK-LABEL: deinterleave_shuffle_v8f64:
131125
; CHECK: // %bb.0:
132-
; CHECK-NEXT: zip1 v4.2d, v2.2d, v3.2d
133-
; CHECK-NEXT: zip1 v5.2d, v0.2d, v1.2d
134-
; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d
135-
; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
136-
; CHECK-NEXT: fadd v1.2d, v4.2d, v2.2d
137-
; CHECK-NEXT: fadd v0.2d, v5.2d, v0.2d
126+
; CHECK-NEXT: faddp v2.2d, v2.2d, v3.2d
127+
; CHECK-NEXT: faddp v0.2d, v0.2d, v1.2d
128+
; CHECK-NEXT: mov v1.16b, v2.16b
138129
; CHECK-NEXT: ret
139130
%r0 = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
140131
%r1 = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

0 commit comments

Comments
 (0)