Skip to content

Commit 651463e

Browse files
author
Diogo N. Sampaio
committed
[ARM] [FIX] Add missing f16 vector operations lowering
Summary: Add the missing <8xhalf> shufflevector pattern for the concat_vectors DAG node, and allow <8xhalf> and <4xhalf> vldup1 operations. These instructions are required for the v8.2a fp16 lowering of the vmul_n_f16, vmulq_n_f16 and vmulq_lane_f16 intrinsics. Reviewers: olista01, pbarrio, LukeGeeson, efriedma Reviewed By: efriedma Subscribers: efriedma, javed.absar, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60319 llvm-svn: 358081
1 parent 48e2eb0 commit 651463e

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2212,7 +2212,10 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
22122212
case MVT::v8i8:
22132213
case MVT::v16i8: OpcodeIndex = 0; break;
22142214
case MVT::v4i16:
2215-
case MVT::v8i16: OpcodeIndex = 1; break;
2215+
case MVT::v8i16:
2216+
case MVT::v4f16:
2217+
case MVT::v8f16:
2218+
OpcodeIndex = 1; break;
22162219
case MVT::v2f32:
22172220
case MVT::v2i32:
22182221
case MVT::v4f32:

llvm/lib/Target/ARM/ARMInstrNEON.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7576,6 +7576,8 @@ def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
75767576
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
75777577
def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
75787578
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7579+
def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
7580+
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
75797581

75807582
//===----------------------------------------------------------------------===//
75817583
// Assembler aliases

llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1225,6 +1225,42 @@ entry:
12251225
ret <8 x half> %shuffle.i
12261226
}
12271227

1228+
define <4 x half> @test_vld_dup1_4xhalf(half* %b) {
1229+
; CHECK-LABEL: test_vld_dup1_4xhalf:
1230+
; CHECK: vld1.16 {d0[]}, [r0:16]
1231+
; CHECK-NEXT: bx lr
1232+
1233+
entry:
1234+
%b1 = load half, half* %b, align 2
1235+
%vecinit = insertelement <4 x half> undef, half %b1, i32 0
1236+
%vecinit2 = insertelement <4 x half> %vecinit, half %b1, i32 1
1237+
%vecinit3 = insertelement <4 x half> %vecinit2, half %b1, i32 2
1238+
%vecinit4 = insertelement <4 x half> %vecinit3, half %b1, i32 3
1239+
ret <4 x half> %vecinit4
1240+
}
1241+
1242+
define <8 x half> @test_vld_dup1_8xhalf(half* %b) local_unnamed_addr {
1243+
; CHECK-LABEL: test_vld_dup1_8xhalf:
1244+
; CHECK: vld1.16 {d0[], d1[]}, [r0:16]
1245+
; CHECK-NEXT: bx lr
1246+
1247+
entry:
1248+
%b1 = load half, half* %b, align 2
1249+
%vecinit = insertelement <8 x half> undef, half %b1, i32 0
1250+
%vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
1251+
ret <8 x half> %vecinit8
1252+
}
1253+
1254+
define <8 x half> @test_shufflevector8xhalf(<4 x half> %a) {
1255+
; CHECK-LABEL: test_shufflevector8xhalf:
1256+
; CHECK: vmov.f64 d1, d0
1257+
; CHECK-NEXT: bx lr
1258+
1259+
entry:
1260+
%r = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1261+
ret <8 x half> %r
1262+
}
1263+
12281264
declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
12291265
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
12301266
declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>)

0 commit comments

Comments
 (0)