Skip to content

Commit aae424a

Browse files
author
Diogo N. Sampaio
committed
[AArch64] Add lowering pattern for scalar fp16 facge and facgt
Summary: The fp16 scalar versions of facge and facgt require custom pattern matching, as the result type is not the same width as the operands. Reviewers: olista01, javed.absar, pbarrio Reviewed By: javed.absar Subscribers: kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60212 llvm-svn: 358083
1 parent 71660b0 commit aae424a

File tree

2 files changed

+34
-0
lines changed

2 files changed

+34
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5355,6 +5355,16 @@ def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
53555355
(i64 (IMPLICIT_DEF)),
53565356
(FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
53575357
hsub))>;
5358+
// Scalar fp16 FACGE: matches int_aarch64_neon_facge on two f16 (FPR16)
// operands with an i32 result and selects FACGE16. Because the i32 result is
// wider than the f16 operands, the FACGE16 output is inserted into the hsub
// subregister of an otherwise undefined (IMPLICIT_DEF) i32 value.
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
5359+
(i32 (INSERT_SUBREG
5360+
(i32 (IMPLICIT_DEF)),
5361+
(FACGE16 FPR16:$Rn, FPR16:$Rm),
5362+
hsub))>;
5363+
// Scalar fp16 FACGT: matches int_aarch64_neon_facgt on two f16 (FPR16)
// operands with an i32 result and selects FACGT16. Because the i32 result is
// wider than the f16 operands, the FACGT16 output is inserted into the hsub
// subregister of an otherwise undefined (IMPLICIT_DEF) i32 value.
def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
5364+
(i32 (INSERT_SUBREG
5365+
(i32 (IMPLICIT_DEF)),
5366+
(FACGT16 FPR16:$Rn, FPR16:$Rm),
5367+
hsub))>;
53585368

53595369
defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
53605370
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;

llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ declare half @llvm.aarch64.neon.frsqrts.f16(half, half)
77
declare half @llvm.aarch64.neon.frecps.f16(half, half)
88
declare half @llvm.aarch64.neon.fmulx.f16(half, half)
99
declare half @llvm.fabs.f16(half)
10+
declare i32 @llvm.aarch64.neon.facge.i32.f16(half, half)
11+
declare i32 @llvm.aarch64.neon.facgt.i32.f16(half, half)
1012

1113
define dso_local half @t_vabdh_f16(half %a, half %b) {
1214
; CHECK-LABEL: t_vabdh_f16:
@@ -318,3 +320,25 @@ entry:
318320
%vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16)
319321
ret i32 %vcvth_n_u32_f16
320322
}
323+
324+
; Codegen test for the scalar fp16 facge intrinsic: calls
; @llvm.aarch64.neon.facge.i32.f16 on the two half arguments, truncates the
; i32 result to i16 and returns it. The expected output is a single
; "facge h0, h0, h1" followed by "fmov w0, s0" and "ret".
define dso_local i16 @vcageh_f16_test(half %a, half %b) {
325+
; CHECK-LABEL: vcageh_f16_test:
326+
; CHECK: facge h0, h0, h1
327+
; CHECK-NEXT: fmov w0, s0
328+
; CHECK-NEXT: ret
329+
entry:
330+
%facg = tail call i32 @llvm.aarch64.neon.facge.i32.f16(half %a, half %b)
331+
%0 = trunc i32 %facg to i16
332+
ret i16 %0
333+
}
334+
335+
; Codegen test for the scalar fp16 facgt intrinsic: calls
; @llvm.aarch64.neon.facgt.i32.f16 on the two half arguments, truncates the
; i32 result to i16 and returns it. The expected output is a single
; "facgt h0, h0, h1" followed by "fmov w0, s0" and "ret".
define dso_local i16 @vcagth_f16_test(half %a, half %b) {
336+
; CHECK-LABEL: vcagth_f16_test:
337+
; CHECK: facgt h0, h0, h1
338+
; CHECK-NEXT: fmov w0, s0
339+
; CHECK-NEXT: ret
340+
entry:
341+
%facg = tail call i32 @llvm.aarch64.neon.facgt.i32.f16(half %a, half %b)
342+
%0 = trunc i32 %facg to i16
343+
ret i16 %0
344+
}

0 commit comments

Comments
 (0)