Skip to content

Commit 27e9e12

Browse files
[AArch64] Add patterns for conversions using fixed-point scvtf
Change-Id: If19131b160484aba942dbbef042fb67f0b98561d
1 parent ce1a0d8 commit 27e9e12

File tree

3 files changed

+129
-1
lines changed

3 files changed

+129
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14328,7 +14328,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
1432814328
unsigned Opc =
1432914329
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
1433014330
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
14331-
DAG.getConstant(Cnt, DL, MVT::i32));
14331+
DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
1433214332
}
1433314333

1433414334
// Right shift register. Note, there is not a shift right register

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,12 @@ def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
735735
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
736736

737737
def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
738+
739+
// AArch64vashr restricted to nodes that carry the `exact` flag: the predicate
// below accepts a node only when N->getFlags().hasExact() is true.
def AArch64vashr_exact : PatFrag<(ops node:$lhs, node:$rhs),
740+
(AArch64vashr node:$lhs, node:$rhs), [{
741+
return N->getFlags().hasExact();
742+
}]>;
743+
738744
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
739745
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
740746
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
@@ -7712,6 +7718,25 @@ defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
77127718
defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>;
77137719
defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
77147720

7721+
// Select one fixed-point SCVTF for sint_to_fp of an exact vector arithmetic
// shift right, reusing the shift amount as the SCVTF shift immediate.
// Only AArch64vashr_exact is matched, so a shift without the `exact` flag is
// not covered by these patterns. Covers v2i32->v2f32, v4i32->v4f32 and
// v2i64->v2f64.
let Predicates = [HasNEON] in {
7722+
def : Pat<(v2f32 (sint_to_fp (v2i32 (AArch64vashr_exact v2i32:$Vn, i32:$shift)))),
7723+
(SCVTFv2i32_shift $Vn, vecshiftR32:$shift)>;
7724+
7725+
def : Pat<(v4f32 (sint_to_fp (v4i32 (AArch64vashr_exact v4i32:$Vn, i32:$shift)))),
7726+
(SCVTFv4i32_shift $Vn, vecshiftR32:$shift)>;
7727+
7728+
def : Pat<(v2f64 (sint_to_fp (v2i64 (AArch64vashr_exact v2i64:$Vn, i32:$shift)))),
7729+
(SCVTFv2i64_shift $Vn, vecshiftR64:$shift)>;
7730+
}
7731+
7732+
// Half-precision variants of the exact-ashr + sint_to_fp -> fixed-point SCVTF
// selection (v4i16->v4f16 and v8i16->v8f16). These are additionally gated on
// HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
7733+
def : Pat<(v4f16 (sint_to_fp (v4i16 (AArch64vashr_exact v4i16:$Vn, i32:$shift)))),
7734+
(SCVTFv4i16_shift $Vn, vecshiftR16:$shift)>;
7735+
7736+
def : Pat<(v8f16 (sint_to_fp (v8i16 (AArch64vashr_exact v8i16:$Vn, i32:$shift)))),
7737+
(SCVTFv8i16_shift $Vn, vecshiftR16:$shift)>;
7738+
}
7739+
77157740
// X << 1 ==> X + X
77167741
class SHLToADDPat<ValueType ty, RegisterClass regtype>
77177742
: Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64"
5+
6+
; First some corner cases
7+
; Exact shift by zero: the expected output is a plain scvtf with no
; fixed-point immediate.
define <4 x float> @f_v4_s0(<4 x i32> %u) {
8+
; CHECK-LABEL: f_v4_s0:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: scvtf v0.4s, v0.4s
11+
; CHECK-NEXT: ret
12+
%s = ashr exact <4 x i32> %u, <i32 0, i32 0, i32 0, i32 0>
13+
%v = sitofp <4 x i32> %s to <4 x float>
14+
ret <4 x float> %v
15+
}
16+
17+
; Exact shift by one: the shift is expected to be folded into scvtf as the
; immediate #1.
define <4 x float> @f_v4_s1(<4 x i32> %u) {
18+
; CHECK-LABEL: f_v4_s1:
19+
; CHECK: // %bb.0:
20+
; CHECK-NEXT: scvtf v0.4s, v0.4s, #1
21+
; CHECK-NEXT: ret
22+
%s = ashr exact <4 x i32> %u, <i32 1, i32 1, i32 1, i32 1>
23+
%v = sitofp <4 x i32> %s to <4 x float>
24+
ret <4 x float> %v
25+
}
26+
27+
; Negative test: the ashr has no `exact` flag, so the shift must remain a
; separate sshr followed by a plain scvtf.
define <4 x float> @f_v4_s24_inexact(<4 x i32> %u) {
28+
; CHECK-LABEL: f_v4_s24_inexact:
29+
; CHECK: // %bb.0:
30+
; CHECK-NEXT: sshr v0.4s, v0.4s, #24
31+
; CHECK-NEXT: scvtf v0.4s, v0.4s
32+
; CHECK-NEXT: ret
33+
%s = ashr <4 x i32> %u, <i32 24, i32 24, i32 24, i32 24>
34+
%v = sitofp <4 x i32> %s to <4 x float>
35+
ret <4 x float> %v
36+
}
37+
38+
; Shift amount equal to the i32 element width (a poison result per the LLVM
; LangRef). The expected output contains no shift or scvtf, only a zero vector.
define <4 x float> @f_v4_s32(<4 x i32> %u) {
39+
; CHECK-LABEL: f_v4_s32:
40+
; CHECK: // %bb.0:
41+
; CHECK-NEXT: movi v0.2d, #0000000000000000
42+
; CHECK-NEXT: ret
43+
%s = ashr <4 x i32> %u, <i32 32, i32 32, i32 32, i32 32>
44+
%v = sitofp <4 x i32> %s to <4 x float>
45+
ret <4 x float> %v
46+
}
47+
48+
; Common cases for conversion from signed integer to floating point types
49+
; <2 x i32> -> <2 x float> with an exact shift by 24: expect a single scvtf on
; .2s operands with immediate #24.
define <2 x float> @f_v2_s24(<2 x i32> %u) {
50+
; CHECK-LABEL: f_v2_s24:
51+
; CHECK: // %bb.0:
52+
; CHECK-NEXT: scvtf v0.2s, v0.2s, #24
53+
; CHECK-NEXT: ret
54+
%s = ashr exact <2 x i32> %u, <i32 24, i32 24>
55+
%v = sitofp <2 x i32> %s to <2 x float>
56+
ret <2 x float> %v
57+
}
58+
59+
; <4 x i32> -> <4 x float> with an exact shift by 24: expect a single scvtf on
; .4s operands with immediate #24.
define <4 x float> @f_v4_s24(<4 x i32> %u) {
60+
; CHECK-LABEL: f_v4_s24:
61+
; CHECK: // %bb.0:
62+
; CHECK-NEXT: scvtf v0.4s, v0.4s, #24
63+
; CHECK-NEXT: ret
64+
%s = ashr exact <4 x i32> %u, <i32 24, i32 24, i32 24, i32 24>
65+
%v = sitofp <4 x i32> %s to <4 x float>
66+
ret <4 x float> %v
67+
}
68+
69+
; Check legalisation to <2 x f64> does not get in the way
70+
; <8 x i64> -> <8 x double> with an exact shift by 56: the expected output is
; four scvtf instructions on .2d operands (v0-v3), each with immediate #56.
define <8 x double> @d_v8_s64(<8 x i64> %u) {
71+
; CHECK-LABEL: d_v8_s64:
72+
; CHECK: // %bb.0:
73+
; CHECK-NEXT: scvtf v0.2d, v0.2d, #56
74+
; CHECK-NEXT: scvtf v1.2d, v1.2d, #56
75+
; CHECK-NEXT: scvtf v2.2d, v2.2d, #56
76+
; CHECK-NEXT: scvtf v3.2d, v3.2d, #56
77+
; CHECK-NEXT: ret
78+
%s = ashr exact <8 x i64> %u, <i64 56, i64 56, i64 56, i64 56, i64 56, i64 56, i64 56, i64 56>
79+
%v = sitofp <8 x i64> %s to <8 x double>
80+
ret <8 x double> %v
81+
}
82+
83+
; <4 x i16> -> <4 x half> with an exact shift by 8, compiled with attribute
; group #0: expect a single scvtf on .4h operands with immediate #8.
define <4 x half> @h_v4_s8(<4 x i16> %u) #0 {
84+
; CHECK-LABEL: h_v4_s8:
85+
; CHECK: // %bb.0:
86+
; CHECK-NEXT: scvtf v0.4h, v0.4h, #8
87+
; CHECK-NEXT: ret
88+
%s = ashr exact <4 x i16> %u, <i16 8, i16 8, i16 8, i16 8>
89+
%v = sitofp <4 x i16> %s to <4 x half>
90+
ret <4 x half> %v
91+
}
92+
93+
; <8 x i16> -> <8 x half> with an exact shift by 8, compiled with attribute
; group #0: expect a single scvtf on .8h operands with immediate #8.
define <8 x half> @h_v8_s8(<8 x i16> %u) #0 {
94+
; CHECK-LABEL: h_v8_s8:
95+
; CHECK: // %bb.0:
96+
; CHECK-NEXT: scvtf v0.8h, v0.8h, #8
97+
; CHECK-NEXT: ret
98+
%s = ashr exact <8 x i16> %u, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
99+
%v = sitofp <8 x i16> %s to <8 x half>
100+
ret <8 x half> %v
101+
}
102+
103+
attributes #0 = { "target-features"="+fullfp16"}

0 commit comments

Comments
 (0)