Skip to content

Commit 33fc675

Browse files
author
Krzysztof Parzyszek
committed
[Hexagon] Handle floating point vector loads/stores
1 parent 6d702a1 commit 33fc675

File tree

2 files changed

+181
-1
lines changed

2 files changed

+181
-1
lines changed

llvm/lib/Target/Hexagon/HexagonPatternsHVX.td

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,19 @@ let Predicates = [UseHVX] in {
165165
defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI8, IsVecOff>;
166166
defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI16, IsVecOff>;
167167
defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI32, IsVecOff>;
168-
169168
defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI8, IsVecOff>;
170169
defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI16, IsVecOff>;
171170
defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI32, IsVecOff>;
172171
}
173172

173+
// HVX vector load patterns for the VecF16 and VecF32 operand types.
// These are only active under the UseHVXV68 predicate and reuse the same
// load instructions as the integer patterns above: V6_vL32b_nt_ai for
// aligned non-temporal loads, V6_vL32b_ai for aligned loads, and
// V6_vL32Ub_ai for unaligned loads.
let Predicates = [UseHVXV68] in {
174+
defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF16, IsVecOff>;
175+
defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF32, IsVecOff>;
176+
defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecF16, IsVecOff>;
177+
defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecF32, IsVecOff>;
178+
defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF16, IsVecOff>;
179+
defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF32, IsVecOff>;
180+
}
174181

175182
// HVX stores
176183

@@ -214,6 +221,15 @@ let Predicates = [UseHVX] in {
214221
defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVI32, IsVecOff>;
215222
}
216223

224+
// HVX vector store patterns for the HVF16 and HVF32 value types.
// These are only active under the UseHVXV68 predicate and map onto
// V6_vS32b_nt_ai for aligned non-temporal stores, V6_vS32b_ai for aligned
// stores, and V6_vS32Ub_ai for unaligned stores.
let Predicates = [UseHVXV68] in {
225+
defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF16, IsVecOff>;
226+
defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF32, IsVecOff>;
227+
defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVF16, IsVecOff>;
228+
defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVF32, IsVecOff>;
229+
defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVF16, IsVecOff>;
230+
defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVF32, IsVecOff>;
231+
}
232+
217233
// Bitcasts between same-size vector types are no-ops, except for the
218234
// actual type change.
219235
let Predicates = [UseHVX] in {
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -march=hexagon < %s | FileCheck %s
3+
4+
; f0: aligned (align 128) <128 x i8> load from element 1 of %a0, stored
; aligned to element 2 of %a1. The expected output is one braced group that
; uses vmem with offsets #1 and #2.
define void @f0(<128 x i8>* %a0, <128 x i8>* %a1) #0 {
5+
; CHECK-LABEL: f0:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: {
8+
; CHECK-NEXT: jumpr r31
9+
; CHECK-NEXT: v0.cur = vmem(r0+#1)
10+
; CHECK-NEXT: vmem(r1+#2) = v0
11+
; CHECK-NEXT: }
12+
%v0 = getelementptr <128 x i8>, <128 x i8>* %a0, i32 1
13+
%v1 = load <128 x i8>, <128 x i8>* %v0, align 128
14+
%v2 = getelementptr <128 x i8>, <128 x i8>* %a1, i32 2
15+
store <128 x i8> %v1, <128 x i8>* %v2, align 128
16+
ret void
17+
}
18+
19+
; f1: aligned (align 128) <64 x i16> load from element 1 of %a0, stored
; aligned to element 2 of %a1. The expected output is one braced group that
; uses vmem with offsets #1 and #2.
define void @f1(<64 x i16>* %a0, <64 x i16>* %a1) #0 {
20+
; CHECK-LABEL: f1:
21+
; CHECK: // %bb.0:
22+
; CHECK-NEXT: {
23+
; CHECK-NEXT: jumpr r31
24+
; CHECK-NEXT: v0.cur = vmem(r0+#1)
25+
; CHECK-NEXT: vmem(r1+#2) = v0
26+
; CHECK-NEXT: }
27+
%v0 = getelementptr <64 x i16>, <64 x i16>* %a0, i32 1
28+
%v1 = load <64 x i16>, <64 x i16>* %v0, align 128
29+
%v2 = getelementptr <64 x i16>, <64 x i16>* %a1, i32 2
30+
store <64 x i16> %v1, <64 x i16>* %v2, align 128
31+
ret void
32+
}
33+
34+
; f2: aligned (align 128) <32 x i32> load from element 1 of %a0, stored
; aligned to element 2 of %a1. The expected output is one braced group that
; uses vmem with offsets #1 and #2.
define void @f2(<32 x i32>* %a0, <32 x i32>* %a1) #0 {
35+
; CHECK-LABEL: f2:
36+
; CHECK: // %bb.0:
37+
; CHECK-NEXT: {
38+
; CHECK-NEXT: jumpr r31
39+
; CHECK-NEXT: v0.cur = vmem(r0+#1)
40+
; CHECK-NEXT: vmem(r1+#2) = v0
41+
; CHECK-NEXT: }
42+
%v0 = getelementptr <32 x i32>, <32 x i32>* %a0, i32 1
43+
%v1 = load <32 x i32>, <32 x i32>* %v0, align 128
44+
%v2 = getelementptr <32 x i32>, <32 x i32>* %a1, i32 2
45+
store <32 x i32> %v1, <32 x i32>* %v2, align 128
46+
ret void
47+
}
48+
49+
; f3: aligned (align 128) <64 x half> load from element 1 of %a0, stored
; aligned to element 2 of %a1. The expected output matches the integer
; cases: one braced group that uses vmem with offsets #1 and #2.
define void @f3(<64 x half>* %a0, <64 x half>* %a1) #0 {
50+
; CHECK-LABEL: f3:
51+
; CHECK: // %bb.0:
52+
; CHECK-NEXT: {
53+
; CHECK-NEXT: jumpr r31
54+
; CHECK-NEXT: v0.cur = vmem(r0+#1)
55+
; CHECK-NEXT: vmem(r1+#2) = v0
56+
; CHECK-NEXT: }
57+
%v0 = getelementptr <64 x half>, <64 x half>* %a0, i32 1
58+
%v1 = load <64 x half>, <64 x half>* %v0, align 128
59+
%v2 = getelementptr <64 x half>, <64 x half>* %a1, i32 2
60+
store <64 x half> %v1, <64 x half>* %v2, align 128
61+
ret void
62+
}
63+
64+
; f4: aligned (align 128) <32 x float> load from element 1 of %a0, stored
; aligned to element 2 of %a1. The expected output matches the integer
; cases: one braced group that uses vmem with offsets #1 and #2.
define void @f4(<32 x float>* %a0, <32 x float>* %a1) #0 {
65+
; CHECK-LABEL: f4:
66+
; CHECK: // %bb.0:
67+
; CHECK-NEXT: {
68+
; CHECK-NEXT: jumpr r31
69+
; CHECK-NEXT: v0.cur = vmem(r0+#1)
70+
; CHECK-NEXT: vmem(r1+#2) = v0
71+
; CHECK-NEXT: }
72+
%v0 = getelementptr <32 x float>, <32 x float>* %a0, i32 1
73+
%v1 = load <32 x float>, <32 x float>* %v0, align 128
74+
%v2 = getelementptr <32 x float>, <32 x float>* %a1, i32 2
75+
store <32 x float> %v1, <32 x float>* %v2, align 128
76+
ret void
77+
}
78+
79+
; f5: unaligned (align 1) <128 x i8> load from element 1 of %a0, stored
; unaligned to element 2 of %a1. The expected output uses vmemu for both
; accesses, split across two braced groups.
define void @f5(<128 x i8>* %a0, <128 x i8>* %a1) #0 {
80+
; CHECK-LABEL: f5:
81+
; CHECK: // %bb.0:
82+
; CHECK-NEXT: {
83+
; CHECK-NEXT: v0 = vmemu(r0+#1)
84+
; CHECK-NEXT: }
85+
; CHECK-NEXT: {
86+
; CHECK-NEXT: jumpr r31
87+
; CHECK-NEXT: vmemu(r1+#2) = v0
88+
; CHECK-NEXT: }
89+
%v0 = getelementptr <128 x i8>, <128 x i8>* %a0, i32 1
90+
%v1 = load <128 x i8>, <128 x i8>* %v0, align 1
91+
%v2 = getelementptr <128 x i8>, <128 x i8>* %a1, i32 2
92+
store <128 x i8> %v1, <128 x i8>* %v2, align 1
93+
ret void
94+
}
95+
96+
; f6: unaligned (align 1) <64 x i16> load from element 1 of %a0, stored
; unaligned to element 2 of %a1. The expected output uses vmemu for both
; accesses, split across two braced groups.
define void @f6(<64 x i16>* %a0, <64 x i16>* %a1) #0 {
97+
; CHECK-LABEL: f6:
98+
; CHECK: // %bb.0:
99+
; CHECK-NEXT: {
100+
; CHECK-NEXT: v0 = vmemu(r0+#1)
101+
; CHECK-NEXT: }
102+
; CHECK-NEXT: {
103+
; CHECK-NEXT: jumpr r31
104+
; CHECK-NEXT: vmemu(r1+#2) = v0
105+
; CHECK-NEXT: }
106+
%v0 = getelementptr <64 x i16>, <64 x i16>* %a0, i32 1
107+
%v1 = load <64 x i16>, <64 x i16>* %v0, align 1
108+
%v2 = getelementptr <64 x i16>, <64 x i16>* %a1, i32 2
109+
store <64 x i16> %v1, <64 x i16>* %v2, align 1
110+
ret void
111+
}
112+
113+
; f7: unaligned (align 1) <32 x i32> load from element 1 of %a0, stored
; unaligned to element 2 of %a1. The expected output uses vmemu for both
; accesses, split across two braced groups.
define void @f7(<32 x i32>* %a0, <32 x i32>* %a1) #0 {
114+
; CHECK-LABEL: f7:
115+
; CHECK: // %bb.0:
116+
; CHECK-NEXT: {
117+
; CHECK-NEXT: v0 = vmemu(r0+#1)
118+
; CHECK-NEXT: }
119+
; CHECK-NEXT: {
120+
; CHECK-NEXT: jumpr r31
121+
; CHECK-NEXT: vmemu(r1+#2) = v0
122+
; CHECK-NEXT: }
123+
%v0 = getelementptr <32 x i32>, <32 x i32>* %a0, i32 1
124+
%v1 = load <32 x i32>, <32 x i32>* %v0, align 1
125+
%v2 = getelementptr <32 x i32>, <32 x i32>* %a1, i32 2
126+
store <32 x i32> %v1, <32 x i32>* %v2, align 1
127+
ret void
128+
}
129+
130+
; f8: unaligned (align 1) <64 x half> load from element 1 of %a0, stored
; unaligned to element 2 of %a1. The expected output matches the integer
; cases: vmemu for both accesses, split across two braced groups.
define void @f8(<64 x half>* %a0, <64 x half>* %a1) #0 {
131+
; CHECK-LABEL: f8:
132+
; CHECK: // %bb.0:
133+
; CHECK-NEXT: {
134+
; CHECK-NEXT: v0 = vmemu(r0+#1)
135+
; CHECK-NEXT: }
136+
; CHECK-NEXT: {
137+
; CHECK-NEXT: jumpr r31
138+
; CHECK-NEXT: vmemu(r1+#2) = v0
139+
; CHECK-NEXT: }
140+
%v0 = getelementptr <64 x half>, <64 x half>* %a0, i32 1
141+
%v1 = load <64 x half>, <64 x half>* %v0, align 1
142+
%v2 = getelementptr <64 x half>, <64 x half>* %a1, i32 2
143+
store <64 x half> %v1, <64 x half>* %v2, align 1
144+
ret void
145+
}
146+
147+
; f9: unaligned (align 1) <32 x float> load from element 1 of %a0, stored
; unaligned to element 2 of %a1. The expected output matches the integer
; cases: vmemu for both accesses, split across two braced groups.
define void @f9(<32 x float>* %a0, <32 x float>* %a1) #0 {
148+
; CHECK-LABEL: f9:
149+
; CHECK: // %bb.0:
150+
; CHECK-NEXT: {
151+
; CHECK-NEXT: v0 = vmemu(r0+#1)
152+
; CHECK-NEXT: }
153+
; CHECK-NEXT: {
154+
; CHECK-NEXT: jumpr r31
155+
; CHECK-NEXT: vmemu(r1+#2) = v0
156+
; CHECK-NEXT: }
157+
%v0 = getelementptr <32 x float>, <32 x float>* %a0, i32 1
158+
%v1 = load <32 x float>, <32 x float>* %v0, align 1
159+
%v2 = getelementptr <32 x float>, <32 x float>* %a1, i32 2
160+
store <32 x float> %v1, <32 x float>* %v2, align 1
161+
ret void
162+
}
163+
164+
; Attribute group #0, referenced by every function in this test: nounwind,
; target CPU hexagonv69, and the +hvxv69, +hvx-length128b and +hvx-qfloat
; target features.
attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }

0 commit comments

Comments
 (0)