@@ -70,22 +70,20 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
70
70
; NOFP16-NEXT: .cfi_offset w22, -32
71
71
; NOFP16-NEXT: .cfi_offset w30, -48
72
72
; NOFP16-NEXT: mov w21, w0
73
- ; NOFP16-NEXT: and w0, w2 , #0xffff
73
+ ; NOFP16-NEXT: and w0, w1 , #0xffff
74
74
; NOFP16-NEXT: mov x19, x3
75
- ; NOFP16-NEXT: mov w20, w1
75
+ ; NOFP16-NEXT: mov w20, w2
76
76
; NOFP16-NEXT: bl __gnu_h2f_ieee
77
77
; NOFP16-NEXT: mov w22, w0
78
78
; NOFP16-NEXT: and w0, w21, #0xffff
79
79
; NOFP16-NEXT: bl __gnu_h2f_ieee
80
- ; NOFP16-NEXT: mov w21 , w0
80
+ ; NOFP16-NEXT: mov w8 , w0
81
81
; NOFP16-NEXT: and w0, w20, #0xffff
82
+ ; NOFP16-NEXT: orr x21, x8, x22, lsl #32
82
83
; NOFP16-NEXT: bl __gnu_h2f_ieee
83
- ; NOFP16-NEXT: mov w8, w21
84
- ; NOFP16-NEXT: // kill: def $w0 killed $w0 def $x0
85
- ; NOFP16-NEXT: str w22, [x19, #8]
86
- ; NOFP16-NEXT: orr x8, x8, x0, lsl #32
84
+ ; NOFP16-NEXT: str x21, [x19]
87
85
; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
88
- ; NOFP16-NEXT: str x8 , [x19]
86
+ ; NOFP16-NEXT: str w0 , [x19, #8 ]
89
87
; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
90
88
; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
91
89
; NOFP16-NEXT: ret
@@ -182,46 +180,17 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
182
180
define void @outgoing_v4f16_return (ptr %ptr ) #0 {
183
181
; NOFP16-LABEL: outgoing_v4f16_return:
184
182
; NOFP16: // %bb.0:
185
- ; NOFP16-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
186
- ; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
187
- ; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
188
- ; NOFP16-NEXT: .cfi_def_cfa_offset 48
183
+ ; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
184
+ ; NOFP16-NEXT: .cfi_def_cfa_offset 16
189
185
; NOFP16-NEXT: .cfi_offset w19, -8
190
- ; NOFP16-NEXT: .cfi_offset w20, -16
191
- ; NOFP16-NEXT: .cfi_offset w21, -24
192
- ; NOFP16-NEXT: .cfi_offset w22, -32
193
- ; NOFP16-NEXT: .cfi_offset w23, -40
194
- ; NOFP16-NEXT: .cfi_offset w30, -48
186
+ ; NOFP16-NEXT: .cfi_offset w30, -16
195
187
; NOFP16-NEXT: mov x19, x0
196
188
; NOFP16-NEXT: bl v4f16_result
197
- ; NOFP16-NEXT: and w0, w0, #0xffff
198
- ; NOFP16-NEXT: mov w20, w1
199
- ; NOFP16-NEXT: mov w21, w2
200
- ; NOFP16-NEXT: mov w22, w3
201
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
202
- ; NOFP16-NEXT: mov w23, w0
203
- ; NOFP16-NEXT: and w0, w20, #0xffff
204
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
205
- ; NOFP16-NEXT: mov w20, w0
206
- ; NOFP16-NEXT: and w0, w21, #0xffff
207
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
208
- ; NOFP16-NEXT: mov w21, w0
209
- ; NOFP16-NEXT: and w0, w22, #0xffff
210
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
211
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
212
- ; NOFP16-NEXT: strh w0, [x19, #6]
213
- ; NOFP16-NEXT: mov w0, w21
214
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
215
- ; NOFP16-NEXT: strh w0, [x19, #4]
216
- ; NOFP16-NEXT: mov w0, w20
217
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
218
- ; NOFP16-NEXT: strh w0, [x19, #2]
219
- ; NOFP16-NEXT: mov w0, w23
220
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
189
+ ; NOFP16-NEXT: strh w2, [x19, #4]
190
+ ; NOFP16-NEXT: strh w3, [x19, #6]
191
+ ; NOFP16-NEXT: strh w1, [x19, #2]
221
192
; NOFP16-NEXT: strh w0, [x19]
222
- ; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
223
- ; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
224
- ; NOFP16-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
193
+ ; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
225
194
; NOFP16-NEXT: ret
226
195
%val = call <4 x half > @v4f16_result ()
227
196
store <4 x half > %val , ptr %ptr
@@ -231,82 +200,21 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 {
231
200
define void @outgoing_v8f16_return (ptr %ptr ) #0 {
232
201
; NOFP16-LABEL: outgoing_v8f16_return:
233
202
; NOFP16: // %bb.0:
234
- ; NOFP16-NEXT: stp x30, x27, [sp, #-80]! // 16-byte Folded Spill
235
- ; NOFP16-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
236
- ; NOFP16-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
237
- ; NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
238
- ; NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
239
- ; NOFP16-NEXT: .cfi_def_cfa_offset 80
203
+ ; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
204
+ ; NOFP16-NEXT: .cfi_def_cfa_offset 16
240
205
; NOFP16-NEXT: .cfi_offset w19, -8
241
- ; NOFP16-NEXT: .cfi_offset w20, -16
242
- ; NOFP16-NEXT: .cfi_offset w21, -24
243
- ; NOFP16-NEXT: .cfi_offset w22, -32
244
- ; NOFP16-NEXT: .cfi_offset w23, -40
245
- ; NOFP16-NEXT: .cfi_offset w24, -48
246
- ; NOFP16-NEXT: .cfi_offset w25, -56
247
- ; NOFP16-NEXT: .cfi_offset w26, -64
248
- ; NOFP16-NEXT: .cfi_offset w27, -72
249
- ; NOFP16-NEXT: .cfi_offset w30, -80
206
+ ; NOFP16-NEXT: .cfi_offset w30, -16
250
207
; NOFP16-NEXT: mov x19, x0
251
208
; NOFP16-NEXT: bl v8f16_result
252
- ; NOFP16-NEXT: and w0, w0, #0xffff
253
- ; NOFP16-NEXT: mov w21, w1
254
- ; NOFP16-NEXT: mov w22, w2
255
- ; NOFP16-NEXT: mov w23, w3
256
- ; NOFP16-NEXT: mov w24, w4
257
- ; NOFP16-NEXT: mov w25, w5
258
- ; NOFP16-NEXT: mov w26, w6
259
- ; NOFP16-NEXT: mov w27, w7
260
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
261
- ; NOFP16-NEXT: mov w20, w0
262
- ; NOFP16-NEXT: and w0, w21, #0xffff
263
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
264
- ; NOFP16-NEXT: mov w21, w0
265
- ; NOFP16-NEXT: and w0, w22, #0xffff
266
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
267
- ; NOFP16-NEXT: mov w22, w0
268
- ; NOFP16-NEXT: and w0, w23, #0xffff
269
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
270
- ; NOFP16-NEXT: mov w23, w0
271
- ; NOFP16-NEXT: and w0, w24, #0xffff
272
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
273
- ; NOFP16-NEXT: mov w24, w0
274
- ; NOFP16-NEXT: and w0, w25, #0xffff
275
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
276
- ; NOFP16-NEXT: mov w25, w0
277
- ; NOFP16-NEXT: and w0, w26, #0xffff
278
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
279
- ; NOFP16-NEXT: mov w26, w0
280
- ; NOFP16-NEXT: and w0, w27, #0xffff
281
- ; NOFP16-NEXT: bl __gnu_h2f_ieee
282
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
283
- ; NOFP16-NEXT: strh w0, [x19, #14]
284
- ; NOFP16-NEXT: mov w0, w26
285
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
286
- ; NOFP16-NEXT: strh w0, [x19, #12]
287
- ; NOFP16-NEXT: mov w0, w25
288
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
289
- ; NOFP16-NEXT: strh w0, [x19, #10]
290
- ; NOFP16-NEXT: mov w0, w24
291
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
292
- ; NOFP16-NEXT: strh w0, [x19, #8]
293
- ; NOFP16-NEXT: mov w0, w23
294
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
295
- ; NOFP16-NEXT: strh w0, [x19, #6]
296
- ; NOFP16-NEXT: mov w0, w22
297
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
298
- ; NOFP16-NEXT: strh w0, [x19, #4]
299
- ; NOFP16-NEXT: mov w0, w21
300
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
301
- ; NOFP16-NEXT: strh w0, [x19, #2]
302
- ; NOFP16-NEXT: mov w0, w20
303
- ; NOFP16-NEXT: bl __gnu_f2h_ieee
209
+ ; NOFP16-NEXT: strh w5, [x19, #10]
210
+ ; NOFP16-NEXT: strh w7, [x19, #14]
211
+ ; NOFP16-NEXT: strh w6, [x19, #12]
212
+ ; NOFP16-NEXT: strh w4, [x19, #8]
213
+ ; NOFP16-NEXT: strh w3, [x19, #6]
214
+ ; NOFP16-NEXT: strh w2, [x19, #4]
215
+ ; NOFP16-NEXT: strh w1, [x19, #2]
304
216
; NOFP16-NEXT: strh w0, [x19]
305
- ; NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
306
- ; NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
307
- ; NOFP16-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
308
- ; NOFP16-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
309
- ; NOFP16-NEXT: ldp x30, x27, [sp], #80 // 16-byte Folded Reload
217
+ ; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
310
218
; NOFP16-NEXT: ret
311
219
%val = call <8 x half > @v8f16_result ()
312
220
store <8 x half > %val , ptr %ptr
0 commit comments