Skip to content

Commit d563b51

Browse files
committed
[AArch64] Switch to soft promoting half types.
The traditional promotion is known to generate wrong code.
1 parent 1da2921 commit d563b51

File tree

2 files changed

+26
-116
lines changed

2 files changed

+26
-116
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1308,6 +1308,8 @@ class AArch64TargetLowering : public TargetLowering {
13081308
bool preferScalarizeSplat(SDNode *N) const override;
13091309

13101310
unsigned getMinimumJumpTableEntries() const override;
1311+
1312+
/// Opt AArch64 in to "soft" promotion of half types by returning true.
/// The traditional half promotion is known to generate wrong code, so this
/// target switches away from it.
// NOTE(review): presumably this makes legalization keep f16 values in their
// 16-bit integer form and only extend to float around arithmetic -- confirm
// against the TargetLowering base-class documentation.
bool softPromoteHalfType() const override { return true; }
13111313
};
13121314

13131315
namespace AArch64 {

llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll

Lines changed: 24 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -70,22 +70,20 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
7070
; NOFP16-NEXT: .cfi_offset w22, -32
7171
; NOFP16-NEXT: .cfi_offset w30, -48
7272
; NOFP16-NEXT: mov w21, w0
73-
; NOFP16-NEXT: and w0, w2, #0xffff
73+
; NOFP16-NEXT: and w0, w1, #0xffff
7474
; NOFP16-NEXT: mov x19, x3
75-
; NOFP16-NEXT: mov w20, w1
75+
; NOFP16-NEXT: mov w20, w2
7676
; NOFP16-NEXT: bl __gnu_h2f_ieee
7777
; NOFP16-NEXT: mov w22, w0
7878
; NOFP16-NEXT: and w0, w21, #0xffff
7979
; NOFP16-NEXT: bl __gnu_h2f_ieee
80-
; NOFP16-NEXT: mov w21, w0
80+
; NOFP16-NEXT: mov w8, w0
8181
; NOFP16-NEXT: and w0, w20, #0xffff
82+
; NOFP16-NEXT: orr x21, x8, x22, lsl #32
8283
; NOFP16-NEXT: bl __gnu_h2f_ieee
83-
; NOFP16-NEXT: mov w8, w21
84-
; NOFP16-NEXT: // kill: def $w0 killed $w0 def $x0
85-
; NOFP16-NEXT: str w22, [x19, #8]
86-
; NOFP16-NEXT: orr x8, x8, x0, lsl #32
84+
; NOFP16-NEXT: str x21, [x19]
8785
; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
88-
; NOFP16-NEXT: str x8, [x19]
86+
; NOFP16-NEXT: str w0, [x19, #8]
8987
; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
9088
; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
9189
; NOFP16-NEXT: ret
@@ -182,46 +180,17 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
182180
define void @outgoing_v4f16_return(ptr %ptr) #0 {
183181
; NOFP16-LABEL: outgoing_v4f16_return:
184182
; NOFP16: // %bb.0:
185-
; NOFP16-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
186-
; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
187-
; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
188-
; NOFP16-NEXT: .cfi_def_cfa_offset 48
183+
; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
184+
; NOFP16-NEXT: .cfi_def_cfa_offset 16
189185
; NOFP16-NEXT: .cfi_offset w19, -8
190-
; NOFP16-NEXT: .cfi_offset w20, -16
191-
; NOFP16-NEXT: .cfi_offset w21, -24
192-
; NOFP16-NEXT: .cfi_offset w22, -32
193-
; NOFP16-NEXT: .cfi_offset w23, -40
194-
; NOFP16-NEXT: .cfi_offset w30, -48
186+
; NOFP16-NEXT: .cfi_offset w30, -16
195187
; NOFP16-NEXT: mov x19, x0
196188
; NOFP16-NEXT: bl v4f16_result
197-
; NOFP16-NEXT: and w0, w0, #0xffff
198-
; NOFP16-NEXT: mov w20, w1
199-
; NOFP16-NEXT: mov w21, w2
200-
; NOFP16-NEXT: mov w22, w3
201-
; NOFP16-NEXT: bl __gnu_h2f_ieee
202-
; NOFP16-NEXT: mov w23, w0
203-
; NOFP16-NEXT: and w0, w20, #0xffff
204-
; NOFP16-NEXT: bl __gnu_h2f_ieee
205-
; NOFP16-NEXT: mov w20, w0
206-
; NOFP16-NEXT: and w0, w21, #0xffff
207-
; NOFP16-NEXT: bl __gnu_h2f_ieee
208-
; NOFP16-NEXT: mov w21, w0
209-
; NOFP16-NEXT: and w0, w22, #0xffff
210-
; NOFP16-NEXT: bl __gnu_h2f_ieee
211-
; NOFP16-NEXT: bl __gnu_f2h_ieee
212-
; NOFP16-NEXT: strh w0, [x19, #6]
213-
; NOFP16-NEXT: mov w0, w21
214-
; NOFP16-NEXT: bl __gnu_f2h_ieee
215-
; NOFP16-NEXT: strh w0, [x19, #4]
216-
; NOFP16-NEXT: mov w0, w20
217-
; NOFP16-NEXT: bl __gnu_f2h_ieee
218-
; NOFP16-NEXT: strh w0, [x19, #2]
219-
; NOFP16-NEXT: mov w0, w23
220-
; NOFP16-NEXT: bl __gnu_f2h_ieee
189+
; NOFP16-NEXT: strh w2, [x19, #4]
190+
; NOFP16-NEXT: strh w3, [x19, #6]
191+
; NOFP16-NEXT: strh w1, [x19, #2]
221192
; NOFP16-NEXT: strh w0, [x19]
222-
; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
223-
; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
224-
; NOFP16-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
193+
; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
225194
; NOFP16-NEXT: ret
226195
%val = call <4 x half> @v4f16_result()
227196
store <4 x half> %val, ptr %ptr
@@ -231,82 +200,21 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 {
231200
define void @outgoing_v8f16_return(ptr %ptr) #0 {
232201
; NOFP16-LABEL: outgoing_v8f16_return:
233202
; NOFP16: // %bb.0:
234-
; NOFP16-NEXT: stp x30, x27, [sp, #-80]! // 16-byte Folded Spill
235-
; NOFP16-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
236-
; NOFP16-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
237-
; NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
238-
; NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
239-
; NOFP16-NEXT: .cfi_def_cfa_offset 80
203+
; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
204+
; NOFP16-NEXT: .cfi_def_cfa_offset 16
240205
; NOFP16-NEXT: .cfi_offset w19, -8
241-
; NOFP16-NEXT: .cfi_offset w20, -16
242-
; NOFP16-NEXT: .cfi_offset w21, -24
243-
; NOFP16-NEXT: .cfi_offset w22, -32
244-
; NOFP16-NEXT: .cfi_offset w23, -40
245-
; NOFP16-NEXT: .cfi_offset w24, -48
246-
; NOFP16-NEXT: .cfi_offset w25, -56
247-
; NOFP16-NEXT: .cfi_offset w26, -64
248-
; NOFP16-NEXT: .cfi_offset w27, -72
249-
; NOFP16-NEXT: .cfi_offset w30, -80
206+
; NOFP16-NEXT: .cfi_offset w30, -16
250207
; NOFP16-NEXT: mov x19, x0
251208
; NOFP16-NEXT: bl v8f16_result
252-
; NOFP16-NEXT: and w0, w0, #0xffff
253-
; NOFP16-NEXT: mov w21, w1
254-
; NOFP16-NEXT: mov w22, w2
255-
; NOFP16-NEXT: mov w23, w3
256-
; NOFP16-NEXT: mov w24, w4
257-
; NOFP16-NEXT: mov w25, w5
258-
; NOFP16-NEXT: mov w26, w6
259-
; NOFP16-NEXT: mov w27, w7
260-
; NOFP16-NEXT: bl __gnu_h2f_ieee
261-
; NOFP16-NEXT: mov w20, w0
262-
; NOFP16-NEXT: and w0, w21, #0xffff
263-
; NOFP16-NEXT: bl __gnu_h2f_ieee
264-
; NOFP16-NEXT: mov w21, w0
265-
; NOFP16-NEXT: and w0, w22, #0xffff
266-
; NOFP16-NEXT: bl __gnu_h2f_ieee
267-
; NOFP16-NEXT: mov w22, w0
268-
; NOFP16-NEXT: and w0, w23, #0xffff
269-
; NOFP16-NEXT: bl __gnu_h2f_ieee
270-
; NOFP16-NEXT: mov w23, w0
271-
; NOFP16-NEXT: and w0, w24, #0xffff
272-
; NOFP16-NEXT: bl __gnu_h2f_ieee
273-
; NOFP16-NEXT: mov w24, w0
274-
; NOFP16-NEXT: and w0, w25, #0xffff
275-
; NOFP16-NEXT: bl __gnu_h2f_ieee
276-
; NOFP16-NEXT: mov w25, w0
277-
; NOFP16-NEXT: and w0, w26, #0xffff
278-
; NOFP16-NEXT: bl __gnu_h2f_ieee
279-
; NOFP16-NEXT: mov w26, w0
280-
; NOFP16-NEXT: and w0, w27, #0xffff
281-
; NOFP16-NEXT: bl __gnu_h2f_ieee
282-
; NOFP16-NEXT: bl __gnu_f2h_ieee
283-
; NOFP16-NEXT: strh w0, [x19, #14]
284-
; NOFP16-NEXT: mov w0, w26
285-
; NOFP16-NEXT: bl __gnu_f2h_ieee
286-
; NOFP16-NEXT: strh w0, [x19, #12]
287-
; NOFP16-NEXT: mov w0, w25
288-
; NOFP16-NEXT: bl __gnu_f2h_ieee
289-
; NOFP16-NEXT: strh w0, [x19, #10]
290-
; NOFP16-NEXT: mov w0, w24
291-
; NOFP16-NEXT: bl __gnu_f2h_ieee
292-
; NOFP16-NEXT: strh w0, [x19, #8]
293-
; NOFP16-NEXT: mov w0, w23
294-
; NOFP16-NEXT: bl __gnu_f2h_ieee
295-
; NOFP16-NEXT: strh w0, [x19, #6]
296-
; NOFP16-NEXT: mov w0, w22
297-
; NOFP16-NEXT: bl __gnu_f2h_ieee
298-
; NOFP16-NEXT: strh w0, [x19, #4]
299-
; NOFP16-NEXT: mov w0, w21
300-
; NOFP16-NEXT: bl __gnu_f2h_ieee
301-
; NOFP16-NEXT: strh w0, [x19, #2]
302-
; NOFP16-NEXT: mov w0, w20
303-
; NOFP16-NEXT: bl __gnu_f2h_ieee
209+
; NOFP16-NEXT: strh w5, [x19, #10]
210+
; NOFP16-NEXT: strh w7, [x19, #14]
211+
; NOFP16-NEXT: strh w6, [x19, #12]
212+
; NOFP16-NEXT: strh w4, [x19, #8]
213+
; NOFP16-NEXT: strh w3, [x19, #6]
214+
; NOFP16-NEXT: strh w2, [x19, #4]
215+
; NOFP16-NEXT: strh w1, [x19, #2]
304216
; NOFP16-NEXT: strh w0, [x19]
305-
; NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
306-
; NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
307-
; NOFP16-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
308-
; NOFP16-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
309-
; NOFP16-NEXT: ldp x30, x27, [sp], #80 // 16-byte Folded Reload
217+
; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
310218
; NOFP16-NEXT: ret
311219
%val = call <8 x half> @v8f16_result()
312220
store <8 x half> %val, ptr %ptr

0 commit comments

Comments
 (0)