Skip to content

[AArch64] Switch to soft promoting half types. #80576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1308,6 +1308,8 @@ class AArch64TargetLowering : public TargetLowering {
bool preferScalarizeSplat(SDNode *N) const override;

unsigned getMinimumJumpTableEntries() const override;

/// Opts this target in to soft promotion of half types: this override of the
/// base-class hook unconditionally returns true.
/// NOTE(review): the precise legalization behaviour selected by this hook is
/// defined by the base class, which is not visible here — confirm against the
/// TargetLowering documentation.
bool softPromoteHalfType() const override { return true; }
};

namespace AArch64 {
Expand Down
140 changes: 24 additions & 116 deletions llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,20 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
; NOFP16-NEXT: .cfi_offset w22, -32
; NOFP16-NEXT: .cfi_offset w30, -48
; NOFP16-NEXT: mov w21, w0
; NOFP16-NEXT: and w0, w2, #0xffff
; NOFP16-NEXT: and w0, w1, #0xffff
; NOFP16-NEXT: mov x19, x3
; NOFP16-NEXT: mov w20, w1
; NOFP16-NEXT: mov w20, w2
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w22, w0
; NOFP16-NEXT: and w0, w21, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w21, w0
; NOFP16-NEXT: mov w8, w0
; NOFP16-NEXT: and w0, w20, #0xffff
; NOFP16-NEXT: orr x21, x8, x22, lsl #32
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w8, w21
; NOFP16-NEXT: // kill: def $w0 killed $w0 def $x0
; NOFP16-NEXT: str w22, [x19, #8]
; NOFP16-NEXT: orr x8, x8, x0, lsl #32
; NOFP16-NEXT: str x21, [x19]
; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT: str x8, [x19]
; NOFP16-NEXT: str w0, [x19, #8]
; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; NOFP16-NEXT: ret
Expand Down Expand Up @@ -182,46 +180,17 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
define void @outgoing_v4f16_return(ptr %ptr) #0 {
; NOFP16-LABEL: outgoing_v4f16_return:
; NOFP16: // %bb.0:
; NOFP16-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; NOFP16-NEXT: .cfi_def_cfa_offset 48
; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; NOFP16-NEXT: .cfi_def_cfa_offset 16
; NOFP16-NEXT: .cfi_offset w19, -8
; NOFP16-NEXT: .cfi_offset w20, -16
; NOFP16-NEXT: .cfi_offset w21, -24
; NOFP16-NEXT: .cfi_offset w22, -32
; NOFP16-NEXT: .cfi_offset w23, -40
; NOFP16-NEXT: .cfi_offset w30, -48
; NOFP16-NEXT: .cfi_offset w30, -16
; NOFP16-NEXT: mov x19, x0
; NOFP16-NEXT: bl v4f16_result
; NOFP16-NEXT: and w0, w0, #0xffff
; NOFP16-NEXT: mov w20, w1
; NOFP16-NEXT: mov w21, w2
; NOFP16-NEXT: mov w22, w3
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w23, w0
; NOFP16-NEXT: and w0, w20, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w20, w0
; NOFP16-NEXT: and w0, w21, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w21, w0
; NOFP16-NEXT: and w0, w22, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w0, [x19, #6]
; NOFP16-NEXT: mov w0, w21
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w0, [x19, #4]
; NOFP16-NEXT: mov w0, w20
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w0, [x19, #2]
; NOFP16-NEXT: mov w0, w23
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w2, [x19, #4]
; NOFP16-NEXT: strh w3, [x19, #6]
; NOFP16-NEXT: strh w1, [x19, #2]
; NOFP16-NEXT: strh w0, [x19]
; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT: ret
%val = call <4 x half> @v4f16_result()
store <4 x half> %val, ptr %ptr
Expand All @@ -231,82 +200,21 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 {
define void @outgoing_v8f16_return(ptr %ptr) #0 {
; NOFP16-LABEL: outgoing_v8f16_return:
; NOFP16: // %bb.0:
; NOFP16-NEXT: stp x30, x27, [sp, #-80]! // 16-byte Folded Spill
; NOFP16-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
; NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
; NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
; NOFP16-NEXT: .cfi_def_cfa_offset 80
; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; NOFP16-NEXT: .cfi_def_cfa_offset 16
; NOFP16-NEXT: .cfi_offset w19, -8
; NOFP16-NEXT: .cfi_offset w20, -16
; NOFP16-NEXT: .cfi_offset w21, -24
; NOFP16-NEXT: .cfi_offset w22, -32
; NOFP16-NEXT: .cfi_offset w23, -40
; NOFP16-NEXT: .cfi_offset w24, -48
; NOFP16-NEXT: .cfi_offset w25, -56
; NOFP16-NEXT: .cfi_offset w26, -64
; NOFP16-NEXT: .cfi_offset w27, -72
; NOFP16-NEXT: .cfi_offset w30, -80
; NOFP16-NEXT: .cfi_offset w30, -16
; NOFP16-NEXT: mov x19, x0
; NOFP16-NEXT: bl v8f16_result
; NOFP16-NEXT: and w0, w0, #0xffff
; NOFP16-NEXT: mov w21, w1
; NOFP16-NEXT: mov w22, w2
; NOFP16-NEXT: mov w23, w3
; NOFP16-NEXT: mov w24, w4
; NOFP16-NEXT: mov w25, w5
; NOFP16-NEXT: mov w26, w6
; NOFP16-NEXT: mov w27, w7
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w20, w0
; NOFP16-NEXT: and w0, w21, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w21, w0
; NOFP16-NEXT: and w0, w22, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w22, w0
; NOFP16-NEXT: and w0, w23, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w23, w0
; NOFP16-NEXT: and w0, w24, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w24, w0
; NOFP16-NEXT: and w0, w25, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w25, w0
; NOFP16-NEXT: and w0, w26, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w26, w0
; NOFP16-NEXT: and w0, w27, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w0, [x19, #14]
; NOFP16-NEXT: mov w0, w26
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w0, [x19, #12]
; NOFP16-NEXT: mov w0, w25
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w0, [x19, #10]
; NOFP16-NEXT: mov w0, w24
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w0, [x19, #8]
; NOFP16-NEXT: mov w0, w23
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w0, [x19, #6]
; NOFP16-NEXT: mov w0, w22
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w0, [x19, #4]
; NOFP16-NEXT: mov w0, w21
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w0, [x19, #2]
; NOFP16-NEXT: mov w0, w20
; NOFP16-NEXT: bl __gnu_f2h_ieee
; NOFP16-NEXT: strh w5, [x19, #10]
; NOFP16-NEXT: strh w7, [x19, #14]
; NOFP16-NEXT: strh w6, [x19, #12]
; NOFP16-NEXT: strh w4, [x19, #8]
; NOFP16-NEXT: strh w3, [x19, #6]
; NOFP16-NEXT: strh w2, [x19, #4]
; NOFP16-NEXT: strh w1, [x19, #2]
; NOFP16-NEXT: strh w0, [x19]
; NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
; NOFP16-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT: ldp x30, x27, [sp], #80 // 16-byte Folded Reload
; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT: ret
%val = call <8 x half> @v8f16_result()
store <8 x half> %val, ptr %ptr
Expand Down