Skip to content

Commit 56673ee

Browse files
author
v01dxyz
committed
[X86][LegalizeDAG] Legalize FPOWI (f16) with Promote
Instead of defaulting to Expand. Without this change, since Expand is not implemented for FPOWI, it falls through to Libcall. As there are no f16 libcalls, the program aborts when asserts are enabled.
1 parent 9f0f90f commit 56673ee

File tree

2 files changed

+12
-31
lines changed

2 files changed

+12
-31
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
614614
setOperationAction(ISD::FTAN, VT, Action);
615615
setOperationAction(ISD::FSQRT, VT, Action);
616616
setOperationAction(ISD::FPOW, VT, Action);
617+
setOperationAction(ISD::FPOWI, VT, Action);
617618
setOperationAction(ISD::FLOG, VT, Action);
618619
setOperationAction(ISD::FLOG2, VT, Action);
619620
setOperationAction(ISD::FLOG10, VT, Action);

llvm/test/CodeGen/X86/fp16-libcalls.ll

Lines changed: 11 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -932,12 +932,9 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
932932
; F16C-NEXT: pushq %rbx
933933
; F16C-NEXT: movq %rsi, %rbx
934934
; F16C-NEXT: vpextrw $0, %xmm0, %eax
935-
; F16C-NEXT: vcvtsi2ss %edi, %xmm1, %xmm0
936-
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
937-
; F16C-NEXT: vcvtph2ps %xmm0, %xmm1
938935
; F16C-NEXT: vmovd %eax, %xmm0
939936
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
940-
; F16C-NEXT: callq powf@PLT
937+
; F16C-NEXT: callq __powisf2@PLT
941938
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
942939
; F16C-NEXT: vmovd %xmm0, %eax
943940
; F16C-NEXT: movw %ax, (%rbx)
@@ -949,9 +946,7 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
949946
; FP16-NEXT: pushq %rbx
950947
; FP16-NEXT: movq %rsi, %rbx
951948
; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
952-
; FP16-NEXT: vcvtsi2sh %edi, %xmm1, %xmm1
953-
; FP16-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1
954-
; FP16-NEXT: callq powf@PLT
949+
; FP16-NEXT: callq __powisf2@PLT
955950
; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
956951
; FP16-NEXT: vmovsh %xmm0, (%rbx)
957952
; FP16-NEXT: popq %rbx
@@ -965,15 +960,8 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
965960
; X64-NEXT: movq %rsi, %rbx
966961
; X64-NEXT: movl %edi, %ebp
967962
; X64-NEXT: callq __extendhfsf2@PLT
968-
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
969-
; X64-NEXT: xorps %xmm0, %xmm0
970-
; X64-NEXT: cvtsi2ss %ebp, %xmm0
971-
; X64-NEXT: callq __truncsfhf2@PLT
972-
; X64-NEXT: callq __extendhfsf2@PLT
973-
; X64-NEXT: movaps %xmm0, %xmm1
974-
; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
975-
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
976-
; X64-NEXT: callq powf@PLT
963+
; X64-NEXT: movl %ebp, %edi
964+
; X64-NEXT: callq __powisf2@PLT
977965
; X64-NEXT: callq __truncsfhf2@PLT
978966
; X64-NEXT: pextrw $0, %xmm0, %eax
979967
; X64-NEXT: movw %ax, (%rbx)
@@ -984,33 +972,25 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
984972
;
985973
; X86-LABEL: test_half_powi:
986974
; X86: # %bb.0:
975+
; X86-NEXT: pushl %edi
987976
; X86-NEXT: pushl %esi
988-
; X86-NEXT: subl $56, %esp
977+
; X86-NEXT: subl $20, %esp
989978
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
990-
; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
991979
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
992-
; X86-NEXT: xorps %xmm0, %xmm0
993-
; X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
994-
; X86-NEXT: movss %xmm0, (%esp)
995-
; X86-NEXT: calll __truncsfhf2
996-
; X86-NEXT: pextrw $0, %xmm0, %eax
997-
; X86-NEXT: movw %ax, (%esp)
998-
; X86-NEXT: calll __extendhfsf2
999-
; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1000-
; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
980+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1001981
; X86-NEXT: pextrw $0, %xmm0, %eax
1002982
; X86-NEXT: movw %ax, (%esp)
1003983
; X86-NEXT: calll __extendhfsf2
984+
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
1004985
; X86-NEXT: fstps (%esp)
1005-
; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1006-
; X86-NEXT: fstps {{[0-9]+}}(%esp)
1007-
; X86-NEXT: calll powf
986+
; X86-NEXT: calll __powisf2
1008987
; X86-NEXT: fstps (%esp)
1009988
; X86-NEXT: calll __truncsfhf2
1010989
; X86-NEXT: pextrw $0, %xmm0, %eax
1011990
; X86-NEXT: movw %ax, (%esi)
1012-
; X86-NEXT: addl $56, %esp
991+
; X86-NEXT: addl $20, %esp
1013992
; X86-NEXT: popl %esi
993+
; X86-NEXT: popl %edi
1014994
; X86-NEXT: retl
1015995
%res = call half @llvm.powi.half(half %a0, i32 %a1)
1016996
store half %res, ptr %p0, align 2

0 commit comments

Comments
 (0)