@@ -932,12 +932,9 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
932
932
; F16C-NEXT: pushq %rbx
933
933
; F16C-NEXT: movq %rsi, %rbx
934
934
; F16C-NEXT: vpextrw $0, %xmm0, %eax
935
- ; F16C-NEXT: vcvtsi2ss %edi, %xmm1, %xmm0
936
- ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
937
- ; F16C-NEXT: vcvtph2ps %xmm0, %xmm1
938
935
; F16C-NEXT: vmovd %eax, %xmm0
939
936
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
940
- ; F16C-NEXT: callq powf@PLT
937
+ ; F16C-NEXT: callq __powisf2@PLT
941
938
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
942
939
; F16C-NEXT: vmovd %xmm0, %eax
943
940
; F16C-NEXT: movw %ax, (%rbx)
@@ -949,9 +946,7 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
949
946
; FP16-NEXT: pushq %rbx
950
947
; FP16-NEXT: movq %rsi, %rbx
951
948
; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
952
- ; FP16-NEXT: vcvtsi2sh %edi, %xmm1, %xmm1
953
- ; FP16-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1
954
- ; FP16-NEXT: callq powf@PLT
949
+ ; FP16-NEXT: callq __powisf2@PLT
955
950
; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
956
951
; FP16-NEXT: vmovsh %xmm0, (%rbx)
957
952
; FP16-NEXT: popq %rbx
@@ -965,15 +960,8 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
965
960
; X64-NEXT: movq %rsi, %rbx
966
961
; X64-NEXT: movl %edi, %ebp
967
962
; X64-NEXT: callq __extendhfsf2@PLT
968
- ; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
969
- ; X64-NEXT: xorps %xmm0, %xmm0
970
- ; X64-NEXT: cvtsi2ss %ebp, %xmm0
971
- ; X64-NEXT: callq __truncsfhf2@PLT
972
- ; X64-NEXT: callq __extendhfsf2@PLT
973
- ; X64-NEXT: movaps %xmm0, %xmm1
974
- ; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
975
- ; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
976
- ; X64-NEXT: callq powf@PLT
963
+ ; X64-NEXT: movl %ebp, %edi
964
+ ; X64-NEXT: callq __powisf2@PLT
977
965
; X64-NEXT: callq __truncsfhf2@PLT
978
966
; X64-NEXT: pextrw $0, %xmm0, %eax
979
967
; X64-NEXT: movw %ax, (%rbx)
@@ -984,33 +972,25 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
984
972
;
985
973
; X86-LABEL: test_half_powi:
986
974
; X86: # %bb.0:
975
+ ; X86-NEXT: pushl %edi
987
976
; X86-NEXT: pushl %esi
988
- ; X86-NEXT: subl $56, %esp
977
+ ; X86-NEXT: subl $20, %esp
989
978
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
990
- ; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
991
979
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
992
- ; X86-NEXT: xorps %xmm0, %xmm0
993
- ; X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
994
- ; X86-NEXT: movss %xmm0, (%esp)
995
- ; X86-NEXT: calll __truncsfhf2
996
- ; X86-NEXT: pextrw $0, %xmm0, %eax
997
- ; X86-NEXT: movw %ax, (%esp)
998
- ; X86-NEXT: calll __extendhfsf2
999
- ; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
1000
- ; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
980
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1001
981
; X86-NEXT: pextrw $0, %xmm0, %eax
1002
982
; X86-NEXT: movw %ax, (%esp)
1003
983
; X86-NEXT: calll __extendhfsf2
984
+ ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
1004
985
; X86-NEXT: fstps (%esp)
1005
- ; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
1006
- ; X86-NEXT: fstps {{[0-9]+}}(%esp)
1007
- ; X86-NEXT: calll powf
986
+ ; X86-NEXT: calll __powisf2
1008
987
; X86-NEXT: fstps (%esp)
1009
988
; X86-NEXT: calll __truncsfhf2
1010
989
; X86-NEXT: pextrw $0, %xmm0, %eax
1011
990
; X86-NEXT: movw %ax, (%esi)
1012
- ; X86-NEXT: addl $56, %esp
991
+ ; X86-NEXT: addl $20, %esp
1013
992
; X86-NEXT: popl %esi
993
+ ; X86-NEXT: popl %edi
1014
994
; X86-NEXT: retl
1015
995
%res = call half @llvm.powi.half (half %a0 , i32 %a1 )
1016
996
store half %res , ptr %p0 , align 2
0 commit comments