-
Notifications
You must be signed in to change notification settings - Fork 18k
/
Copy pathexp_loong64.s
236 lines (206 loc) · 5.34 KB
/
exp_loong64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// IEEE 754 binary64 bit patterns used as integer immediates.
// NearZero is not referenced by the code visible in this file; the routines
// load the same threshold as a float from exprodata<>+88(SB) instead.
#define NearZero 0x3e30000000000000 // 2**-28
// PosInf is stored directly into the result slot on the overflow paths.
#define PosInf 0x7ff0000000000000
// FracMask selects the low 52 bits (the fraction field) of a float64.
#define FracMask 0x000fffffffffffff
// C1 is a scale factor for the denormal branch of the inlined Ldexp code.
#define C1 0x3cb0000000000000 // 2**-52
// exprodata is a read-only table of twelve float64 constants (96 bytes).
// The routines in this file address it by fixed byte offset from its base,
// so the order and offsets of the entries must not change:
//	+0  0.0        +8  0.5        +16 1.0        +24 2.0
//	+32 Ln2Hi      +40 Ln2Lo      +48 Log2e
//	+56 Overflow   +64 Underflow  (range limits checked by archExp)
//	+72 Overflow2  +80 Underflow2 (range limits checked by archExp2)
//	+88 NearZero   (2**-28 as a float64)
DATA exprodata<>+0(SB)/8, $0.0
DATA exprodata<>+8(SB)/8, $0.5
DATA exprodata<>+16(SB)/8, $1.0
DATA exprodata<>+24(SB)/8, $2.0
DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01 // Ln2Hi
DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10 // Ln2Lo
DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00 // Log2e
DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02 // Overflow
DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02 // Underflow
DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03 // Overflow2
DATA exprodata<>+80(SB)/8, $-1.0740e+03 // Underflow2
DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09 // NearZero
GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96
// expmultirodata holds the five float64 coefficients P1..P5 (40 bytes) of the
// polynomial evaluated in the "compute c" step of both routines:
//	c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5)))), with t = r*r.
// The code loads them by fixed byte offset, highest-order coefficient first.
DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01 // P1
DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03 // P2
DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05 // P3
DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06 // P4
DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08 // P5
GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40
// archExp returns e**x, the base-e exponential of x.
// This is an assembly implementation of the method used for function Exp in file exp.go:
// reduce x to k*ln2 + r, approximate exp(r), then scale the result by 2**k.
//
// Special cases handled before the main path:
//	x is NaN       -> x
//	x > Overflow   -> +Inf
//	x < Underflow  -> 0
//	|x| < NearZero -> 1 + x
//
// Register use on the main path:
//	F0  = x            R10 = &exprodata      R11 = &expmultirodata
//	F4  = hi           F5  = lo              F6  = r     F7 = r*r
//	F13 = c            F16 = y               F17 = m (final scale factor)
//	R5  = k            R6  = fraction bits   R7  = biased exponent
//
// func archExp(x float64) float64
TEXT ·archExp(SB),$0-16
	MOVD	x+0(FP), F0	// F0 = x
	MOVV	$exprodata<>+0(SB), R10
	MOVD	56(R10), F1	// Overflow
	MOVD	64(R10), F2	// Underflow
	MOVD	88(R10), F3	// NearZero
	MOVD	16(R10), F17	// 1.0

	CMPEQD	F0, F0, FCC0
	BFPF	isNaN		// x = NaN, return NaN
	CMPGTD	F0, F1, FCC0
	BFPT	overflow	// x > Overflow, return PosInf
	CMPGTD	F2, F0, FCC0
	BFPT	underflow	// x < Underflow, return 0
	ABSD	F0, F5
	CMPGTD	F3, F5, FCC0
	BFPT	nearzero	// fabs(x) < NearZero, return 1 + x

	// argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2
	// computed as r = hi - lo for extra precision.
	// k = int(Log2e*x +/- 0.5): the bias takes the sign of x so that the
	// truncating conversion below rounds to the nearest integer.
	MOVD	0(R10), F5	// 0.0
	MOVD	8(R10), F3	// 0.5
	MOVD	48(R10), F2	// Log2e
	CMPGTD	F0, F5, FCC0
	BFPT	add		// x > 0
sub:
	FMSUBD	F3, F2, F0, F3	// Log2e*x - 0.5
	JMP	2(PC)		// skip the single instruction at add
add:
	FMADDD	F3, F2, F0, F3	// Log2e*x + 0.5
	FTINTRZVD	F3, F4	// float64 -> int64
	MOVV	F4, R5		// R5 = int(k)
	FFINTDV	F4, F3		// int64 -> float64
	MOVD	32(R10), F4	// Ln2Hi
	MOVD	40(R10), F5	// Ln2Lo
	FNMSUBD	F0, F3, F4, F4	// hi = x - k*Ln2Hi
	MULD	F3, F5, F5	// lo = k*Ln2Lo
	SUBD	F5, F4, F6	// r = hi - lo
	MULD	F6, F6, F7	// t = r*r

	// compute c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5))))
	MOVV	$expmultirodata<>+0(SB), R11
	MOVD	32(R11), F8	// P5
	MOVD	24(R11), F9	// P4
	FMADDD	F9, F8, F7, F13	// P4 + t*P5
	MOVD	16(R11), F10	// P3
	FMADDD	F10, F13, F7, F13
	MOVD	8(R11), F11	// P2
	FMADDD	F11, F13, F7, F13
	MOVD	0(R11), F12	// P1
	FMADDD	F12, F13, F7, F13
	FNMSUBD	F6, F13, F7, F13	// c = r - t*(...)

	// compute y = 1 - ((lo - (r*c)/(2-c)) - hi)
	MOVD	24(R10), F14	// 2.0
	SUBD	F13, F14, F14	// 2 - c
	MULD	F6, F13, F15	// r*c
	DIVD	F14, F15, F15	// (r*c)/(2-c)
	SUBD	F15, F5, F15	// lo - (r*c)/(2-c)
	SUBD	F4, F15, F15	// ... - hi
	SUBD	F15, F17, F16	// y = 1 - ...

	// inline Ldexp(y, k), benefit:
	// 1, no parameter pass overhead.
	// 2, skip unnecessary checks for Inf/NaN/Zero
	MOVV	F16, R4
	MOVV	$FracMask, R9
	AND	R9, R4, R6	// fraction
	SRLV	$52, R4, R7	// exponent
	ADDV	R5, R7		// biased exponent of y * 2**k
	MOVV	$1, R12
	BGE	R7, R12, normal
	// denormal: build y * 2**(k+53) and scale it back down by 2**-53,
	// so that the final multiply does the rounding. The rebias is 53, not
	// 52: exponent+k can reach -52 (for example x = -745 gives k = -1075
	// with y >= 1), and adding only 52 would leave a zero exponent field,
	// which drops the implicit leading 1 bit and makes the result
	// underflow to 0 instead of the smallest denormal.
	ADDV	$53, R7
	MOVV	$0x3ca0000000000000, R8	// bit pattern of 2**-53
	MOVV	R8, F17		// m = 2**-53
normal:
	SLLV	$52, R7
	OR	R7, R6, R4	// reassemble exponent and fraction
	MOVV	R4, F0
	MULD	F17, F0		// return m * x
	MOVD	F0, ret+8(FP)
	RET
nearzero:
	ADDD	F17, F0, F0	// 1 + x, then fall through to store it
isNaN:
	MOVD	F0, ret+8(FP)
	RET
underflow:
	MOVV	R0, ret+8(FP)	// all-zero bits = +0.0
	RET
overflow:
	MOVV	$PosInf, R4
	MOVV	R4, ret+8(FP)
	RET
// archExp2 returns 2**x, the base-2 exponential of x.
// This is an assembly implementation of the method used for function Exp2 in file exp.go:
// split x into an integer k and a remainder, approximate 2**remainder,
// then scale the result by 2**k.
//
// Special cases handled before the main path:
//	x is NaN       -> x
//	x > Overflow2  -> +Inf
//	x < Underflow2 -> 0
//
// func archExp2(x float64) float64
TEXT ·archExp2(SB),$0-16
	MOVD	x+0(FP), F0	// F0 = x
	MOVV	$exprodata<>+0(SB), R10
	MOVD	72(R10), F1	// Overflow2
	MOVD	80(R10), F2	// Underflow2

	CMPEQD	F0, F0, FCC0
	BFPF	isNaN		// x = NaN, return NaN
	CMPGTD	F0, F1, FCC0
	BFPT	overflow	// x > Overflow2, return PosInf
	CMPGTD	F2, F0, FCC0
	BFPT	underflow	// x < Underflow2, return 0

	// argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
	// computed as r = hi - lo for extra precision.
	// k = int(x +/- 0.5): the bias takes the sign of x so that the
	// truncating conversion below rounds to the nearest integer.
	MOVD	0(R10), F10	// 0.0
	MOVD	8(R10), F2	// 0.5
	CMPGTD	F0, F10, FCC0
	BFPT	add		// x > 0
sub:
	SUBD	F2, F0, F3	// x - 0.5
	JMP	2(PC)		// skip the single instruction at add
add:
	ADDD	F2, F0, F3	// x + 0.5
	FTINTRZVD	F3, F4	// float64 -> int64
	MOVV	F4, R5		// R5 = int(k)
	FFINTDV	F4, F3		// int64 -> float64
	MOVD	32(R10), F4	// Ln2Hi
	MOVD	40(R10), F5	// Ln2Lo
	SUBD	F3, F0, F3	// t = x - k
	MULD	F3, F4		// hi = t*Ln2Hi
	FNMSUBD	F10, F3, F5, F5	// lo = 0.0 - t*Ln2Lo
	SUBD	F5, F4, F6	// r = hi - lo
	MULD	F6, F6, F7	// r*r

	// compute c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5)))), t = r*r
	MOVV	$expmultirodata<>+0(SB), R11
	MOVD	32(R11), F8	// P5
	MOVD	24(R11), F9	// P4
	FMADDD	F9, F8, F7, F13	// P4 + t*P5
	MOVD	16(R11), F10	// P3 (the 0.0 held in F10 is no longer needed)
	FMADDD	F10, F13, F7, F13
	MOVD	8(R11), F11	// P2
	FMADDD	F11, F13, F7, F13
	MOVD	0(R11), F12	// P1
	FMADDD	F12, F13, F7, F13
	FNMSUBD	F6, F13, F7, F13	// c = r - t*(...)

	// compute y = 1 - ((lo - (r*c)/(2-c)) - hi)
	MOVD	24(R10), F14	// 2.0
	SUBD	F13, F14, F14	// 2 - c
	MULD	F6, F13, F15	// r*c
	DIVD	F14, F15	// (r*c)/(2-c)
	MOVD	16(R10), F17	// 1.0
	SUBD	F15, F5, F15	// lo - (r*c)/(2-c)
	SUBD	F4, F15, F15	// ... - hi
	SUBD	F15, F17, F16	// y = 1 - ...

	// inline Ldexp(y, k), benefit:
	// 1, no parameter pass overhead.
	// 2, skip unnecessary checks for Inf/NaN/Zero
	MOVV	F16, R4
	MOVV	$FracMask, R9
	SRLV	$52, R4, R7	// exponent
	AND	R9, R4, R6	// fraction
	ADDV	R5, R7		// biased exponent of y * 2**k
	MOVV	$1, R12
	BGE	R7, R12, normal
	ADDV	$52, R7		// denormal
	MOVV	$C1, R8
	MOVV	R8, F17		// m = 2**-52
normal:
	SLLV	$52, R7
	OR	R7, R6, R4	// reassemble exponent and fraction
	MOVV	R4, F0
	MULD	F17, F0		// return m * x
isNaN:
	MOVD	F0, ret+8(FP)
	RET
underflow:
	MOVV	R0, ret+8(FP)	// all-zero bits = +0.0
	RET
overflow:
	MOVV	$PosInf, R4
	MOVV	R4, ret+8(FP)
	RET