Skip to content

Commit 529c558

Browse files
committed
(u)lltod can now raise FE_INEXACT (disabled by default). Removed (u)itod for now
1 parent 8313728 commit 529c558

File tree

4 files changed

+204
-123
lines changed

4 files changed

+204
-123
lines changed

src/crt/ltod.src

Lines changed: 62 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
assume adl=1
22

3+
__lltod_signal_FE_INEXACT := 0
4+
35
;-------------------------------------------------------------------------------
46

57
section .text
68

79
public __ulltod
810
; (long double)unsigned long long
911
__ulltod:
10-
cp a, a ; set Z flag
12+
cp a, a ; set Z flag
1113
push af
1214
jq __lltod_common
1315

@@ -31,7 +33,7 @@ __lltod:
3133
private __lltod_common
3234
__lltod_common:
3335
call __llctlz
34-
sub a, 63 ; normalize clz_result
36+
sub a, 63 ; normalize clz_result
3537
; filter out exponent of $000 (zero) and $3FF (one)
3638
jr nc, __int_to_f64_zero_or_one
3739
; A is [-63, -1]
@@ -49,54 +51,71 @@ __lltod_common:
4951
push hl
5052
push bc
5153
ld b, a
52-
ld c, 1
54+
ld c, a
55+
xor a, a
5356
.shift_loop:
54-
jr nc, .no_carry
55-
inc c
56-
.no_carry:
57+
adc a, 0
5758
srl h
5859
rr l
5960
djnz .shift_loop
60-
; test round bit
61+
; round upwards to even if (round && (guard || sticky))
6162
jr nc, .no_round
62-
; test sticky bits
63-
dec c
63+
; we must ensure that FE_INEXACT is raised since rounding has occurred
64+
or a, a ; test sticky bits
6465
jr nz, .round_up
65-
; test guard bit
66-
bit 0, l
67-
jr nc, .no_round
66+
inc a ; ld a, 1
67+
and a, l ; test guard bit
68+
jr z, .no_round_inexact
6869
.round_up:
69-
inc b ; round up after shifting
70+
inc b ; round up after shifting
7071
.no_round:
72+
if __lltod_signal_FE_INEXACT
73+
adc a, a ; test sticky and round bits
74+
jr z, .result_is_exact
75+
.no_round_inexact:
76+
ld hl, ___fe_cur_env
77+
set 5, (hl) ; FE_INEXACT
78+
.result_is_exact:
79+
else
80+
.no_round_inexact:
81+
end if
7182
ld h, b
83+
ld a, c
84+
ld l, c
7285
pop bc
7386

74-
ld l, a
7587
ex (sp), hl ; (SP) = shift
7688
call __llshru
77-
ex (sp), hl ; (SP) = shifted HL, H = rounding, L = shift
78-
add a, 51
79-
80-
dec h
81-
push af
82-
; exponent = ($400 + (base2_logarithm - 1)) << 4
83-
; BC = $4EEM
84-
ld l, a
85-
ld h, $04
86-
; clear the implicit mantissa bit
87-
res 4, c ; 52 % 8 == 4
88-
add hl, hl
89-
add hl, hl
90-
add hl, hl
91-
add hl, hl
92-
ld a, l
93-
or a, c
89+
add a, 51 - 1 ; compensate for the implicit mantissa bit
90+
91+
; BC/exponent = [$434*, $43E*]
92+
add a, a
93+
add a, a
94+
add a, a
95+
add a, a
96+
add a, c
9497
ld c, a
95-
ld b, h
96-
pop af
97-
pop hl ; restore shifted HL
98-
call z, __lladd_1 ; round up to even
98+
pop af ; A = rounding
99+
or a, a ; NZ = round-up, Z = no-round
100+
ld b, $43
101+
if 0
102+
jr z, __int_to_f64_shl.no_round
103+
; inlined __lladd_1
104+
inc hl
105+
add hl, de
106+
or a, a
107+
sbc hl, de
108+
jr nz, __int_to_f64_shl.finish
109+
inc de
110+
sbc hl, de
111+
add hl, de
112+
jr nz, __int_to_f64_shl.finish
113+
inc bc
99114
jr __int_to_f64_shl.finish
115+
else
116+
call nz, __lladd_1 ; round up to even
117+
jr __int_to_f64_shl.finish
118+
end if
100119

101120
;-------------------------------------------------------------------------------
102121

@@ -110,45 +129,20 @@ __int_to_f64_zero_or_one:
110129
ld c, h
111130
jr nz, .ret_zero
112131
ld bc, $3FF0
113-
dec hl ; ld hl, 0
132+
dec hl ; ld hl, 0
114133
.ret_zero:
115134
ex de, hl
116135
sbc hl, hl
117136
jr __int_to_f64_shl.finish
118137

119-
;-------------------------------------------------------------------------------
120-
121-
section .text
122-
123-
public __itod
124-
; (long double)int
125-
__itod:
126-
push hl
127-
add hl, hl ; extract signbit
128-
sbc hl, hl ; set Z flag
129-
ld e, l ; sign extend UHL to E:UHL
130-
pop hl
131-
jq __ltod
132-
133-
;-------------------------------------------------------------------------------
134-
135-
section .text
136-
137-
public __utod
138-
; (long double)unsigned int
139-
__utod:
140-
ld e, 0
141-
142-
require __ultod
143-
144138
;-------------------------------------------------------------------------------
145139

146140
section .text
147141

148142
public __ultod
149143
; (long double)unsigned long
150144
__ultod:
151-
cp a, a ; set Z flag
145+
cp a, a ; set Z flag
152146
push af
153147
jq __ltod_common
154148

@@ -160,16 +154,6 @@ __ultod:
160154
; (long double)long
161155
__ltod:
162156
bit 7, e
163-
164-
require __ltod.hijack_itod
165-
166-
;-------------------------------------------------------------------------------
167-
168-
section .text
169-
170-
private __ltod.hijack_itod
171-
__ltod.hijack_itod:
172-
173157
push af
174158
call nz, __lneg ; abs(E:UHL)
175159

@@ -182,7 +166,7 @@ __ltod.hijack_itod:
182166
private __ltod_common
183167
__ltod_common:
184168
call __lctlz
185-
sub a, 31 ; normalize clz_result
169+
sub a, 31 ; normalize clz_result
186170

187171
; filter out exponent of $000 (zero) and $3FF (one)
188172
jr nc, __int_to_f64_zero_or_one
@@ -212,20 +196,21 @@ __int_to_f64_shl:
212196
sub a, l
213197

214198
; exponent = ($400 + (base2_logarithm - 1)) << 4
215-
; BC = $4EEM
199+
; BC = $4PPM
216200
ld l, a
217201
ld h, $04
218202
; clear the implicit mantissa bit
219-
res 4, c ; 52 % 8 == 4
220203
add hl, hl
221204
add hl, hl
222205
add hl, hl
223206
add hl, hl
224207
ld a, l
208+
res 4, c ; 52 % 8 == 4
225209
or a, c
226210
ld c, a
227211
ld b, h
228-
pop hl ; restore shifted HL
212+
pop hl ; restore shifted HL
213+
.no_round:
229214
.finish:
230215
pop af
231216
ret z
@@ -234,11 +219,11 @@ __int_to_f64_shl:
234219

235220
;-------------------------------------------------------------------------------
236221

237-
extern __ineg
238222
extern __lneg
239223
extern __lctlz
240224
extern __llctlz
241225
extern __llshl
242226
extern __llshru
243227
extern __llneg
244228
extern __lladd_1
229+
extern ___fe_cur_env
Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,44 @@
11
assume adl=1
22

3+
;-------------------------------------------------------------------------------
4+
5+
section .text
6+
7+
; _clear_fe_cur_env: read-modify-write of the byte at ___fe_cur_env.
; Takes no register or stack arguments. On return A holds the masked byte
; that was written back, and the flags reflect the AND.
public _clear_fe_cur_env
8+
_clear_fe_cur_env:
9+
ld a, (___fe_cur_env)
10+
and a, -125 ; feclearexcept(FE_ALL_EXCEPT); -125 is $83 as a byte, so bits 2-6 are cleared and bits 0, 1, 7 are kept
11+
ld (___fe_cur_env), a
12+
ret
13+
14+
; _get_fe_cur_env: returns the byte currently stored at ___fe_cur_env in A.
; Takes no arguments and writes nothing to memory.
; NOTE(review): presumably A is the 8-bit C return register for the test
; harness - confirm against the caller's prototype.
public _get_fe_cur_env
15+
_get_fe_cur_env:
16+
ld a, (___fe_cur_env)
17+
ret
18+
19+
;-------------------------------------------------------------------------------
20+
321
section .text
422

5-
public _CRT_utod, _CRT_itod
23+
public _CRT_uitod, _CRT_itod
624

7-
_CRT_utod:
25+
_CRT_uitod:
826
ld hl, 3
927
add hl, sp
1028
ld hl, (hl)
11-
jp __utod
29+
jp __uitod
1230

1331
; _CRT_itod: stack-argument shim in front of __itod.
; Loads the value stored at SP+3 into HL, then jumps (not calls) to __itod,
; so __itod returns straight to this routine's caller.
; NOTE(review): SP+3 is presumably the first C argument sitting above a
; 3-byte ADL-mode return address - confirm against the calling convention.
; NOTE(review): __itod is only declared extern here; confirm it is still
; provided by the runtime being linked.
_CRT_itod:
1432
ld hl, 3
1533
add hl, sp ; HL = SP + 3 (address of the stack slot to read)
1634
ld hl, (hl) ; replace the address in HL with the value stored there
1735
jp __itod ; tail jump: no extra return frame is pushed
1836

19-
extern __utod
37+
;-------------------------------------------------------------------------------
38+
39+
extern __ultod
40+
extern __ltod
41+
extern ___fe_cur_env
42+
43+
extern __uitod
2044
extern __itod

test/floating_point/float64_from_integer/src/f64_from_integer_LUT.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@ typedef struct { uint32_t u32; uint64_t u64; } input_type;
99

1010
typedef struct { uint64_t fu32; uint64_t fi32; uint64_t fu64; uint64_t fi64; } output_type;
1111

12-
static const input_type f64_from_integer_LUT_input[256] = {
12+
static const input_type f64_from_integer_LUT_input[259] = {
1313
/* 0 */ {UINT32_C(0x00000000), UINT64_C(0x0000000000000000)},
1414
/* 1 */ {UINT32_C(0x00000001), UINT64_C(0x0000000000000001)},
1515
/* 2 */ {UINT32_C(0xFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF)},
1616
/* 3 */ {UINT32_C(0x7FFFFFFF), UINT64_C(0x7FFFFFFFFFFFFFFF)},
1717
/* 4 */ {UINT32_C(0x80000000), UINT64_C(0x8000000000000000)},
18-
/* 5 */ {UINT32_C(0xCFA72379), UINT64_C(0x9022BDBCE12368EA)},
19-
/* 6 */ {UINT32_C(0xBCFC9E4C), UINT64_C(0xC53B5C41E4F559D2)},
20-
/* 7 */ {UINT32_C(0x83930797), UINT64_C(0x2F954ADDBC9A079B)},
18+
/* 5 */ {UINT32_C(0x80000001), UINT64_C(0x8000000000000001)},
19+
/* 6 */ {UINT32_C(0x00000002), UINT64_C(0x0000000000000002)},
20+
/* 7 */ {UINT32_C(0xFFFFFFFE), UINT64_C(0xFFFFFFFFFFFFFFFE)},
2121
/* 8 */ {UINT32_C(0xC66AAAFC), UINT64_C(0x8B8B8D6D3691C649)},
2222
/* 9 */ {UINT32_C(0xB3FE2104), UINT64_C(0xA32AC22CB1C97A60)},
2323
/* 10 */ {UINT32_C(0xE02F635F), UINT64_C(0xB36FE887C58B1EC0)},
@@ -266,17 +266,20 @@ static const input_type f64_from_integer_LUT_input[256] = {
266266
/* 253 */ {UINT32_C(0x89FE6A31), UINT64_C(0x0B23A5C0041A0FEA)},
267267
/* 254 */ {UINT32_C(0x1469770E), UINT64_C(0xCDB4EDD42210BA66)},
268268
/* 255 */ {UINT32_C(0xD8B6EA42), UINT64_C(0x34931BF01A51A099)},
269+
/* 256 */ {UINT32_C(0xCFA72379), UINT64_C(0x9022BDBCE12368EA)},
270+
/* 257 */ {UINT32_C(0xBCFC9E4C), UINT64_C(0xC53B5C41E4F559D2)},
271+
/* 258 */ {UINT32_C(0x83930797), UINT64_C(0x2F954ADDBC9A079B)},
269272
};
270273

271-
const output_type f64_from_integer_LUT_output[256] = {
274+
const output_type f64_from_integer_LUT_output[259] = {
272275
/* 0 */ {UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000)},
273276
/* 1 */ {UINT64_C(0x3FF0000000000000), UINT64_C(0x3FF0000000000000), UINT64_C(0x3FF0000000000000), UINT64_C(0x3FF0000000000000)},
274277
/* 2 */ {UINT64_C(0x41EFFFFFFFE00000), UINT64_C(0xBFF0000000000000), UINT64_C(0x43F0000000000000), UINT64_C(0xBFF0000000000000)},
275278
/* 3 */ {UINT64_C(0x41DFFFFFFFC00000), UINT64_C(0x41DFFFFFFFC00000), UINT64_C(0x43E0000000000000), UINT64_C(0x43E0000000000000)},
276279
/* 4 */ {UINT64_C(0x41E0000000000000), UINT64_C(0xC1E0000000000000), UINT64_C(0x43E0000000000000), UINT64_C(0xC3E0000000000000)},
277-
/* 5 */ {UINT64_C(0x41E9F4E46F200000), UINT64_C(0xC1C82C6E43800000), UINT64_C(0x43E20457B79C246D), UINT64_C(0xC3DBF75090C7B726)},
278-
/* 6 */ {UINT64_C(0x41E79F93C9800000), UINT64_C(0xC1D0C0D86D000000), UINT64_C(0x43E8A76B883C9EAB), UINT64_C(0xC3CD6251DF0D8553)},
279-
/* 7 */ {UINT64_C(0x41E07260F2E00000), UINT64_C(0xC1DF1B3E1A400000), UINT64_C(0x43C7CAA56EDE4D04), UINT64_C(0x43C7CAA56EDE4D04)},
280+
/* 5 */ {UINT64_C(0x41E0000000200000), UINT64_C(0xC1DFFFFFFFC00000), UINT64_C(0x43E0000000000000), UINT64_C(0xC3E0000000000000)},
281+
/* 6 */ {UINT64_C(0x4000000000000000), UINT64_C(0x4000000000000000), UINT64_C(0x4000000000000000), UINT64_C(0x4000000000000000)},
282+
/* 7 */ {UINT64_C(0x41EFFFFFFFC00000), UINT64_C(0xC000000000000000), UINT64_C(0x43F0000000000000), UINT64_C(0xC000000000000000)},
280283
/* 8 */ {UINT64_C(0x41E8CD555F800000), UINT64_C(0xC1CCCAAA82000000), UINT64_C(0x43E17171ADA6D239), UINT64_C(0xC3DD1D1CA4B25B8E)},
281284
/* 9 */ {UINT64_C(0x41E67FC420800000), UINT64_C(0xC1D30077BF000000), UINT64_C(0x43E465584596392F), UINT64_C(0xC3D7354F74D38DA1)},
282285
/* 10 */ {UINT64_C(0x41EC05EC6BE00000), UINT64_C(0xC1BFD09CA1000000), UINT64_C(0x43E66DFD10F8B164), UINT64_C(0xC3D32405DE0E9D38)},
@@ -525,6 +528,9 @@ const output_type f64_from_integer_LUT_output[256] = {
525528
/* 253 */ {UINT64_C(0x41E13FCD46200000), UINT64_C(0xC1DD806573C00000), UINT64_C(0x43A6474B80083420), UINT64_C(0x43A6474B80083420)},
526529
/* 254 */ {UINT64_C(0x41B469770E000000), UINT64_C(0x41B469770E000000), UINT64_C(0x43E9B69DBA844217), UINT64_C(0xC3C9258915EEF7A3)},
527530
/* 255 */ {UINT64_C(0x41EB16DD48400000), UINT64_C(0xC1C3A48ADF000000), UINT64_C(0x43CA498DF80D28D0), UINT64_C(0x43CA498DF80D28D0)},
531+
/* 256 */ {UINT64_C(0x41E9F4E46F200000), UINT64_C(0xC1C82C6E43800000), UINT64_C(0x43E20457B79C246D), UINT64_C(0xC3DBF75090C7B726)},
532+
/* 257 */ {UINT64_C(0x41E79F93C9800000), UINT64_C(0xC1D0C0D86D000000), UINT64_C(0x43E8A76B883C9EAB), UINT64_C(0xC3CD6251DF0D8553)},
533+
/* 258 */ {UINT64_C(0x41E07260F2E00000), UINT64_C(0xC1DF1B3E1A400000), UINT64_C(0x43C7CAA56EDE4D04), UINT64_C(0x43C7CAA56EDE4D04)},
528534
};
529535

530536
#endif /* F64_FROM_INTEGER_LUT_H */

0 commit comments

Comments
 (0)