Skip to content

Implemented dtof in assembly #618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 224 additions & 18 deletions src/crt/dtof.src
Original file line number Diff line number Diff line change
@@ -1,18 +1,224 @@
assume adl=1

section .text

public __dtof

__dtof:
; Thin wrapper around ___f64_to_f32 (f64_ret_f32).
; Saves AF, IY, BC, DE and HL, calls the routine, then keeps the E and HL
; left by the callee while restoring every other saved register.
; NOTE(review): presumably ___f64_to_f32 takes the f64 in BC:UDE:UHL and
; returns the f32 in E:UHL - confirm against its definition.
push af
push iy
push bc
push de
push hl
call ___f64_to_f32
; drop the saved HL slot; A and flags are reloaded by the final pop af
pop af
; carry the callee's E across the restore of DE
ld a, e
pop de
ld e, a
pop bc
pop iy
pop af
ret

extern ___f64_to_f32
assume adl=1

section .text

public __dtof

private __dtof_helper
__dtof_helper:
; Out-of-line tail of __dtof for inputs whose exponent is too large for a
; normal f32: finite overflow, infinity and NaN.
; Entered only by the "jr c, __dtof_helper.overflow" in __dtof, with:
;   A    = $10
;   BC   = top 16 bits of the f64 with the sign bit cleared (UBC = 0)
;   UDE  = middle 24 bits of the f64
;   (SP) = saved HL, followed by the saved DE, BC and AF
; Leaves through __dtof.ret_copysign with A = $7F and HL = low 24 bits of
; the f32 result; that code pops the remaining saved registers.
;
; Moving this block of code to be behind __dtof ensures that
; __dtof.ret_copysign can always be reached by jr in all paths.
.overflow:
; carry is set here
pop hl
; A = $10
add a, c ; attempts to overflow the low 4 bits of the exponent
rl b ; (0x7F << 1) | 1 if the input is inf/NaN
inc b ; B will only be zero if the input was inf/NaN
jr nz, .not_inf_nan

; carry is cleared
; Look for mantissa bits that the 5-bit right shift below would discard:
; all of UHL (via adc hl, hl, whose carry out is the top bit of UHL) and
; the low 5 bits of E.
adc hl, hl
jr nz, .has_payload
ld a, e
rla
and a, $3F
jr z, .no_payload
.has_payload:
set 5, e ; ensure that NaN stays NaN
.no_payload:
; A:UBC = C:UDE, then shift right by 5 to form the low 24 bits of the f32.
; NOTE(review): presumably __lshru shifts A:UBC right by L - confirm.
ld a, c
push de
pop bc
ld l, 5
call __lshru
push bc
pop hl
.finish_inf_nan:
; $7F becomes the top byte of the result before the sign is applied
ld a, $7F
jr __dtof.ret_copysign
.not_inf_nan:
; return infinity
ld hl, $800000
jr .finish_inf_nan

; Convert BC:UDE:UHL F64 to E:UHL F32
; Rounding: round to nearest with ties to even
; Behaviour:
; Underflow: Returns signed zero. No signals raised.
; Subnormal: No signals raised.
; Rounded to Infinity: No signals raised.
; Overflow: Returns signed infinity. No signals raised.
; Signaling NaN: Quiet bit preserved. No signals raised.
; Quiet NaN: Quiet bit preserved. No signals raised.
; NaN Payloads: Copies the most significant payload bits. The LSB of mantissa is set if payload bits were discarded/truncated out.
;
; Registers:
; In:  BC:UDE:UHL = f64 (B bit 7 is the sign)
; Out: E:UHL = f32
; A, BC, D and UDE are restored from the stack before returning; the flags
; on return are the ones saved after the initial "bit 7, b".
; Calls: __llshru, __lshru, __lland
; Stack after the four pushes below (top first): HL, DE, BC, AF.
__dtof:
bit 7, b
push af ; preserve A and signbit (Z is set when the input is positive)
push bc
push de
push hl
; clear UBC
inc bc
dec.s bc
res 7, b
; BC now holds exponent:mantissa[51:48] without the sign.
; BC < $3810 means the biased exponent is below $381 (= 1023 - 126),
; i.e. too small for a normal f32.
ld hl, -$3810
add hl, bc
jr nc, .maybe_subnormal
; BC >= $47F0 means the biased exponent is at least $47F (= 1023 + 128),
; i.e. too large for a finite f32 (or the input is inf/NaN).
ld hl, -$47F0 ; $FFB810
ld a, l ; ld a, $10
add hl, bc
jr c, __dtof_helper.overflow
; result is normal or rounds to infinity
; calculate new exponent
; we only need the low 8 bits of the exponent
add hl, hl
add hl, hl
add hl, hl
add hl, hl
; offset = -$380 - -$47F = $FF = -1 ; therefore decrement
dec h ; store new exponent
ld l, 29 ; f64_mant_bits - f32_mant_bits = 52 - 23 = 29
ex (sp), hl ; (SP) = exponent/shift, HL = lo24

; clear exponent
dec a ; ld a, $0F
and a, c
ld c, a
xor a, a
ld b, a
; From here A is the round-up indicator: zero means truncate, non-zero
; means increment the shifted mantissa.
; test round bit (bit 4 of E is the highest bit shifted out by >> 29)
bit 4, e
jr z, .round_down
; test guard bit (bit 5 of E becomes the LSB of the shifted mantissa, so
; when it is set a tie also rounds up, to even)
ld a, e
and a, $20
jr nz, .round_up
; test sticky bits
inc a ; make A non-zero
; HL is not needed after this point: all 24 of its bits are shifted out.
; adc hl, hl tests the low 23 bits of HL and moves its top bit into carry.
adc hl, hl
jr nz, .round_up
; A = carry from HL plus the low 4 bits of E; zero means an exact tie
ld a, e
rla
and a, $1F
.round_up:
.round_down:
; NOTE(review): presumably __llshru shifts BC:UDE:UHL right by the count
; stored at (SP) and preserves A - confirm against its definition.
call __llshru
or a, a
jr z, .no_round
inc hl ; does not overflow
.no_round:
pop af ; a = exponent, flags = 29 = %00011101 (H, bit 3, P/V and C set)
; Halve the exponent byte: its low bit belongs in bit 23 of HL, the
; remaining bits stay in A for the top byte of the result.
or a, a
rra
jr nc, .even_exponent
ld bc, $800000
add hl, bc ; the result might be rounded to infinity here
adc a, c ; adc a, 0 ; won't overflow
.even_exponent:
.subnormal_no_round:
.ret_copysign:
; Common exit. Expects A = top byte of the result without the sign,
; HL = low 24 bits, and the stack holding the saved DE, BC and AF.
pop de
ld e, a
pop bc
pop af
ret z
set 7, e
ret

.ret_zero:
; carry is cleared
pop hl
xor a, a
sbc hl, hl
jr .ret_copysign

.maybe_subnormal:
; BC < $3690 means the biased exponent is below $369 (= 1023 - 150);
; such inputs return signed zero.
ld hl, -$3690
add hl, bc
jr nc, .ret_zero
; calculate shift
; A = (uint8_t)((BC - $3690) >> 4)
; A = (uint8_t)((HL << 4) >> 8)
add hl, hl
add hl, hl
add hl, hl
add hl, hl
ld a, h
; Shift = -A + 4 + 24
cpl
add a, (4 + 24) + 1 ; (4 + 24) + CPL trick
; The value held in A and E excludes the additional 24 bits applied below.
; maximum shift = 24 + 4 + 25 = 24 + 29 = 53
; minimum shift = 24 + 4 + 1 = 24 + 5 = 29
ld b, a
ld e, a ; store shift amount
xor a, a
; calculate sticky bits
; build A:UHL = 1 << shift
ld hl, 1
.shift_loop:
add hl, hl
rla
djnz .shift_loop
; carry won't be set
; set C:UDE to A:UHL
; shift by an additional 24 bits
; Subtract one from A:UHL to turn the single set bit into a mask of all
; lower bits.
dec hl
or a, a
jr z, .the_set_bit_is_in_hl
dec a
.the_set_bit_is_in_hl:
ld c, a
ld a, e ; restore shift amount
ex de, hl
scf
sbc hl, hl
; BC:UDE:UHL = (1 << (shift + 24)) - 1, a mask of the sticky bits
; (SP) = X
; NOTE(review): presumably __lland ANDs BC:UDE:UHL with the 64-bit value
; on the stack and preserves A - confirm against its definition.
call __lland
; test if BC:UDE:UHL is zero
; UBC must be zero for this to work
add hl, de ; carry may be set
adc hl, bc ; won't overflow
pop hl
; DE and BC are swapped here
; (the pops and pushes below do not change the flags, so Z from the test
; above is still valid at the jr z)
pop bc
pop de
push de
push bc

; clear exponent and include the implicit mantissa bit
; D = 1 if any sticky bit was set, otherwise 0
ld d, 0
jr z, .no_sticky_bits
inc d
.no_sticky_bits:

ld l, a ; L = shift
ld a, e
and a, $0F
or a, $10

; NOTE(review): presumably __lshru shifts A:UBC right by L - confirm.
call __lshru
xor a, a ; subnormal exponent
; HL = BC >> 1
; HL is pointed at SP - 1, which is where push bc stores the upper byte of
; BC; that byte is shifted in memory and the two lower bytes after the pop.
scf
sbc hl, hl ; ld hl, -1
add hl, sp
push bc
srl (hl)
pop hl
rr h
rr l ; round bit shifted out

; carry = round bit. Round up when sticky bits exist (D was 1), or on an
; exact tie when the LSB of the result is odd.
jr nc, .subnormal_no_round
dec d
jr z, .subnormal_round_up
bit 0, l
jr z, .subnormal_no_round
.subnormal_round_up:
inc hl ; won't overflow, but may become FLT_MIN
; .subnormal_no_round:
jr .ret_copysign

extern __lland
extern __llshru
extern __lshru
14 changes: 12 additions & 2 deletions test/floating_point/float64_to_float32/src/f64_to_f32_LUT.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ typedef uint64_t input_type;

typedef uint32_t output_type;

static const input_type f64_to_f32_LUT_input[256] = {
static const input_type f64_to_f32_LUT_input[260] = {
/* 0 */ UINT64_C(0x0000000000000000),
/* 1 */ UINT64_C(0x0000000000000001),
/* 2 */ UINT64_C(0x0010000000000000),
Expand Down Expand Up @@ -266,9 +266,14 @@ static const input_type f64_to_f32_LUT_input[256] = {
/* 253 */ UINT64_C(0xD22D38D57ABF3991),
/* 254 */ UINT64_C(0xA86498F2933913FB),
/* 255 */ UINT64_C(0x4841C1F00831E908),
/* bonus edge cases */
/* 256 */ UINT64_C(0x369F82B925D1BFBA),
/* 257 */ UINT64_C(0xB76634D97D4F585C),
/* 258 */ UINT64_C(0x36DD000000000000),
/* 259 */ UINT64_C(0xB80E0000A0000000),
};

const output_type f64_to_f32_LUT_output[256] = {
const output_type f64_to_f32_LUT_output[260] = {
/* 0 */ UINT32_C(0x00000000),
/* 1 */ UINT32_C(0x00000000),
/* 2 */ UINT32_C(0x00000000),
Expand Down Expand Up @@ -525,6 +530,11 @@ const output_type f64_to_f32_LUT_output[256] = {
/* 253 */ UINT32_C(0xFF800000),
/* 254 */ UINT32_C(0x80000000),
/* 255 */ UINT32_C(0x7F800000),
/* bonus edge cases */
/* 256 */ UINT32_C(0x00000001),
/* 257 */ UINT32_C(0x80001635),
/* 258 */ UINT32_C(0x0000000E),
/* 259 */ UINT32_C(0x80780002),
};

#endif /* F64_TO_F32_LUT_H */
Loading
Loading