Skip to content

Commit b5bed04

Browse files
simonbyrne authored and KristofferC committed
Improve floating-point Euclidean division for Float16 and Float32 (#49637)
(cherry picked from commit cb7d446)
1 parent 1d71dea commit b5bed04

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

base/div.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,3 +385,9 @@ end
385385
# NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division,
386386
# so it is used here as the basis of float div().
387387
function div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat}
    # Take the remainder under rounding mode `r`, remove it from `x`, and divide by `y`.
    remainder = rem(x, y, r)
    quotient = (x - remainder) / y
    # `round` with no mode argument rounds to the nearest integer value;
    # the result is then converted back to the argument type `T`.
    return convert(T, round(quotient))
end
388+
389+
# Vincent Lefèvre: "The Euclidean Division Implemented with a Floating-Point Division and a Floor"
390+
# https://inria.hal.science/inria-00070403
391+
# Theorem 1 implies that the following are exact if eps(x/y) <= 1
392+
function div(x::Float32, y::Float32, r::RoundingMode)
    # Form the quotient in Float64, round it to an integer value with mode `r`,
    # then narrow the result back to Float32.
    wide_quotient = Float64(x) / Float64(y)
    return Float32(round(wide_quotient, r))
end
393+
function div(x::Float16, y::Float16, r::RoundingMode)
    # Form the quotient in Float32, round it to an integer value with mode `r`,
    # then narrow the result back to Float16.
    wide_quotient = Float32(x) / Float32(y)
    return Float16(round(wide_quotient, r))
end

test/numbers.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1739,6 +1739,27 @@ end
17391739
@test cld(-1.1, 0.1) == div(-1.1, 0.1, RoundUp) == ceil(big(-1.1)/big(0.1)) == -11.0
17401740
@test fld(-1.1, 0.1) == div(-1.1, 0.1, RoundDown) == floor(big(-1.1)/big(0.1)) == -12.0
17411741
end
1742+
@testset "issue #49450" begin  # every check uses ===, so both the value and the float type must match
1743+
@test div(514, Float16(0.75)) === Float16(685)  # Float16: div/fld/cld with a 0.75 divisor
1744+
@test fld(514, Float16(0.75)) === Float16(685)
1745+
@test cld(515, Float16(0.75)) === Float16(687)
1746+
1747+
@test cld(1, Float16(0.000999)) === Float16(1001)  # Float16: small divisors, expected quotients 1001 to 1023
1748+
@test cld(2, Float16(0.001999)) === Float16(1001)
1749+
@test cld(3, Float16(0.002934)) === Float16(1023)
1750+
@test cld(4, Float16(0.003998)) === Float16(1001)
1751+
@test fld(5, Float16(0.004925)) === Float16(1015)
1752+
1753+
@test div(4_194_307, Float32(0.75)) === Float32(5_592_409)  # Float32: div/fld/cld with a 0.75 divisor
1754+
@test fld(4_194_307, Float32(0.75)) === Float32(5_592_409)
1755+
@test cld(4_194_308, Float32(0.75)) === Float32(5_592_411)
1756+
1757+
@test fld(5, Float32(6.556511e-7)) === Float32(7_626_007)  # Float32: tiny divisors, expected quotients in the millions
1758+
@test fld(10, Float32(1.3113022e-6)) === Float32(7_626_007)
1759+
@test fld(11, Float32(1.4305115e-6)) === Float32(7_689_557)
1760+
@test cld(16, Float32(2.8014183e-6)) === Float32(5_711_393)
1761+
@test cld(17, Float32(2.2053719e-6)) === Float32(7_708_451)
1762+
end
17421763
end
17431764
@testset "return types" begin
17441765
for T in (Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128)

0 commit comments

Comments
 (0)