From a29046556ebfbc5c3818eaf5e1fd699299eb0637 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 8 Apr 2020 09:52:56 -0500 Subject: [PATCH 1/5] Put information in README regarding overflow, and add CONTRIBUTING (#178) --- CONTRIBUTING.md | 11 +++++++ README.md | 84 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 3 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..b753b591 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,11 @@ +# Style guide + +This outlines recommended practices for contributors to this package. + +## Naming + +- Type parameters: use `F` for `F <: Fixed`, `N` for `N <: Normed`, + and `X` for `X <: FixedPoint`. Use `f` for the number of fractional bits. +- Use `Ti` for `Ti <: Integer`, `Tf` for `Tf <: AbstractFloat`, and `Tw` + for `widen`ed types. +- `T` should refer to the underlying "raw" type. diff --git a/README.md b/README.md index dd0d086d..464c1018 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![codecov.io](http://codecov.io/github/JuliaMath/FixedPointNumbers.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaMath/FixedPointNumbers.jl?branch=master) This library implements fixed-point number types. A -[fixed-point number][wikipedia] represents a fractional, or +[fixed-point number] represents a fractional, or non-integral, number. In contrast with the more widely known floating-point numbers, with fixed-point numbers the decimal point doesn't "float": fixed-point numbers are effectively integers that are @@ -37,7 +37,7 @@ except the sign bit) for the fractional part. The value of the number is interpreted as if the integer representation has been divided by `2^f`. Consequently, `Fixed{Int8,7}` numbers `x` satisfy -``` +```julia -1.0 = -128/128 ≤ x ≤ 127/128 ≈ 0.992. 
``` @@ -64,4 +64,82 @@ More generally, an arbitrary number of bits from any of the standard unsigned integer widths can be used for the fractional part. For example: `Normed{UInt32,16}`, `Normed{UInt64,3}`, `Normed{UInt128,7}`. -[wikipedia]: http://en.wikipedia.org/wiki/Fixed-point_arithmetic +# Computation with Fixed and Normed numbers + +You can perform mathematical operations with `FixedPoint` numbers, but keep in mind +that they are vulnerable to both [rounding] and [overflow]. For example: + +```julia +julia> x = N0f8(0.8) +0.8N0f8 + +julia> float(x) + x +1.6f0 + +julia> x + x +0.596N0f8 +``` + +This is a consequence of the rules that govern overflow in integer arithmetic: + +```julia +julia> y = reinterpret(x) # `reinterpret(x::FixedPoint)` reinterprets as the underlying "raw" type +0xcc + +julia> reinterpret(N0f8, y + y) # add two UInt8s and then reinterpret as N0f8 +0.596N0f8 +``` + +Similarly, + +```julia +julia> x = eps(N0f8) # smallest nonzero `N0f8` number +0.004N0f8 + +julia> x*x +0.0N0f8 +``` + +which is rounding-induced [underflow]. Finally, + +```julia +julia> x = N4f12(15) +15.0N4f12 + +julia> x*x +ERROR: ArgumentError: Normed{UInt16,12} is a 16-bit type representing 65536 values from 0.0 to 16.0037; cannot represent 225.0 +Stacktrace: + [1] throw_converterror(::Type{Normed{UInt16,12}}, ::Float32) at /home/tim/.julia/dev/FixedPointNumbers/src/FixedPointNumbers.jl:251 + [2] _convert at /home/tim/.julia/dev/FixedPointNumbers/src/normed.jl:77 [inlined] + [3] FixedPoint at /home/tim/.julia/dev/FixedPointNumbers/src/FixedPointNumbers.jl:51 [inlined] + [4] convert at ./number.jl:7 [inlined] + [5] *(::Normed{UInt16,12}, ::Normed{UInt16,12}) at /home/tim/.julia/dev/FixedPointNumbers/src/normed.jl:254 + [6] top-level scope at REPL[16]:1 +``` + +In some circumstances, it may make most sense to think of `FixedPoint` numbers as *storage types* +rather than computational types. 
You can call `float(x)` to convert `x` to a floating-point equivalent that is reasonably +safe for computation; in the type domain, `floattype(T::Type)` returns the corresponding type. +Note that in some cases `floattype(T)` differs from `float`'s behavior on the corresponding "raw" type: + +```julia +julia> float(UInt8) +Float64 + +julia> floattype(N0f8) +Float32 +``` + +Because of the role of FixedPointNumbers in domains such as image-processing, this package tries to limit the expansion of the +number of bits needed to store results. + + +## Contributing to this package + +Please see [CONTRIBUTING.md](CONTRIBUTING.md) for information about improving this package. + + +[fixed-point number]: http://en.wikipedia.org/wiki/Fixed-point_arithmetic +[overflow]: https://en.wikipedia.org/wiki/Integer_overflow +[rounding]: https://en.wikipedia.org/wiki/Round-off_error +[underflow]: https://en.wikipedia.org/wiki/Arithmetic_underflow From 989cca49991e345f691a43d3ac06613bc95c0603 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 15 Jun 2020 05:54:50 -0500 Subject: [PATCH 2/5] Reduce invalidations of other methods (#180) These changes, combined with more extensive changes to Julia itself, greatly reduce latency stemming from loading FixedPointNumbers. Ref https://github.com/JuliaLang/julia/pull/35733. There will be very little benefit to this on its own, but we can at least find out if it works across Julia versions. 
--- src/FixedPointNumbers.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/FixedPointNumbers.jl b/src/FixedPointNumbers.jl index 160c1331..3c3bd338 100644 --- a/src/FixedPointNumbers.jl +++ b/src/FixedPointNumbers.jl @@ -48,7 +48,8 @@ rawtype(::Type{X}) where {T, X <: FixedPoint{T}} = T *(x::Real, ::Type{X}) where {X <: FixedPoint} = _convert(X, x) # constructor-style conversions -(::Type{X})(x::Real) where {X <: FixedPoint} = _convert(X, x) +(::Type{X})(x::X) where {X <: FixedPoint} = x +(::Type{X})(x::Number) where {X <: FixedPoint} = _convert(X, x) function (::Type{<:FixedPoint})(x::AbstractChar) throw(ArgumentError("FixedPoint (Fixed or Normed) cannot be constructed from a Char")) @@ -97,7 +98,6 @@ one(::Type{X}) where {X <: FixedPoint} = oneunit(X) inv_rawone(x) = (@generated) ? (y = 1.0 / rawone(x); :($y)) : 1.0 / rawone(x) # traits -sizeof(::Type{X}) where {X <: FixedPoint} = sizeof(rawtype(X)) eps(::Type{X}) where {X <: FixedPoint} = X(oneunit(rawtype(X)), 0) typemax(::Type{T}) where {T <: FixedPoint} = T(typemax(rawtype(T)), 0) typemin(::Type{T}) where {T <: FixedPoint} = T(typemin(rawtype(T)), 0) From 0872417a8dbbd04518d3a7769732c177ebf26ea0 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 15 Jun 2020 05:56:09 -0500 Subject: [PATCH 3/5] Version 0.8.1 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 92043e1d..8ddfcc5a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "FixedPointNumbers" uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" -version = "0.8.0" +version = "0.8.1" [compat] julia = "1" From f0257ee2c93d5c0de8957998f503e60cc7f333fb Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 17 Mar 2020 11:27:10 -0500 Subject: [PATCH 4/5] Support `floattype(Rational)` and require <:AbstractFloat for fallback It seems to be a non-sequitur to allow `floattype` to return a type that is not an `AbstractFloat`, unless it has been extended as such. 
The docs have been enhanced to clarify when it is OK to extend `floattype` in ways that don't return an `AbstractFloat`. Breaking change: no, not counting the deprecation warning. --- src/FixedPointNumbers.jl | 36 ++++++++++++++++++++++++++++-------- src/deprecations.jl | 8 ++++++++ test/traits.jl | 7 +++++++ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/FixedPointNumbers.jl b/src/FixedPointNumbers.jl index 3c3bd338..858ac74a 100644 --- a/src/FixedPointNumbers.jl +++ b/src/FixedPointNumbers.jl @@ -106,34 +106,54 @@ floatmax(::Type{T}) where {T <: FixedPoint} = typemax(T) """ - floattype(::Type{T}) + floattype(::Type{T})::Type{<:AbstractFloat} -Return the minimum float type that represents `T` without overflow to `Inf`. +Return a minimal type suitable for performing computations with instances of type `T` without integer overflow. -# Example +The fallback definition of `floattype(T)` applies only to `T<:AbstractFloat`. +However, it is permissible to extend `floattype` to return types that are not subtypes of +`AbstractFloat`; the key characteristic is that the return type should support computation without integer overflow. + +In general, the returned type should have the minimum bitwidth needed to encode the full precision of the input type. +However, a priority should be placed on computational efficiency; consequently, types like `Float16` should be avoided +except in scenarios where they are guaranteed to have hardware support.
+ +# Examples A classic usage is to avoid overflow behavior by promoting `FixedPoint` to `AbstractFloat` -```julia +```jldoctest julia> x = N0f8(1.0) 1.0N0f8 julia> x + x # overflow 0.996N0f8 -julia> float_x = floattype(eltype(x))(x) -1.0f0 +julia> T = floattype(x) +Float32 -julia> float_x + float_x +julia> T(x) + T(x) 2.0f0 ``` + +The following represents a valid extension of `floattype` to non-AbstractFloats: + +```julia +julia> using FixedPointNumbers, ColorTypes + +julia> floattype(RGB{N0f8}) +RGB{Float32} +``` + +`RGB` itself is not a subtype of `AbstractFloat`, but unlike `RGB{N0f8}`, operations with `RGB{Float32}` are not subject to integer overflow. """ -floattype(::Type{T}) where {T <: Real} = T # fallback +floattype(::Type{T}) where {T <: AbstractFloat} = T # fallback (we want a MethodError if no method producing AbstractFloat is defined) floattype(::Type{T}) where {T <: Union{ShortInts, Bool}} = Float32 floattype(::Type{T}) where {T <: Integer} = Float64 floattype(::Type{T}) where {T <: LongInts} = BigFloat floattype(::Type{X}) where {T <: ShortInts, X <: FixedPoint{T}} = Float32 floattype(::Type{X}) where {T <: Integer, X <: FixedPoint{T}} = Float64 +floattype(::Type{X}) where {T <: Integer, X <: Rational{T}} = typeof(zero(T)/oneunit(T)) floattype(::Type{X}) where {T <: LongInts, X <: FixedPoint{T}} = BigFloat float(x::FixedPoint) = convert(floattype(x), x) diff --git a/src/deprecations.jl b/src/deprecations.jl index b768d486..30d586a8 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -1 +1,9 @@ import Base.@deprecate_binding + +function floattype(::Type{T}) where {T <: Real} + Base.depwarn(""" + In a future release, the fallback definition of `floattype` will throw a MethodError if it cannot return a type `<:AbstractFloat`. + See the documentation on `floattype` for guidance on whether to define a custom `floattype(::Type{$T})` method.
+ """, :floattype) + return T +end diff --git a/test/traits.jl b/test/traits.jl index 874a47b4..ae700d09 100644 --- a/test/traits.jl +++ b/test/traits.jl @@ -1,3 +1,7 @@ +using FixedPointNumbers, Test + +struct MyReal <: Real end + @testset "floattype" begin function _is_fixed_type(x::Symbol) try @@ -16,4 +20,7 @@ for T in exact_types @test typemax(T) <= maxintfloat(floattype(T)) end + @test floattype(Rational{Int}) === Float64 + + @test_skip(@test_throws MethodError floattype(MyReal)) # TODO: eliminate `@test_skip` when depwarn is eliminated. See #177. end From 54fda32738684156522d506c0f013d0f63e6df6f Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 18 Mar 2020 09:58:43 -0500 Subject: [PATCH 5/5] Provide `floattype` methods for non-Real types --- src/FixedPointNumbers.jl | 6 ++++++ test/traits.jl | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/src/FixedPointNumbers.jl b/src/FixedPointNumbers.jl index 858ac74a..cb6a6b6d 100644 --- a/src/FixedPointNumbers.jl +++ b/src/FixedPointNumbers.jl @@ -156,6 +156,12 @@ floattype(::Type{X}) where {T <: Integer, X <: FixedPoint{T}} = Float64 floattype(::Type{X}) where {T <: Integer, X <: Rational{T}} = typeof(zero(T)/oneunit(T)) floattype(::Type{X}) where {T <: LongInts, X <: FixedPoint{T}} = BigFloat +# Non-Real types +floattype(::Type{Complex{T}}) where T = Complex{floattype(T)} +floattype(::Type{<:Irrational}) = Float64 +floattype(::Type{Base.TwicePrecision{Float64}}) = Float64 # wider would be nice, but hardware support is paramount +floattype(::Type{Base.TwicePrecision{T}}) where T<:Union{Float16,Float32} = widen(T) + float(x::FixedPoint) = convert(floattype(x), x) function minmax(x::X, y::X) where {X <: FixedPoint} diff --git a/test/traits.jl b/test/traits.jl index ae700d09..9aee60b0 100644 --- a/test/traits.jl +++ b/test/traits.jl @@ -21,6 +21,12 @@ struct MyReal <: Real end @test typemax(T) <= maxintfloat(floattype(T)) end @test floattype(Rational{Int}) === Float64 + @test floattype(Complex{Int16}) === 
Complex{Float32} + @test floattype(Complex{Float32}) === Complex{Float32} + @test floattype(Base.TwicePrecision{Float16}) === Float32 + @test floattype(Base.TwicePrecision{Float32}) === Float64 + @test floattype(Base.TwicePrecision{Float64}) === Float64 + @test floattype(typeof(π)) === Float64 @test_skip(@test_throws MethodError floattype(MyReal)) # TODO: eliminate `@test_skip` when depwarn is eliminated. See #177. end