Skip to content

Commit 36874c3

Browse files
authored
Implement floating point conversion with ryu (#365)
* Implement floating point conversion with ryu * Use manual strictness - Strict extension is from ghc >= 8.0.1 * Use checked shifts - ghc 9.0.1 doesn't shiftL on negative unsafeShiftR and this is more straightforward regardless * Use builtin float-to-word conversion functions * Use builtin conversion to Bool * Remove dependency on array package * Handle non-exhaustive patterns * Try using prim conversions directly * Revert "Try using prim conversions directly" This reverts commit 10809d3. * Dispatch to slow cast when builtin unavailable - GHC.Float exports them starting from base-4.10.0.0 which starts with ghc 8.2.1 * Try bumping min version to 8.4.x * Fix log10pow5 approximation and add unit test - expose RealFloat.Internal so that tests can import and verify that the approximations match in the expected ranges * Re-export floatDec and doubleDec to maintain public API * Improve documentation and fixes for initial code review - make double-polymorphic functions singly-polymorphic for clarity - use canonical Control.Arrow.first - name boolean values in special string formatting and add explanation - explain magic numbers in logarithm approximations and reciprocal multiplication - other misc comments * Improve table generation documentation and clean-up - add general overview of floating point conversion algorithm and idea behind ryu algorithm - remove unused Num Word128 instance * Improve documentation of f2s and d2s and cleanup - deduplicate shortest and rounding handling - add some comments and explanations of algorithm steps inline * Use stricter integral types and annotate fromIntegral usages - a closer inspection of `fromIntegral` usages shows that a lot of that conversion scaffolding is unnecessary if types are chosen correctly - also fixes a delayed to-signed-int conversion that caused unsigned wraparound on subtraction * Add module descriptions and fix typos * Use internal FloatFormat instead of GHC.Float.FFFormat - avoids dependency especially while we 
don't actually support the full API of GHC.Float.formatRealFloat * Use monomorphic helpers for remaining integral conversions used by RealFloat * Remove usage of TemplateHaskell in RealFloat * Fix LUT usage on big-endian systems - Do a runtime byteswap. bswap64 is ~1 cycle on most architectures - NB: multiline string literals are parsed differently with language CPP * Add header for endianness detection * Fix big-endian word16 packing in fast digit formatting * Fix big-endian word128 read from raw addr * Clean up unused functions - finv and fnorm kept as comments for reference to table computation * Fix incorrect reciprocal function usage - Doesn't actually affect correctness vs show since round-even is not implemented (acceptBounds is always False) - Adds a couple explorative test cases and a comment anyways * Add more test coverage and fix doc example - Fixed-format fixed-precision tests - Re-exports TableGenerator constants to allow sanity checks for computed bit-range constants * Use quickcheck equality property in tests * Format haddock headers more similarly to existing ones * Use simpler reciprocal math for 32-bit words - Clarity and marginal performance improvement * Use boxed arithmetic in logic flow - more portable wrt internal ghc prim and 32- vs 64-bit - more readable (less syntax cruft in hashes and verbose operation names) - not much of a performance difference measured * Support ghc 9.2 prim word changes - Data.Word wraps fixed-sized word types - array operations now use fixed-sized word types * Fix 32-bit support - Removes most of the raw Word# usage to facilitate support of fixed-size sub-word types and 32-bit systems. 
Benchmarking shows little difference (<5%) - Implements manual multiplication of 64-bit words for 32-bit systems * Skip conversion to Double before fixed Float formatting - otherwise produces unnecessarily long results since imprecise representations get much more significance with Double precision * Tweak doc wording and add examples - per sjakobi suggestions * Rename FExponent to FScientific - More intuitive name for scientific notation since we're moving away from GHC.Float.FFFormat anyway * Use an opaque FloatFormat type for compatibility - while precision handling is not fully implemented * Rename float fixed-format to standard-format and other naming tweaks - `fixed` was confusing terminology adopted from FFFormat - `FormatFloat'` -> `FormatMode` - some doc tweaks * Encourage inlining by removing partial application * Fix some haddock links and accidental monospacing * Add explanation about difference between implementation and reference paper * Clarify default precision * Point to ryu paper for more details * Fix non-exhaustive warning for ghc 9.2 - add redundant pattern matching. now symmetrical with the e >= 0 case
1 parent 963d625 commit 36874c3

File tree

9 files changed

+2548
-31
lines changed

9 files changed

+2548
-31
lines changed

Data/ByteString/Builder.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ module Data.ByteString.Builder
252252
, stringUtf8
253253

254254
, module Data.ByteString.Builder.ASCII
255+
, module Data.ByteString.Builder.RealFloat
255256

256257
) where
257258

@@ -261,6 +262,7 @@ import Data.ByteString.Builder.Internal
261262
import qualified Data.ByteString.Builder.Prim as P
262263
import qualified Data.ByteString.Lazy.Internal as L
263264
import Data.ByteString.Builder.ASCII
265+
import Data.ByteString.Builder.RealFloat
264266

265267
import Data.String (IsString(..))
266268
import System.IO (Handle, IOMode(..), withBinaryFile)

Data/ByteString/Builder/ASCII.hs

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ import Data.ByteString.Lazy as L
8181
import Data.ByteString.Builder.Internal (Builder)
8282
import qualified Data.ByteString.Builder.Prim as P
8383
import qualified Data.ByteString.Builder.Prim.Internal as P
84+
import Data.ByteString.Builder.RealFloat (floatDec, doubleDec)
8485

8586
import Foreign
8687
import Foreign.C.Types
@@ -89,16 +90,6 @@ import Foreign.C.Types
8990
-- Decimal Encoding
9091
------------------------------------------------------------------------------
9192

92-
93-
-- | Encode a 'String' using 'P.char7'.
94-
{-# INLINE string7 #-}
95-
string7 :: String -> Builder
96-
string7 = P.primMapListFixed P.char7
97-
98-
------------------------------------------------------------------------------
99-
-- Decimal Encoding
100-
------------------------------------------------------------------------------
101-
10293
-- Signed integers
10394
------------------
10495

@@ -163,22 +154,6 @@ wordDec :: Word -> Builder
163154
wordDec = P.primBounded P.wordDec
164155

165156

166-
-- Floating point numbers
167-
-------------------------
168-
169-
-- TODO: Use Bryan O'Sullivan's double-conversion package to speed it up.
170-
171-
-- | /Currently slow./ Decimal encoding of an IEEE 'Float'.
172-
{-# INLINE floatDec #-}
173-
floatDec :: Float -> Builder
174-
floatDec = string7 . show
175-
176-
-- | /Currently slow./ Decimal encoding of an IEEE 'Double'.
177-
{-# INLINE doubleDec #-}
178-
doubleDec :: Double -> Builder
179-
doubleDec = string7 . show
180-
181-
182157
------------------------------------------------------------------------------
183158
-- Hexadecimal Encoding
184159
------------------------------------------------------------------------------

Data/ByteString/Builder/RealFloat.hs

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
-- |
2+
-- Module : Data.ByteString.Builder.RealFloat
3+
-- Copyright : (c) Lawrence Wu 2021
4+
-- License : BSD-style
5+
-- Maintainer : lawrencejwu@gmail.com
6+
--
7+
-- Floating point formatting for @ByteString.Builder@
8+
--
9+
-- This module primarily exposes `floatDec` and `doubleDec` which do the
10+
-- equivalent of converting through @'Data.ByteString.Builder.string7' . 'show'@.
11+
--
12+
-- It also exposes `formatFloat` and `formatDouble` with a similar API as
13+
-- `GHC.Float.formatRealFloat`.
14+
--
15+
-- NB: The float-to-string conversions exposed by this module match `show`'s
16+
-- output (specifically with respect to default rounding and length). In
17+
-- particular, there are boundary cases where the closest and \'shortest\'
18+
-- string representations are not used. Mentions of \'shortest\' in the docs
19+
-- below are with this caveat.
20+
--
21+
-- For example, for fidelity, we match `show` on the output below.
22+
--
23+
-- >>> show (1.0e23 :: Float)
24+
-- "1.0e23"
25+
-- >>> show (1.0e23 :: Double)
26+
-- "9.999999999999999e22"
27+
-- >>> floatDec 1.0e23
28+
-- "1.0e23"
29+
-- >>> doubleDec 1.0e23
30+
-- "9.999999999999999e22"
31+
--
32+
-- Simplifying, we can build a shorter, lossless representation by just using
33+
-- @"1.0e23"@ since the floating point values that are 1 ULP away are
34+
--
35+
-- >>> showHex (castDoubleToWord64 1.0e23) []
36+
-- "44b52d02c7e14af6"
37+
-- >>> castWord64ToDouble 0x44b52d02c7e14af5
38+
-- 9.999999999999997e22
39+
-- >>> castWord64ToDouble 0x44b52d02c7e14af6
40+
-- 9.999999999999999e22
41+
-- >>> castWord64ToDouble 0x44b52d02c7e14af7
42+
-- 1.0000000000000001e23
43+
--
44+
-- In particular, we could use the exact boundary if it is the shortest
45+
-- representation and the original floating number is even. To experiment with
46+
-- the shorter rounding, refer to
47+
-- `Data.ByteString.Builder.RealFloat.Internal.acceptBounds`. This will give us
48+
--
49+
-- >>> floatDec 1.0e23
50+
-- "1.0e23"
51+
-- >>> doubleDec 1.0e23
52+
-- "1.0e23"
53+
--
54+
-- For more details, please refer to the
55+
-- <https://dl.acm.org/doi/10.1145/3192366.3192369 Ryu paper>.
56+
57+
58+
module Data.ByteString.Builder.RealFloat
59+
( floatDec
60+
, doubleDec
61+
62+
-- * Custom formatting
63+
, formatFloat
64+
, formatDouble
65+
, FloatFormat
66+
, standard
67+
, standardDefaultPrecision
68+
, scientific
69+
, generic
70+
) where
71+
72+
import Data.ByteString.Builder.Internal (Builder)
73+
import qualified Data.ByteString.Builder.RealFloat.Internal as R
74+
import qualified Data.ByteString.Builder.RealFloat.F2S as RF
75+
import qualified Data.ByteString.Builder.RealFloat.D2S as RD
76+
import qualified Data.ByteString.Builder.Prim as BP
77+
import GHC.Float (roundTo)
78+
import GHC.Word (Word64)
79+
import GHC.Show (intToDigit)
80+
81+
-- | Decimal encoding of a 'Float'. Whether standard or scientific notation is
-- used follows `show`.
--
-- @
-- floatDec = 'formatFloat' 'generic'
-- @
{-# INLINABLE floatDec #-}
floatDec :: Float -> Builder
floatDec = formatFloat generic
90+
91+
-- | Decimal encoding of a 'Double'. Whether standard or scientific notation is
-- used follows `show`.
--
-- @
-- doubleDec = 'formatDouble' 'generic'
-- @
{-# INLINABLE doubleDec #-}
doubleDec :: Double -> Builder
doubleDec = formatDouble generic
100+
101+
-- | Formatting options accepted by `formatFloat` and `formatDouble`: a
-- notation mode together with an optional precision. Values are built with
-- `standard`, `standardDefaultPrecision`, `scientific` and `generic`.
data FloatFormat
  = MkFloatFormat
      FormatMode
      (Maybe Int)
103+
104+
-- | Standard (non-scientific) notation with exactly `n` digits after the
-- decimal point.
standard :: Int -> FloatFormat
standard = MkFloatFormat FStandard . Just
107+
108+
-- | Standard notation without an explicit precision: the \'default precision\'
-- emits the same decimal places as `show`.
standardDefaultPrecision :: FloatFormat
standardDefaultPrecision = MkFloatFormat FStandard Nothing
111+
112+
-- | Scientific notation using the \'default precision\' (the same decimal
-- places as `show`).
scientific :: FloatFormat
scientific = MkFloatFormat FScientific Nothing
115+
116+
-- | Picks standard or scientific notation from the decimal exponent of the
-- value, matching `show`.
generic :: FloatFormat
generic = MkFloatFormat FGeneric Nothing
119+
120+
-- | Notation used when rendering a floating point value to a 'Builder'.
data FormatMode
  = -- | scientific notation
    FScientific
  | -- | standard notation; the accompanying `Maybe Int` is the number of
    -- digits after the decimal point
    FStandard
  | -- | scientific or standard notation, chosen from the exponent
    FGeneric
  deriving Show
126+
127+
-- TODO: support precision argument for FGeneric and FScientific
-- | Returns a rendered Float. Returns the \'shortest\' representation in
-- scientific notation and takes an optional precision argument in standard
-- notation. Also see `floatDec`.
--
-- With standard notation, the precision argument is used to round (via
-- 'GHC.Float.roundTo') or extend with 0s the \'shortest\' rendered Float. The
-- \'default precision\' does no such modifications and will return as many
-- decimal places as the representation demands.
--
-- NaN, infinities and zeros are rendered by a dedicated path in the generic
-- and standard modes and ignore the precision argument.
--
-- e.g.
--
-- >>> formatFloat (standard 1) 1.2345e-2
-- "0.0"
-- >>> formatFloat (standard 10) 1.2345e-2
-- "0.0123450000"
-- >>> formatFloat standardDefaultPrecision 1.2345e-2
-- "0.012345"
-- >>> formatFloat scientific 12.345
-- "1.2345e1"
-- >>> formatFloat generic 12.345
-- "12.345"
{-# INLINABLE formatFloat #-}
formatFloat :: FloatFormat -> Float -> Builder
formatFloat (MkFloatFormat fmt prec) = \f ->
  let (RF.FloatingDecimal m e) = RF.f2Intermediate f
      -- Exponent shifted by the length of the mantissa; this is the value
      -- `showStandard` expects and the one the generic mode dispatches on.
      e' = R.int32ToInt e + R.decimalLength9 m
      standardRepr = sign f `mappend` showStandard (R.word32ToWord64 m) e' prec
      scientificRepr = BP.primBounded (R.toCharsScientific (f < 0) m e) ()
      -- Special values are checked first; the regular rendering (and thus the
      -- intermediate decimal) is only forced when the value is not special.
      orSpecial regular = maybe regular id (specialStr f)
   in case fmt of
        FGeneric ->
          orSpecial (if e' >= 0 && e' <= 7 then standardRepr else scientificRepr)
        FScientific -> RF.f2s f
        FStandard -> orSpecial standardRepr
167+
168+
-- TODO: support precision argument for FGeneric and FScientific
-- | Returns a rendered Double. Returns the \'shortest\' representation in
-- scientific notation and takes an optional precision argument in standard
-- notation. Also see `doubleDec`.
--
-- With standard notation, the precision argument is used to round (via
-- 'GHC.Float.roundTo') or extend with 0s the \'shortest\' rendered Double. The
-- \'default precision\' does no such modifications and will return as many
-- decimal places as the representation demands.
--
-- NaN, infinities and zeros are rendered by a dedicated path in the generic
-- and standard modes and ignore the precision argument.
--
-- e.g.
--
-- >>> formatDouble (standard 1) 1.2345e-2
-- "0.0"
-- >>> formatDouble (standard 10) 1.2345e-2
-- "0.0123450000"
-- >>> formatDouble standardDefaultPrecision 1.2345e-2
-- "0.012345"
-- >>> formatDouble scientific 12.345
-- "1.2345e1"
-- >>> formatDouble generic 12.345
-- "12.345"
{-# INLINABLE formatDouble #-}
formatDouble :: FloatFormat -> Double -> Builder
formatDouble (MkFloatFormat fmt prec) = \f ->
  let (RD.FloatingDecimal m e) = RD.d2Intermediate f
      -- Exponent shifted by the length of the mantissa; this is the value
      -- `showStandard` expects and the one the generic mode dispatches on.
      e' = R.int32ToInt e + R.decimalLength17 m
      standardRepr = sign f `mappend` showStandard m e' prec
      scientificRepr = BP.primBounded (R.toCharsScientific (f < 0) m e) ()
      -- Special values are checked first; the regular rendering (and thus the
      -- intermediate decimal) is only forced when the value is not special.
      orSpecial regular = maybe regular id (specialStr f)
   in case fmt of
        FGeneric ->
          orSpecial (if e' >= 0 && e' <= 7 then standardRepr else scientificRepr)
        FScientific -> RD.d2s f
        FStandard -> orSpecial standardRepr
208+
209+
-- | Build a single 'Char' using the 'BP.char7' fixed-size primitive.
{-# INLINE char7 #-}
char7 :: Char -> Builder
char7 = BP.primFixed BP.char7
213+
214+
-- | Build a 'String' by encoding every character with 'BP.char7'.
{-# INLINE string7 #-}
string7 :: String -> Builder
string7 = BP.primMapListFixed BP.char7
218+
219+
-- | A leading @-@ for values below zero, and nothing otherwise. Negative zero
-- is not below zero under this comparison, so it produces no sign here.
sign :: RealFloat a => a -> Builder
sign f
  | f < 0 = char7 '-'
  | otherwise = mempty
222+
223+
-- | Fixed renderings for the values that bypass digit generation: NaN, the
-- two infinities, and positive and negative zero. Any other input yields
-- 'Nothing'. See RealFloat.Internal.NonNumbersAndZero
specialStr :: RealFloat a => a -> Maybe Builder
specialStr f
  | isNaN f = literal "NaN"
  | isInfinite f = Just (mconcat [sign f, string7 "Infinity"])
  | isNegativeZero f = literal "-0.0"
  | f == 0 = literal "0.0"
  | otherwise = Nothing
  where
    literal = Just . string7
232+
233+
-- | Decimal digits of a 'Word64', most significant first. The input @0@
-- produces the empty list.
digits :: Word64 -> [Int]
digits w = collect [] w
  where
    -- Peel off the lowest digit and prepend it, so the accumulator ends up in
    -- most-significant-first order.
    collect acc 0 = acc
    collect acc n =
      case R.dquotRem10 n of
        (rest, lowest) -> collect (R.word64ToInt lowest : acc) rest
239+
240+
-- | Render a decimal mantissa and exponent in standard (non-scientific)
-- notation. Based on GHC.Float.showFloat
--
-- The arguments are the mantissa @m@, the exponent @e@ (the number of
-- mantissa digits that sit before the decimal point; non-positive values mean
-- leading fractional zeros) and an optional number of digits to keep after the
-- decimal point. Negative precisions are treated as 0.
showStandard :: Word64 -> Int -> Maybe Int -> Builder
showStandard m e prec = maybe unrounded rounded prec
  where
    ds = digits m
    render = map (char7 . intToDigit)

    -- Integer part: an empty digit list is shown as a single 0.
    intPart xs = if null xs then char7 '0' else mconcat xs

    -- Fractional part: omitted entirely (including the dot) when empty.
    fracPart xs
      | null xs = mempty
      | otherwise = mconcat (char7 '.' : xs)

    -- No precision requested: emit every digit of the mantissa.
    unrounded
      | e <= 0 =
          mconcat
            ( char7 '0'
                : char7 '.'
                : string7 (replicate (negate e) '0')
                : render ds
            )
      | otherwise = place e [] (render ds)

    -- Move @n@ digits in front of the decimal point, padding with zeros when
    -- the mantissa runs out of digits first.
    place 0 ints fracs =
      mconcat [intPart (reverse ints), char7 '.', intPart fracs]
    place n ints [] = place (n - 1) (char7 '0' : ints) []
    place n ints (d : rest) = place (n - 1) (d : ints) rest

    -- Precision requested: round the digit list, then split it around the
    -- decimal point.
    rounded p
      | e >= 0 =
          let (carry, kept) = roundTo 10 (places + e) ds
              (ints, fracs) = splitAt (e + carry) (render kept)
           in intPart ints `mappend` fracPart fracs
      | otherwise =
          let (carry, kept) = roundTo 10 places (replicate (negate e) 0 ++ ds)
              -- Without a carry the rounded list holds fractional digits only,
              -- so a 0 is prepended to act as the integer digit.
              withInt = if carry > 0 then kept else 0 : kept
              (ints, fracs) = splitAt 1 (render withInt)
           in intPart ints `mappend` fracPart fracs
      where
        places = max p 0
272+

0 commit comments

Comments
 (0)