Skip to content

Commit

Permalink
Add Data.Aeson.Decoding.Text, decodeStrictText :: Text -> ...
Browse files Browse the repository at this point in the history
We avoid intermediate ByteString copy by not doing
`decode .  TE.encodeUtf8`, but instead working on `Text` value directly.
As we know that the stream is valid Unicode (UTF8 or UTF16),
we can also take some shortcuts.

One gotcha is that internal Text values (in Keys or Value Strings)
most likely retain the original input `Text` value (its Array).
It shouldn't be an issue if the Value is actually decoded so these
`Text` values disapper, but if not (e.g. `Object` keys survive)
then users might want to use `Data.Text.copy`.

With GHC-9.6.2 (text-2.0.2; UTF-8) the speedup is not huge, but
noticeable anyway:

    aeson/strict:                 OK (0.26s)
      462  μs ±  23 μs
    aeson/text:                   OK (0.22s)
      399  μs ±  25 μs
    aeson/text-via-bs:            OK (0.14s)
      473  μs ±  45 μs

With GHC-8.6.5 (text-1.2.3.0; UTF-16) the speedup is relatively more:

    aeson/strict:                 OK (0.22s)
      819  μs ±  74 μs
    aeson/text:                   OK (0.17s)
      593  μs ±  46 μs
    aeson/text-via-bs:            OK (0.23s)
      875  μs ±  62 μs
  • Loading branch information
phadej committed Oct 3, 2023
1 parent 54d3c33 commit 297caa5
Show file tree
Hide file tree
Showing 229 changed files with 401,634 additions and 13 deletions.
3 changes: 3 additions & 0 deletions aeson.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ library
Data.Aeson.Decoding
Data.Aeson.Decoding.ByteString
Data.Aeson.Decoding.ByteString.Lazy
Data.Aeson.Decoding.Text
Data.Aeson.Decoding.Tokens
Data.Aeson.Encoding
Data.Aeson.Encoding.Internal
Expand All @@ -80,7 +81,9 @@ library
Data.Aeson.Internal.Text
Data.Aeson.Internal.TH
Data.Aeson.Internal.Unescape
Data.Aeson.Internal.UnescapeFromText
Data.Aeson.Internal.Word8
Data.Aeson.Internal.Word16
Data.Aeson.Parser.Time
Data.Aeson.Types.Class
Data.Aeson.Types.FromJSON
Expand Down
14 changes: 14 additions & 0 deletions benchmarks/bench/CompareWithJSON.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import qualified Data.Aeson.Text as A
import qualified Data.Aeson.Parser.Internal as I
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.Text.Lazy.Encoding as TLE
Expand Down Expand Up @@ -52,6 +54,12 @@ decodeS s = fromMaybe (error "fail to parse via Aeson") $ A.decodeStrict s
decodeS' :: BS.ByteString -> A.Value
decodeS' s = fromMaybe (error "fail to parse via Aeson") $ A.decodeStrict' s

decodeT :: T.Text -> A.Value
decodeT t = fromMaybe (error "fail to parse via Aeson") $ A.decodeStrictText t

decodeTviaBS :: T.Text -> A.Value
decodeTviaBS t = fromMaybe (error "fail to parse via Aeson") $ A.decodeStrict $ TE.encodeUtf8 t

decodeAtto :: BL.ByteString -> A.Value
decodeAtto s = fromMaybe (error "fail to parse via Parser.decodeWith") $
I.decodeWith I.jsonEOF A.fromJSON s
Expand Down Expand Up @@ -82,9 +90,11 @@ benchmark =
env (readL enFile) $ \enA ->
env (readS enFile) $ \enS ->
env (readStr enFile) $ \enJ ->
env (readT enFile) $ \enT ->
env (readL jpFile) $ \jpA ->
env (readS jpFile) $ \jpS ->
env (readStr jpFile) $ \jpJ ->
env (readT jpFile) $ \jpT ->
bgroup "compare-json" [
bgroup "decode" [
bgroup "whnf" [
Expand All @@ -94,6 +104,8 @@ benchmark =
, bench "aeson/normal'" $ whnf decode' enA
, bench "aeson/strict" $ whnf decodeS enS
, bench "aeson/strict'" $ whnf decodeS' enS
, bench "aeson/text" $ whnf decodeT enT
, bench "aeson/text-via-bs" $ whnf decodeTviaBS enT

-- attoparsec-aeson package
, bench "aeson/atto" $ whnf decodeAtto enA
Expand Down Expand Up @@ -123,6 +135,8 @@ benchmark =
, bgroup "jp" [
bench "aeson/normal" $ whnf decode jpA
, bench "aeson/strict" $ whnf decodeS jpS
, bench "aeson/text" $ whnf decodeT jpT
, bench "aeson/text-via-bs" $ whnf decodeTviaBS jpT
, bench "json" $ whnf decodeJ jpJ
]
]
Expand Down
12 changes: 8 additions & 4 deletions benchmarks/bench/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,25 @@ import qualified Data.Aeson as A
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE

readStr :: FilePath -> IO String
readStr fp = do
dataDir <- lookupEnv "AESON_BENCH_DATADIR"
fmap BS8.unpack $ BS.readFile $ fromMaybe "json-data" dataDir </> fp
fmap BS8.unpack $ readS fp

readS :: FilePath -> IO BS.ByteString
readS fp = do
dataDir <- lookupEnv "AESON_BENCH_DATADIR"
BS.readFile $ fromMaybe "json-data" dataDir </> fp
BS.readFile $ fromMaybe "benchmarks/json-data" dataDir </> fp

readL :: FilePath -> IO LBS.ByteString
readL fp = do
dataDir <- lookupEnv "AESON_BENCH_DATADIR"
LBS.readFile $ fromMaybe "json-data" dataDir </> fp
LBS.readFile $ fromMaybe "benchmarks/json-data" dataDir </> fp

readT :: FilePath -> IO T.Text
readT fp = fmap TE.decodeUtf8 $ readS fp

readV :: A.FromJSON a => FilePath -> IO a
readV fileName = do
Expand Down
5 changes: 5 additions & 0 deletions src/Data/Aeson.hs
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,14 @@ module Data.Aeson
, eitherDecodeFileStrict
, eitherDecodeStrict'
, eitherDecodeFileStrict'
-- ** Variants for strict text
, decodeStrictText
, eitherDecodeStrictText
-- ** Exception throwing variants
, AesonException (..)
, throwDecode
, throwDecodeStrict
, throwDecodeStrictText
, throwDecode'
, throwDecodeStrict'
-- * Core JSON types
Expand Down Expand Up @@ -171,6 +175,7 @@ import Data.Aeson.Types
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import Data.Aeson.Decoding (decode, eitherDecode, throwDecode, decodeStrict, eitherDecodeStrict, throwDecodeStrict)
import Data.Aeson.Decoding (decodeStrictText, eitherDecodeStrictText, throwDecodeStrictText)

-- $setup
-- >>> :set -XOverloadedStrings
Expand Down
57 changes: 49 additions & 8 deletions src/Data/Aeson/Decoding.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ module Data.Aeson.Decoding (
decodeStrict,
eitherDecodeStrict,
throwDecodeStrict,
decodeStrictText,
eitherDecodeStrictText,
throwDecodeStrictText,
toEitherValue,
unescapeText,
) where
Expand All @@ -17,11 +20,13 @@ import Control.Monad.Catch (MonadThrow (..))
import Data.Aeson.Types.Internal (AesonException (..), formatError)

import qualified Data.Aeson.Types as A
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as T

import Data.Aeson.Decoding.ByteString
import Data.Aeson.Decoding.ByteString.Lazy
import Data.Aeson.Decoding.Text
import Data.Aeson.Decoding.Conversion
import Data.Aeson.Internal.Unescape (unescapeText)

Expand All @@ -32,23 +37,23 @@ import Data.Aeson.Internal.Unescape (unescapeText)
-- | Efficiently deserialize a JSON value from a strict 'B.ByteString'.
-- If this fails due to incomplete or invalid input, 'Nothing' is
-- returned.
decodeStrict :: (A.FromJSON a) => B.ByteString -> Maybe a
decodeStrict :: (A.FromJSON a) => BS.ByteString -> Maybe a
decodeStrict bs = unResult (toResultValue (bsToTokens bs)) (\_ -> Nothing) $ \v bs' -> case A.ifromJSON v of
A.ISuccess x
| bsSpace bs' -> Just x
| otherwise -> Nothing
A.IError _ _ -> Nothing

-- | Like 'decodeStrict' but returns an error message when decoding fails.
eitherDecodeStrict :: (A.FromJSON a) => B.ByteString -> Either String a
eitherDecodeStrict :: (A.FromJSON a) => BS.ByteString -> Either String a
eitherDecodeStrict bs = unResult (toResultValue (bsToTokens bs)) Left $ \v bs' -> case A.ifromJSON v of
A.ISuccess x
| bsSpace bs' -> Right x
| otherwise -> Left "Trailing garbage"
A.IError path msg -> Left $ formatError path msg

-- | Like 'decodeStrict' but throws an 'AesonException' when decoding fails.
throwDecodeStrict :: forall a m. (A.FromJSON a, MonadThrow m) => B.ByteString -> m a
throwDecodeStrict :: forall a m. (A.FromJSON a, MonadThrow m) => BS.ByteString -> m a
throwDecodeStrict bs = unResult (toResultValue (bsToTokens bs)) (throwM . AesonException) $ \v bs' -> case A.ifromJSON v of
A.ISuccess x
| bsSpace bs' -> pure x
Expand All @@ -62,15 +67,15 @@ throwDecodeStrict bs = unResult (toResultValue (bsToTokens bs)) (throwM . AesonE
-- | Efficiently deserialize a JSON value from a lazy 'L.ByteString'.
-- If this fails due to incomplete or invalid input, 'Nothing' is
-- returned.
decode :: (A.FromJSON a) => L.ByteString -> Maybe a
decode :: (A.FromJSON a) => LBS.ByteString -> Maybe a
decode bs = unResult (toResultValue (lbsToTokens bs)) (\_ -> Nothing) $ \v bs' -> case A.ifromJSON v of
A.ISuccess x
| lbsSpace bs' -> Just x
| otherwise -> Nothing
A.IError _ _ -> Nothing

-- | Like 'decode' but returns an error message when decoding fails.
eitherDecode :: (A.FromJSON a) => L.ByteString -> Either String a
eitherDecode :: (A.FromJSON a) => LBS.ByteString -> Either String a
eitherDecode bs = unResult (toResultValue (lbsToTokens bs)) Left $ \v bs' -> case A.ifromJSON v of
A.ISuccess x
| lbsSpace bs' -> Right x
Expand All @@ -80,9 +85,45 @@ eitherDecode bs = unResult (toResultValue (lbsToTokens bs)) Left $ \v bs' -> cas
-- | Like 'decode' but throws an 'AesonException' when decoding fails.
--
-- 'throwDecode' is in @aeson@ since 2.1.2.0, but this variant is added later.
throwDecode :: forall a m. (A.FromJSON a, MonadThrow m) => L.ByteString -> m a
throwDecode :: forall a m. (A.FromJSON a, MonadThrow m) => LBS.ByteString -> m a
throwDecode bs = unResult (toResultValue (lbsToTokens bs)) (throwM . AesonException) $ \v bs' -> case A.ifromJSON v of
A.ISuccess x
| lbsSpace bs' -> pure x
| otherwise -> throwM $ AesonException "Trailing garbage"
A.IError path msg -> throwM $ AesonException $ formatError path msg

-------------------------------------------------------------------------------
-- Decoding: strict text
-------------------------------------------------------------------------------

-- | Efficiently deserialize a JSON value from a strict 'B.ByteString'.
-- If this fails due to incomplete or invalid input, 'Nothing' is
-- returned.
--
-- @since 2.2.1.0
decodeStrictText :: (A.FromJSON a) => T.Text -> Maybe a
decodeStrictText bs = unResult (toResultValue (textToTokens bs)) (\_ -> Nothing) $ \v bs' -> case A.ifromJSON v of
A.ISuccess x
| textSpace bs' -> Just x
| otherwise -> Nothing
A.IError _ _ -> Nothing

-- | Like 'decodeStrictText' but returns an error message when decoding fails.
--
-- @since 2.2.1.0
eitherDecodeStrictText :: (A.FromJSON a) => T.Text -> Either String a
eitherDecodeStrictText bs = unResult (toResultValue (textToTokens bs)) Left $ \v bs' -> case A.ifromJSON v of
A.ISuccess x
| textSpace bs' -> Right x
| otherwise -> Left "Trailing garbage"
A.IError path msg -> Left $ formatError path msg

-- | Like 'decodeStrictText' but throws an 'AesonException' when decoding fails.
--
-- @since 2.2.1.0
throwDecodeStrictText :: forall a m. (A.FromJSON a, MonadThrow m) => T.Text -> m a
throwDecodeStrictText bs = unResult (toResultValue (textToTokens bs)) (throwM . AesonException) $ \v bs' -> case A.ifromJSON v of
A.ISuccess x
| textSpace bs' -> pure x
| otherwise -> throwM $ AesonException "Trailing garbage"
A.IError path msg -> throwM $ AesonException $ formatError path msg
2 changes: 1 addition & 1 deletion src/Data/Aeson/Decoding/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import Data.Aeson.Internal.Word8

-- | Lex (and parse) strict 'ByteString' into 'Tokens' stream.
--
-- @since 2.1.2.0
-- @since 2.2.1.0
--
bsToTokens :: ByteString -> Tokens ByteString String
bsToTokens bs0 = goT bs0 id where
Expand Down
10 changes: 10 additions & 0 deletions src/Data/Aeson/Decoding/Conversion.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
module Data.Aeson.Decoding.Conversion (
bsSpace,
lbsSpace,
textSpace,
ltextSpace,
toEitherValue,
toResultValue,
Result (..),
Expand All @@ -16,6 +18,8 @@ import qualified Data.Aeson.Types.Internal as A
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.Vector as V
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT

import Data.Aeson.Decoding.Tokens

Expand All @@ -25,6 +29,12 @@ bsSpace = B.all (\w -> w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09)
lbsSpace :: L.ByteString -> Bool
lbsSpace = L.all (\w -> w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09)

textSpace :: T.Text -> Bool
textSpace = T.all (\c -> c == ' ' || c == '\r' || c == '\n' || c == '\t')

ltextSpace :: LT.Text -> Bool
ltextSpace = LT.all (\c -> c == ' ' || c == '\r' || c == '\n' || c == '\t')

-- | Convert 'Tokens' to 'A.Value'.
--
-- The resulting value will be in normal form if its forced.
Expand Down
Loading

0 comments on commit 297caa5

Please sign in to comment.