Skip to content

Add handling of duplicate keys #714

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion Data/Aeson/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,20 @@ module Data.Aeson.Parser
, value
, jstring
, scientific
-- ** Handling objects with duplicate keys
, jsonWith
, jsonLast
, jsonAccum
, jsonNoDup
-- * Strict parsers
-- $strict
, json'
, value'
-- ** Handling objects with duplicate keys
, jsonWith'
, jsonLast'
, jsonAccum'
, jsonNoDup'
-- * Decoding without FromJSON instances
, decodeWith
, decodeStrictWith
Expand All @@ -47,7 +57,7 @@ module Data.Aeson.Parser
) where


import Data.Aeson.Parser.Internal (decodeStrictWith, decodeWith, eitherDecodeStrictWith, eitherDecodeWith, json, json', jstring, scientific, value, value')
import Data.Aeson.Parser.Internal

-- $lazy
--
Expand Down
183 changes: 149 additions & 34 deletions Data/Aeson/Parser/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
#if MIN_VERSION_ghc_prim(0,3,1)
{-# LANGUAGE MagicHash #-}
#endif
#if __GLASGOW_HASKELL__ <= 710 && __GLASGOW_HASKELL__ >= 706
-- Work around a compiler bug
{-# OPTIONS_GHC -fsimpl-tick-factor=200 #-}
#endif
-- |
-- Module: Data.Aeson.Parser.Internal
-- Copyright: (c) 2011-2016 Bryan O'Sullivan
Expand All @@ -21,30 +25,43 @@ module Data.Aeson.Parser.Internal
(
-- * Lazy parsers
json, jsonEOF
, jsonWith
, jsonLast
, jsonAccum
, jsonNoDup
, value
, jstring
, jstring_
, scientific
-- * Strict parsers
, json', jsonEOF'
, jsonWith'
, jsonLast'
, jsonAccum'
, jsonNoDup'
, value'
-- * Helpers
, decodeWith
, decodeStrictWith
, eitherDecodeWith
, eitherDecodeStrictWith
-- ** Handling objects with duplicate keys
, fromListAccum
, parseListNoDup
) where

import Prelude.Compat

import Control.Applicative ((<|>))
import Control.Monad (void, when)
import Data.Aeson.Types.Internal (IResult(..), JSONPath, Result(..), Value(..))
import Data.Aeson.Types.Internal (IResult(..), JSONPath, Object, Result(..), Value(..))
import Data.Attoparsec.ByteString.Char8 (Parser, char, decimal, endOfInput, isDigit_w8, signed, string)
import Data.Function (fix)
import Data.Functor.Compat (($>))
import Data.Scientific (Scientific)
import Data.Text (Text)
import Data.Vector as Vector (Vector, empty, fromListN, reverse)
import Data.Vector (Vector)
import qualified Data.Vector as Vector (empty, fromList, fromListN, reverse)
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.Lazy as L
import qualified Data.ByteString as B
Expand Down Expand Up @@ -76,7 +93,7 @@ import qualified Data.Text.Encoding as TE
#define C_n 110
#define C_t 116

-- | Parse a top-level JSON value.
-- | Parse any JSON value.
--
-- The conversion of a parsed value to a Haskell value is deferred
-- until the Haskell value is needed. This may improve performance if
Expand All @@ -86,10 +103,15 @@ import qualified Data.Text.Encoding as TE
-- This function is an alias for 'value'. In aeson 0.8 and earlier, it
-- parsed only object or array types, in conformance with the
-- now-obsolete RFC 4627.
--
-- ==== Warning
--
-- If an object contains duplicate keys, only the first one will be kept.
-- For a more flexible alternative, see 'jsonWith'.
json :: Parser Value
json = value

-- | Parse a top-level JSON value.
-- | Parse any JSON value.
--
-- This is a strict version of 'json' which avoids building up thunks
-- during parsing; it performs all conversions immediately. Prefer
Expand All @@ -98,23 +120,38 @@ json = value
-- This function is an alias for 'value''. In aeson 0.8 and earlier, it
-- parsed only object or array types, in conformance with the
-- now-obsolete RFC 4627.
--
-- ==== Warning
--
-- If an object contains duplicate keys, only the first one will be kept.
-- For a more flexible alternative, see 'jsonWith''.
json' :: Parser Value
json' = value'

object_ :: Parser Value
object_ = {-# SCC "object_" #-} Object <$> objectValues jstring value
-- Open recursion: object_, object_', array_, array_' are parameterized by the
-- toplevel Value parser to be called recursively, to keep the parameter
-- hFromList outside of the recursive loop for proper inlining.

object_ :: HFromList -> Parser Value -> Parser Value
object_ hFromList val = {-# SCC "object_" #-} Object <$> objectValues hFromList jstring val
{-# INLINE object_ #-}

object_' :: Parser Value
object_' = {-# SCC "object_'" #-} do
!vals <- objectValues jstring' value'
object_' :: HFromList -> Parser Value -> Parser Value
object_' hFromList val' = {-# SCC "object_'" #-} do
!vals <- objectValues hFromList jstring' val'
return (Object vals)
where
jstring' = do
!s <- jstring
return s
{-# INLINE object_' #-}

objectValues :: Parser Text -> Parser Value -> Parser (H.HashMap Text Value)
objectValues str val = do
-- The object parser 'objectValues' is parameterized by the object constructor.
-- See also 'jsonWith'.
type HFromList = [(Text, Value)] -> Parser Object
Copy link
Collaborator

@phadej phadej Jun 23, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd have Either String Object as a return type. Parser is "too big".

And actually wouldn't use a type synonym, hFromList is confusing naming, mkObject is what it is

EDIT: I see HFromList is because of HashMap.fromList, too far. mkObject is a domain specific, thus better.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. I forgot we're in attoparsec's Parser, not aeson's Parser (which is closer to Either). That's a fair point about the synonym, I'll remove it. Thanks for the review!


objectValues :: HFromList -> Parser Text -> Parser Value -> Parser (H.HashMap Text Value)
objectValues hFromList str val = do
skipSpace
w <- A.peekWord8'
if w == CLOSE_CURLY
Expand All @@ -130,16 +167,18 @@ objectValues str val = do
let acc' = (k, v) : acc
if ch == COMMA
then skipSpace >> loop acc'
else return (H.fromList acc')
else hFromList acc'
{-# INLINE objectValues #-}

array_ :: Parser Value
array_ = {-# SCC "array_" #-} Array <$> arrayValues value
array_ :: Parser Value -> Parser Value
array_ val = {-# SCC "array_" #-} Array <$> arrayValues val
{-# INLINE array_ #-}

array_' :: Parser Value
array_' = {-# SCC "array_'" #-} do
!vals <- arrayValues value'
array_' :: Parser Value -> Parser Value
array_' val = {-# SCC "array_'" #-} do
!vals <- arrayValues val
return (Array vals)
{-# INLINE array_' #-}

arrayValues :: Parser Value -> Parser (Vector Value)
arrayValues val = do
Expand All @@ -157,42 +196,104 @@ arrayValues val = do
else return (Vector.reverse (Vector.fromListN len (v:acc)))
{-# INLINE arrayValues #-}

-- | Parse any JSON value. You should usually 'json' in preference to
-- this function, as this function relaxes the object-or-array
-- requirement of RFC 4627.
--
-- In particular, be careful in using this function if you think your
-- code might interoperate with Javascript. A na&#xef;ve Javascript
-- library that parses JSON data using @eval@ is vulnerable to attack
-- unless the encoded data represents an object or an array. JSON
-- implementations in other languages conform to that same restriction
-- to preserve interoperability and security.
-- | Parse any JSON value. Synonym of 'json'.
value :: Parser Value
value = do
value = jsonWith (pure . H.fromList)

-- | Parse any JSON value.
--
-- This parser is parameterized by a function to construct an 'Object'
-- from a raw list of key-value pairs, where duplicates are preserved.
-- The pairs appear in __reverse order__ from the source.
--
-- ==== __Examples__
--
-- 'json' keeps only the first occurence of each key, using 'HashMap.Lazy.fromList'.
--
-- @
-- 'json' = 'jsonWith' ('pure' '.' 'H.fromList')
-- @
--
-- 'jsonLast' keeps the last occurence of each key, using
-- @'HashMap.Lazy.fromListWith' ('const' 'id')@.
--
-- @
-- 'jsonLast' = 'jsonWith' ('pure' '.' 'HashMap.Lazy.fromListWith' ('const' 'id'))
-- @
--
-- 'jsonAccum' keeps wraps all values in arrays to keep duplicates, using
-- 'fromListAccum'.
--
-- @
-- 'jsonAccum' = 'jsonWith' ('pure' . 'fromListAccum')
-- @
--
-- 'jsonNoDup' fails if any object contains duplicate keys, using 'parseListNoDup'.
--
-- @
-- 'jsonNoDup' = 'jsonWith' 'parseListNoDup'
-- @
jsonWith :: ([(Text, Value)] -> Parser Object) -> Parser Value
jsonWith hFromList = fix $ \value_ -> do
skipSpace
w <- A.peekWord8'
case w of
DOUBLE_QUOTE -> A.anyWord8 *> (String <$> jstring_)
OPEN_CURLY -> A.anyWord8 *> object_
OPEN_SQUARE -> A.anyWord8 *> array_
OPEN_CURLY -> A.anyWord8 *> object_ hFromList value_
OPEN_SQUARE -> A.anyWord8 *> array_ value_
C_f -> string "false" $> Bool False
C_t -> string "true" $> Bool True
C_n -> string "null" $> Null
_ | w >= 48 && w <= 57 || w == 45
-> Number <$> scientific
| otherwise -> fail "not a valid json value"
{-# INLINE jsonWith #-}

-- | Variant of 'json' which keeps only the last occurence of every key.
jsonLast :: Parser Value
jsonLast = jsonWith (pure . H.fromListWith (const id))

-- | Variant of 'json' wrapping all object mappings in 'Array' to preserve
-- key-value pairs with the same keys.
jsonAccum :: Parser Value
jsonAccum = jsonWith (pure . fromListAccum)

-- | Variant of 'json' which fails if any object contains duplicate keys.
jsonNoDup :: Parser Value
jsonNoDup = jsonWith parseListNoDup

-- | Strict version of 'value'. See also 'json''.
-- | @'fromListAccum' kvs@ is an object mapping keys to arrays containing all
-- associated values from the original list @kvs@.
--
-- >>> fromListAccum [("apple", Bool True), ("apple", Bool False), ("orange", Bool False)]
-- fromList [("apple", [Bool False, Bool True]), ("orange", [Bool False])]
fromListAccum :: [(Text, Value)] -> Object
fromListAccum =
fmap (Array . Vector.fromList . ($ [])) . H.fromListWith (.) . (fmap . fmap) (:)

-- | @'fromListNoDup' kvs@ fails if @kvs@ contains duplicate keys.
parseListNoDup :: [(Text, Value)] -> Parser Object
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

point in case: here the HFromList is not used, so connecting the dots is difficult.

I'd remove the type alias.

parseListNoDup =
H.traverseWithKey unwrap . H.fromListWith (\_ _ -> Nothing) . (fmap . fmap) Just
where
unwrap k Nothing = fail $ "found duplicate key: " ++ show k
unwrap _ (Just v) = pure v

-- | Strict version of 'value'. Synonym of 'json''.
value' :: Parser Value
value' = do
value' = jsonWith' (pure . H.fromList)

-- | Strict version of 'jsonWith'.
jsonWith' :: ([(Text, Value)] -> Parser Object) -> Parser Value
jsonWith' hFromList = fix $ \value_ -> do
skipSpace
w <- A.peekWord8'
case w of
DOUBLE_QUOTE -> do
!s <- A.anyWord8 *> jstring_
return (String s)
OPEN_CURLY -> A.anyWord8 *> object_'
OPEN_SQUARE -> A.anyWord8 *> array_'
OPEN_CURLY -> A.anyWord8 *> object_' hFromList value_
OPEN_SQUARE -> A.anyWord8 *> array_' value_
C_f -> string "false" $> Bool False
C_t -> string "true" $> Bool True
C_n -> string "null" $> Null
Expand All @@ -201,6 +302,20 @@ value' = do
!n <- scientific
return (Number n)
| otherwise -> fail "not a valid json value"
{-# INLINE jsonWith' #-}

-- | Variant of 'json'' which keeps only the last occurence of every key.
jsonLast' :: Parser Value
jsonLast' = jsonWith' (pure . H.fromListWith (const id))

-- | Variant of 'json'' wrapping all object mappings in 'Array' to preserve
-- key-value pairs with the same keys.
jsonAccum' :: Parser Value
jsonAccum' = jsonWith' (pure . fromListAccum)

-- | Variant of 'json'' which fails if any object contains duplicate keys.
jsonNoDup' :: Parser Value
jsonNoDup' = jsonWith' parseListNoDup

-- | Parse a quoted JSON string.
jstring :: Parser Text
Expand Down
Loading