Skip to content

Commit f8ad7bb

Browse files
authored
Merge pull request #6594 from phadej/parser-benchmark
Parser benchmark
2 parents dde0d9c + e3dff2a commit f8ad7bb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+524
-393
lines changed

Cabal/Cabal-quickcheck/src/Test/QuickCheck/Instances/Cabal.hs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import Distribution.Types.LibraryName
1717
import Distribution.Types.PackageName
1818
import Distribution.Types.SourceRepo
1919
import Distribution.Types.UnqualComponentName
20+
import Distribution.ModuleName
2021
import Distribution.Types.VersionRange.Internal
2122
import Distribution.Verbosity
2223
import Distribution.Version
@@ -129,6 +130,16 @@ instance Arbitrary VersionIntervals where
129130
instance Arbitrary Bound where
130131
arbitrary = elements [ExclusiveBound, InclusiveBound]
131132

133+
-------------------------------------------------------------------------------
134+
-- ModuleName
135+
-------------------------------------------------------------------------------
136+
137+
instance Arbitrary ModuleName where
138+
arbitrary = fromString . intercalate "." <$> shortListOf1 4 comp where
139+
comp = (:) <$> elements upper <*> shortListOf1 10 (elements moduleChar)
140+
upper = ['A'..'Z']
141+
moduleChar = [ c | c <- ['\0' .. '\255'], isAlphaNum c || c `elem` "_'" ]
142+
132143
-------------------------------------------------------------------------------
133144
-- Dependency
134145
-------------------------------------------------------------------------------

Cabal/Distribution/Backpack.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ instance Pretty OpenUnitId where
117117
--Right (DefiniteUnitId (DefUnitId {unDefUnitId = UnitId "foobar"}))
118118
--
119119
-- >>> eitherParsec "foo[Str=text-1.2.3:Data.Text.Text]" :: Either String OpenUnitId
120-
-- Right (IndefFullUnitId (ComponentId "foo") (fromList [(ModuleName ["Str"],OpenModule (DefiniteUnitId (DefUnitId {unDefUnitId = UnitId "text-1.2.3"})) (ModuleName ["Data","Text","Text"]))]))
120+
-- Right (IndefFullUnitId (ComponentId "foo") (fromList [(ModuleName "Str",OpenModule (DefiniteUnitId (DefUnitId {unDefUnitId = UnitId "text-1.2.3"})) (ModuleName "Data.Text.Text"))]))
121121
--
122122
instance Parsec OpenUnitId where
123123
parsec = P.try parseOpenUnitId <|> fmap DefiniteUnitId parsec
@@ -180,7 +180,7 @@ instance Pretty OpenModule where
180180
-- |
181181
--
182182
-- >>> eitherParsec "Includes2-0.1.0.0-inplace-mysql:Database.MySQL" :: Either String OpenModule
183-
-- Right (OpenModule (DefiniteUnitId (DefUnitId {unDefUnitId = UnitId "Includes2-0.1.0.0-inplace-mysql"})) (ModuleName ["Database","MySQL"]))
183+
-- Right (OpenModule (DefiniteUnitId (DefUnitId {unDefUnitId = UnitId "Includes2-0.1.0.0-inplace-mysql"})) (ModuleName "Database.MySQL"))
184184
--
185185
instance Parsec OpenModule where
186186
parsec = parsecModuleVar <|> parsecOpenModule

Cabal/Distribution/Compat/DList.hs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,15 @@
1212
module Distribution.Compat.DList (
1313
DList,
1414
runDList,
15+
empty,
1516
singleton,
1617
fromList,
1718
toList,
1819
snoc,
1920
) where
2021

2122
import Prelude ()
22-
import Distribution.Compat.Prelude hiding (toList)
23+
import Distribution.Compat.Prelude hiding (toList, empty)
2324

2425
-- | Difference list.
2526
newtype DList a = DList ([a] -> [a])
@@ -31,6 +32,9 @@ runDList (DList run) = run []
3132
singleton :: a -> DList a
3233
singleton a = DList (a:)
3334

35+
empty :: DList a
36+
empty = DList id
37+
3438
fromList :: [a] -> DList a
3539
fromList as = DList (as ++)
3640

@@ -41,7 +45,7 @@ snoc :: DList a -> a -> DList a
4145
snoc xs x = xs <> singleton x
4246

4347
instance Monoid (DList a) where
44-
mempty = DList id
48+
mempty = empty
4549
mappend = (<>)
4650

4751
instance Semigroup (DList a) where

Cabal/Distribution/FieldGrammar/Described.hs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ module Distribution.FieldGrammar.Described (
2929
-- * Character Sets
3030
csChar,
3131
csAlphaNum,
32+
csUpper,
3233
csNotSpace,
3334
csNotSpaceOrComma,
3435
) where
@@ -128,6 +129,9 @@ csChar = CS.singleton
128129
csAlphaNum :: CS.CharSet
129130
csAlphaNum = CS.alphanum
130131

132+
csUpper :: CS.CharSet
133+
csUpper = CS.upper
134+
131135
csNotSpace :: CS.CharSet
132136
csNotSpace = CS.difference CS.universe $ CS.singleton ' '
133137

Cabal/Distribution/ModuleName.hs

Lines changed: 53 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
{-# LANGUAGE DeriveDataTypeable #-}
2-
{-# LANGUAGE DeriveGeneric #-}
3-
1+
{-# LANGUAGE BangPatterns #-}
2+
{-# LANGUAGE DeriveDataTypeable #-}
3+
{-# LANGUAGE DeriveGeneric #-}
4+
{-# LANGUAGE ScopedTypeVariables #-}
45
-----------------------------------------------------------------------------
56
-- |
67
-- Module : Distribution.ModuleName
@@ -13,7 +14,7 @@
1314
-- Data type for Haskell module names.
1415

1516
module Distribution.ModuleName (
16-
ModuleName (..), -- TODO: move Parsec instance here, don't export constructor
17+
ModuleName,
1718
fromString,
1819
fromComponents,
1920
components,
@@ -33,41 +34,65 @@ import Distribution.Utils.ShortText (ShortText, fromShortText, toShortTex
3334
import System.FilePath (pathSeparator)
3435

3536
import qualified Distribution.Compat.CharParsing as P
37+
import qualified Distribution.Compat.DList as DList
3638
import qualified Text.PrettyPrint as Disp
3739

3840
-- | A valid Haskell module name.
3941
--
40-
newtype ModuleName = ModuleName ShortTextLst
42+
newtype ModuleName = ModuleName ShortText
4143
deriving (Eq, Generic, Ord, Read, Show, Typeable, Data)
4244

45+
unModuleName :: ModuleName -> String
46+
unModuleName (ModuleName s) = fromShortText s
47+
4348
instance Binary ModuleName
4449
instance Structured ModuleName
4550

4651
instance NFData ModuleName where
4752
rnf (ModuleName ms) = rnf ms
4853

4954
instance Pretty ModuleName where
50-
pretty (ModuleName ms) =
51-
Disp.hcat (intersperse (Disp.char '.') (map Disp.text $ stlToStrings ms))
55+
pretty = Disp.text . unModuleName
5256

5357
instance Parsec ModuleName where
54-
parsec = fromComponents <$> toList <$> P.sepByNonEmpty component (P.char '.')
55-
where
56-
component = do
57-
c <- P.satisfy isUpper
58-
cs <- P.munch validModuleChar
59-
return (c:cs)
58+
parsec = parsecModuleName
59+
60+
parsecModuleName :: forall m. CabalParsing m => m ModuleName
61+
parsecModuleName = state0 DList.empty where
62+
upper :: m Char
63+
!upper = P.satisfy isUpper
64+
65+
ch :: m Char
66+
!ch = P.satisfy (\c -> validModuleChar c || c == '.')
67+
68+
alt :: m ModuleName -> m ModuleName -> m ModuleName
69+
!alt = (<|>)
70+
71+
state0 :: DList.DList Char -> m ModuleName
72+
state0 acc = do
73+
c <- upper
74+
state1 (DList.snoc acc c)
75+
76+
state1 :: DList.DList Char -> m ModuleName
77+
state1 acc = state1' acc `alt` return (fromString (DList.toList acc))
78+
79+
state1' :: DList.DList Char -> m ModuleName
80+
state1' acc = do
81+
c <- ch
82+
case c of
83+
'.' -> state0 (DList.snoc acc c)
84+
_ -> state1 (DList.snoc acc c)
6085

6186
instance Described ModuleName where
62-
describe _ = RETodo
87+
describe _ = REMunch1 (reChar '.') component where
88+
component = RECharSet csUpper <> reMunchCS (csAlphaNum <> fromString "_'")
6389

6490
validModuleChar :: Char -> Bool
6591
validModuleChar c = isAlphaNum c || c == '_' || c == '\''
6692

6793
validModuleComponent :: String -> Bool
6894
validModuleComponent [] = False
69-
validModuleComponent (c:cs) = isUpper c
70-
&& all validModuleChar cs
95+
validModuleComponent (c:cs) = isUpper c && all validModuleChar cs
7196

7297
-- | Construct a 'ModuleName' from a valid module name 'String'.
7398
--
@@ -76,77 +101,36 @@ validModuleComponent (c:cs) = isUpper c
76101
-- are parsing user input then use 'Distribution.Text.simpleParse' instead.
77102
--
78103
instance IsString ModuleName where
79-
fromString string = fromComponents (split string)
80-
where
81-
split cs = case break (=='.') cs of
82-
(chunk,[]) -> chunk : []
83-
(chunk,_:rest) -> chunk : split rest
104+
fromString = ModuleName . toShortText
84105

85106
-- | Construct a 'ModuleName' from valid module components, i.e. parts
86107
-- separated by dots.
87108
fromComponents :: [String] -> ModuleName
88-
fromComponents components'
89-
| null components' = error zeroComponents
90-
| all validModuleComponent components' = ModuleName (stlFromStrings components')
91-
| otherwise = error badName
92-
where
93-
zeroComponents = "ModuleName.fromComponents: zero components"
94-
badName = "ModuleName.fromComponents: invalid components " ++ show components'
109+
fromComponents comps = fromString (intercalate "." comps)
110+
{-# DEPRECATED fromComponents "Exists for cabal-install only" #-}
95111

96112
-- | The module name @Main@.
97113
--
98114
main :: ModuleName
99-
main = ModuleName (stlFromStrings ["Main"])
115+
main = ModuleName (fromString "Main")
100116

101117
-- | The individual components of a hierarchical module name. For example
102118
--
103119
-- > components (fromString "A.B.C") = ["A", "B", "C"]
104120
--
105121
components :: ModuleName -> [String]
106-
components (ModuleName ms) = stlToStrings ms
122+
components mn = split (unModuleName mn)
123+
where
124+
split cs = case break (=='.') cs of
125+
(chunk,[]) -> chunk : []
126+
(chunk,_:rest) -> chunk : split rest
107127

108128
-- | Convert a module name to a file path, but without any file extension.
109129
-- For example:
110130
--
111131
-- > toFilePath (fromString "A.B.C") = "A/B/C"
112132
--
113133
toFilePath :: ModuleName -> FilePath
114-
toFilePath = intercalate [pathSeparator] . components
115-
116-
----------------------------------------------------------------------------
117-
-- internal helper
118-
119-
-- | Strict/unpacked representation of @[ShortText]@
120-
data ShortTextLst = STLNil
121-
| STLCons !ShortText !ShortTextLst
122-
deriving (Eq, Generic, Ord, Typeable, Data)
123-
124-
instance NFData ShortTextLst where
125-
rnf = flip seq ()
126-
127-
instance Show ShortTextLst where
128-
showsPrec p = showsPrec p . stlToList
129-
130-
131-
instance Read ShortTextLst where
132-
readsPrec p = map (first stlFromList) . readsPrec p
133-
134-
instance Binary ShortTextLst where
135-
put = put . stlToList
136-
get = stlFromList <$> get
137-
138-
instance Structured ShortTextLst
139-
140-
stlToList :: ShortTextLst -> [ShortText]
141-
stlToList STLNil = []
142-
stlToList (STLCons st next) = st : stlToList next
143-
144-
stlToStrings :: ShortTextLst -> [String]
145-
stlToStrings = map fromShortText . stlToList
146-
147-
stlFromList :: [ShortText] -> ShortTextLst
148-
stlFromList [] = STLNil
149-
stlFromList (x:xs) = STLCons x (stlFromList xs)
150-
151-
stlFromStrings :: [String] -> ShortTextLst
152-
stlFromStrings = stlFromList . map toShortText
134+
toFilePath = map f . unModuleName where
135+
f '.' = pathSeparator
136+
f c = c

Cabal/Distribution/PackageDescription/Parsec.hs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,13 @@ import Distribution.PackageDescription
5454
import Distribution.PackageDescription.Configuration (freeVars)
5555
import Distribution.PackageDescription.FieldGrammar
5656
import Distribution.PackageDescription.Quirks (patchQuirks)
57-
import Distribution.Parsec (parsec, simpleParsec)
57+
import Distribution.Parsec (parsec, simpleParsecBS)
5858
import Distribution.Parsec.FieldLineStream (fieldLineStreamFromBS)
5959
import Distribution.Parsec.Newtypes (CommaFSep, List, SpecVersion (..), Token)
6060
import Distribution.Parsec.Position (Position (..), zeroPos)
6161
import Distribution.Parsec.Warning (PWarnType (..))
6262
import Distribution.Pretty (prettyShow)
63-
import Distribution.Simple.Utils (fromUTF8BS)
63+
import Distribution.Simple.Utils (fromUTF8BS, toUTF8BS)
6464
import Distribution.Types.CondTree
6565
import Distribution.Types.Dependency (Dependency)
6666
import Distribution.Types.ForeignLib
@@ -109,12 +109,12 @@ parseGenericPackageDescription bs = do
109109
"Unsupported cabal-version. See https://github.com/haskell/cabal/issues/4899."
110110
_ -> pure ()
111111

112-
case readFields' bs' of
112+
case readFields' bs'' of
113113
Right (fs, lexWarnings) -> do
114114
when patched $
115115
parseWarning zeroPos PWTQuirkyCabalFile "Legacy cabal file"
116116
-- UTF8 is validated in a prepass step, afterwards parsing is lenient.
117-
parseGenericPackageDescription' ver lexWarnings (validateUTF8 bs') fs
117+
parseGenericPackageDescription' ver lexWarnings invalidUtf8 fs
118118
-- TODO: better marshalling of errors
119119
Left perr -> parseFatalFailure pos (show perr) where
120120
ppos = P.errorPos perr
@@ -123,6 +123,14 @@ parseGenericPackageDescription bs = do
123123
(patched, bs') = patchQuirks bs
124124
ver = scanSpecVersion bs'
125125

126+
invalidUtf8 = validateUTF8 bs'
127+
128+
-- if there are invalid utf8 characters, we make the bytestring valid.
129+
bs'' = case invalidUtf8 of
130+
Nothing -> bs'
131+
Just _ -> toUTF8BS (fromUTF8BS bs')
132+
133+
126134
-- | 'Maybe' variant of 'parseGenericPackageDescription'
127135
parseGenericPackageDescriptionMaybe :: BS.ByteString -> Maybe GenericPackageDescription
128136
parseGenericPackageDescriptionMaybe =
@@ -851,7 +859,7 @@ scanSpecVersion bs = do
851859
--
852860
-- This is currently more tolerant regarding leading 0 digits.
853861
--
854-
ver <- simpleParsec (BS8.unpack vers)
862+
ver <- simpleParsecBS vers
855863
guard $ case versionNumbers ver of
856864
[_,_] -> True
857865
[_,_,_] -> True

0 commit comments

Comments
 (0)