|
| 1 | +{-# LANGUAGE OverloadedStrings #-} |
| 2 | +{-# LANGUAGE ScopedTypeVariables #-} |
| 3 | +module Distribution.FieldGrammar.Described ( |
| 4 | + Described (..), |
| 5 | + describeDoc, |
| 6 | + -- * Regular expressions |
| 7 | + Regex (..), |
| 8 | + RTerm (..), |
| 9 | + reHsString, |
| 10 | + reEps, |
| 11 | + reChar, |
| 12 | + reChars, |
| 13 | + reDot, |
| 14 | + reComma, |
| 15 | + reSpacedComma, |
| 16 | + reMunchCS, |
| 17 | + reMunch1CS, |
| 18 | + -- * Character Sets |
| 19 | + csChar, |
| 20 | + csAlphaNum, |
| 21 | + csNotSpace, |
| 22 | + csNotSpaceOrComma, |
| 23 | + -- * Pretty-printing |
| 24 | + regexDoc, |
| 25 | + -- * Generation |
| 26 | + generate, |
| 27 | + ) where |
| 28 | + |
| 29 | +import Data.Char (isAlphaNum, isControl) |
| 30 | +import Data.String (IsString (..)) |
| 31 | +import Distribution.Compat.Prelude |
| 32 | +import Prelude () |
| 33 | + |
| 34 | +import Distribution.Parsec (Parsec) |
| 35 | +import Distribution.Pretty (Pretty) |
| 36 | + |
| 37 | +import qualified Distribution.Utils.AnsiCharSet as ACS |
| 38 | +import qualified Text.PrettyPrint as PP |
| 39 | + |
| 40 | +-- | Class describing the pretty/parsec format of a. |
| 41 | +class (Pretty a, Parsec a) => Described a where |
| 42 | + -- | A pretty document of "regex" describing the field format |
| 43 | + describe :: proxy a -> Regex RTerm |
| 44 | + |
| 45 | +-- | Pretty-print description. |
| 46 | +-- |
| 47 | +-- >>> describeDoc ([] :: [Bool]) |
| 48 | +-- True|False |
| 49 | +describeDoc :: Described a => proxy a -> PP.Doc |
| 50 | +describeDoc p = regexDoc (describe p) |
| 51 | + |
| 52 | +instance Described Bool where |
| 53 | + describe _ = REUnion ["True", "False"] |
| 54 | + |
| 55 | +instance Described a => Described (Identity a) where |
| 56 | + describe _ = describe ([] :: [a]) |
| 57 | + |
| 58 | +------------------------------------------------------------------------------- |
| 59 | +-- Regex |
| 60 | +------------------------------------------------------------------------------- |
| 61 | + |
| 62 | +-- | Regular expressions tuned for 'Described' use-case. |
| 63 | +data Regex a |
| 64 | + = REAppend [Regex a] -- ^ append @ab@ |
| 65 | + | REUnion [Regex a] -- ^ union @a|b@ |
| 66 | + | REMunch (Regex a) (Regex a) -- ^ star @a*@, with a separator |
| 67 | + | REMunch1 (Regex a) (Regex a) -- ^ plus @a+@, with a separator |
| 68 | + | REOpt (Regex a) -- ^ optional @r?@ |
| 69 | + | REString String -- ^ literal string @abcd@ |
| 70 | + | RECharSet ACS.AnsiCharSet -- ^ charset @[:alnum:]@ |
| 71 | + | RESpaces -- ^ zero-or-more spaces |
| 72 | + | RESpaces1 -- ^ one-or-more spaces |
| 73 | + | REVar a -- ^ variable |
| 74 | + | RELet String (Regex a) |
| 75 | + (Regex (Maybe a)) -- ^ named expression |
| 76 | + | RERec String (Regex (Maybe a)) -- ^ recursive expressions |
| 77 | + |
| 78 | + | RETodo -- ^ unspecified |
| 79 | + deriving (Eq, Ord, Show) |
| 80 | + |
| 81 | +-- | Terminals used by field grammars. |
| 82 | +data RTerm |
| 83 | + = RHaskellString |
| 84 | + | RUnqualName |
| 85 | + deriving (Eq, Ord, Show) |
| 86 | + |
| 87 | +reHsString :: Regex RTerm |
| 88 | +reHsString = REVar RHaskellString |
| 89 | + |
| 90 | +reEps :: Regex a |
| 91 | +reEps = REAppend [] |
| 92 | + |
| 93 | +reChar :: Char -> Regex a |
| 94 | +reChar = RECharSet . ACS.singleton |
| 95 | + |
| 96 | +reChars :: [Char] -> Regex a |
| 97 | +reChars = RECharSet . ACS.fromList |
| 98 | + |
| 99 | +reDot :: Regex a |
| 100 | +reDot = reChar '.' |
| 101 | + |
| 102 | +reComma :: Regex a |
| 103 | +reComma = reChar ',' |
| 104 | + |
| 105 | +reSpacedComma :: Regex a |
| 106 | +reSpacedComma = RESpaces <> reComma <> RESpaces |
| 107 | + |
| 108 | +reMunch1CS :: ACS.AnsiCharSet -> Regex a |
| 109 | +reMunch1CS = REMunch1 reEps . RECharSet |
| 110 | + |
| 111 | +reMunchCS :: ACS.AnsiCharSet -> Regex a |
| 112 | +reMunchCS = REMunch reEps . RECharSet |
| 113 | + |
| 114 | +instance IsString (Regex a) where |
| 115 | + fromString = REString |
| 116 | + |
| 117 | +instance Semigroup (Regex a) where |
| 118 | + x <> y = REAppend (unAppend x ++ unAppend y) where |
| 119 | + unAppend (REAppend rs) = rs |
| 120 | + unAppend r = [r] |
| 121 | + |
| 122 | +instance Monoid (Regex a) where |
| 123 | + mempty = REAppend [] |
| 124 | + mappend = (<>) |
| 125 | + |
| 126 | +------------------------------------------------------------------------------- |
| 127 | +-- Character sets |
| 128 | +------------------------------------------------------------------------------- |
| 129 | + |
| 130 | +csChar :: Char -> ACS.AnsiCharSet |
| 131 | +csChar = ACS.singleton |
| 132 | + |
| 133 | +csAlphaNum :: ACS.AnsiCharSet |
| 134 | +csAlphaNum = ACS.alphanum |
| 135 | + |
| 136 | +csNotSpace :: ACS.AnsiCharSet |
| 137 | +csNotSpace = ACS.filter (\c -> not (isControl c) && c /= ' ') ACS.full |
| 138 | + |
| 139 | +csNotSpaceOrComma :: ACS.AnsiCharSet |
| 140 | +csNotSpaceOrComma = ACS.filter (/= ',') csNotSpace |
| 141 | + |
| 142 | +------------------------------------------------------------------------------- |
| 143 | +-- Pretty-printing |
| 144 | +------------------------------------------------------------------------------- |
| 145 | + |
| 146 | +-- | |
| 147 | +-- |
| 148 | +-- >>> regexDoc $ REString "True" |
| 149 | +-- True |
| 150 | +-- |
| 151 | +-- >>> regexDoc $ REString "foo" <> REString "bar" |
| 152 | +-- foobar |
| 153 | +-- |
| 154 | +-- >>> regexDoc $ REUnion [REString "False" , REString "True"] |
| 155 | +-- False|True |
| 156 | +-- |
| 157 | +-- >>> regexDoc $ REMunch1 $ RECharSet $ CSAlphaNum <> CSChar '-' |
| 158 | +-- [[:alnum:]-]+ |
| 159 | +-- |
| 160 | +-- >>> regexDoc $ REMunch1 $ REUnion [ RECharSet $ CSAlphaNum <> CSChar '-', REString "weird"] |
| 161 | +-- ([[:alnum:]-]|weird)+ |
| 162 | +-- |
| 163 | +-- >>> regexDoc $ RENamed "something" |
| 164 | +-- {something} |
| 165 | +-- |
| 166 | +regexDoc :: Regex RTerm -> PP.Doc |
| 167 | +regexDoc = go termDoc 0 where |
| 168 | + go :: (a -> PP.Doc) -> Int -> Regex a -> PP.Doc |
| 169 | + go f d (REAppend rs) = parensIf (d > 2) $ PP.hcat (map (go f 2) rs) |
| 170 | + go f d (REUnion rs) = parensIf (d > 1) $ PP.hcat (PP.punctuate (PP.text "\\mid") (map (go f 1) rs)) |
| 171 | + |
| 172 | + go f d (REMunch sep r) = parensIf (d > 3) $ |
| 173 | + PP.text "{" <<>> go f 3 r <<>> PP.text "}^\\ast_{" <<>> go f 0 sep <<>> PP.text "}" |
| 174 | + go f d (REMunch1 sep r) = parensIf (d > 3) $ |
| 175 | + PP.text "{" <<>> go f 3 r <<>> PP.text "}^+_{" <<>> go f 0 sep <<>> PP.text "}" |
| 176 | + go f d (REOpt r) = parensIf (d > 3) $ |
| 177 | + PP.text "{" <<>> go f 3 r <<>> PP.text "}^?" |
| 178 | + |
| 179 | + go _ _ (REString s) = PP.text "\\mathop{\\mathord{\"}\\mathtt{" <<>> PP.hcat (map charDoc s) <<>> PP.text "}\\mathord{\"}}" |
| 180 | + go _ _ (RECharSet cs) = charsetDoc cs |
| 181 | + |
| 182 | + go _ _ RESpaces = "\\circ" |
| 183 | + go _ _ RESpaces1 = "\\bullet" |
| 184 | + |
| 185 | + go f _ (REVar a) = f a |
| 186 | + go f d (RELet n _ r) = go (maybe (terminalDoc n) f) d r |
| 187 | + go _ _ (RERec n _) = terminalDoc n |
| 188 | + |
| 189 | + go _ _ RETodo = PP.text "\\mathsf{\\color{red}{TODO}}" |
| 190 | + |
| 191 | + parensIf :: Bool -> PP.Doc -> PP.Doc |
| 192 | + parensIf True d = PP.text "\\left(" <<>> d <<>> PP.text "\\right)" |
| 193 | + parensIf False d = d |
| 194 | + |
| 195 | +termDoc :: RTerm -> PP.Doc |
| 196 | +termDoc RHaskellString = terminalDoc "hs-string" |
| 197 | +termDoc RUnqualName = terminalDoc "unqual-name" |
| 198 | + |
| 199 | +terminalDoc :: String -> PP.Doc |
| 200 | +terminalDoc s = PP.text "\\mathop{\\mathit{" <<>> PP.hcat (map charDoc s) <<>> PP.text "}}" |
| 201 | + |
| 202 | +charDoc :: Char -> PP.Doc |
| 203 | +charDoc ' ' = PP.text " " |
| 204 | +charDoc '{' = PP.text "\\{" |
| 205 | +charDoc '}' = PP.text "\\}" |
| 206 | +charDoc c |
| 207 | + | isAlphaNum c = PP.char c |
| 208 | + | otherwise = PP.text ("\\text{" ++ c : "}") |
| 209 | + |
| 210 | +inquotes :: PP.Doc -> PP.Doc |
| 211 | +inquotes d = "\\mathop{\\mathord{\"}" <<>> d <<>> "\\mathord{\"}}" |
| 212 | + |
| 213 | +-- | |
| 214 | +-- |
| 215 | +-- >>> traverse_ (print . charsetDoc) [CSDigit, CSAlphaNum, CSNotSpaceOrComma, CSNotSpace, CSChar 'a'] |
| 216 | +-- [:digit:] |
| 217 | +-- [:alnum:] |
| 218 | +-- [^ ,] |
| 219 | +-- [^ ] |
| 220 | +-- a |
| 221 | +-- |
| 222 | +-- >>> print $ charsetDoc $ CSAlphaNum <> CSChar '-' |
| 223 | +-- [[:alnum:]-] |
| 224 | +-- |
| 225 | +charsetDoc :: ACS.AnsiCharSet -> PP.Doc |
| 226 | +charsetDoc acs |
| 227 | + | acs == csAlphaNum = terminalDoc "alpha-num" |
| 228 | + | acs == csNotSpace = terminalDoc "not-space" |
| 229 | + | acs == csNotSpaceOrComma = terminalDoc "not-space-nor-comma" |
| 230 | +charsetDoc acs = case ACS.ranges acs of |
| 231 | + [] -> PP.brackets PP.empty |
| 232 | + [(x,y)] | x == y -> inquotes (charDoc x) |
| 233 | + rs -> PP.brackets $ PP.hcat $ map rangeDoc rs |
| 234 | + where |
| 235 | + rangeDoc :: (Char, Char) -> PP.Doc |
| 236 | + rangeDoc (x, y) | x == y = inquotes (charDoc x) |
| 237 | + | otherwise = inquotes (charDoc x) <<>> PP.char '-' <<>> inquotes (charDoc y) |
| 238 | + |
| 239 | +------------------------------------------------------------------------------- |
| 240 | +-- Generation |
| 241 | +------------------------------------------------------------------------------- |
| 242 | + |
| 243 | +-- | Generate an example string. |
| 244 | +generate |
| 245 | + :: Monad m |
| 246 | + => (Int -> Int -> m Int) -- ^ generate integer in range |
| 247 | + -> (a -> m String) -- ^ generate variables |
| 248 | + -> Regex a -- ^ regex |
| 249 | + -> m String -- ^ an example string. |
| 250 | +generate rnd f (REAppend rs) = do |
| 251 | + xs <- traverse (generate rnd f) rs |
| 252 | + return (concat xs) |
| 253 | +generate rnd f (REUnion rs) = do |
| 254 | + n <- rnd 0 (length rs - 1) |
| 255 | + generate rnd f (rs !! n) |
| 256 | +generate rnd f (REMunch sep r) = do |
| 257 | + n <- rnd 0 5 |
| 258 | + xs <- traverse (generate rnd f) (intersperse sep (replicate n r)) |
| 259 | + return (concat xs) |
| 260 | +generate rnd f (REMunch1 sep r) = do |
| 261 | + n <- rnd 1 5 |
| 262 | + xs <- traverse (generate rnd f) (intersperse sep (replicate n r)) |
| 263 | + return (concat xs) |
| 264 | +generate rnd f (REOpt r) = do |
| 265 | + n <- rnd 0 2 |
| 266 | + case n of |
| 267 | + 0 -> return "" |
| 268 | + _ -> generate rnd f r |
| 269 | +generate _ _ (REString str) = return str |
| 270 | +generate rnd _ (RECharSet cs) = return <$> generateCS rnd cs |
| 271 | +generate rnd _ RESpaces1 = (\n -> replicate n ' ') <$> rnd 1 3 |
| 272 | +generate rnd _ RESpaces = (\n -> replicate n ' ') <$> rnd 0 3 |
| 273 | + |
| 274 | +generate _ f (REVar x) = f x |
| 275 | +generate _ _ (RELet _ _ _) = error "generate let" |
| 276 | +generate _ _ (RERec _ _) = error "generate rec" |
| 277 | +generate _ _ RETodo = return "TODO" |
| 278 | + |
| 279 | +generateCS |
| 280 | + :: Monad m |
| 281 | + => (Int -> Int -> m Int) -- ^ generate integer in range |
| 282 | + -> ACS.AnsiCharSet |
| 283 | + -> m Char |
| 284 | +generateCS rnd asc = do |
| 285 | + n <- rnd 0 (ACS.size asc - 1) |
| 286 | + return (ACS.toList asc !! n) |
0 commit comments