@@ -79,16 +79,31 @@ notNewlineStart c = c /= '\n' && c /= '\r' ; {-# INLINE notNewlineStart #-}
79
79
80
80
-- === Char by char checking === --
81
81
82
- isDecDigitChar , isOctDigitChar , isBinDigitChar , isHexDigitChar , isIndentBodyChar :: Char -> Bool
83
- isDecDigitChar c = (c >= ' 0' && c <= ' 9' ) ; {-# INLINE isDecDigitChar #-}
84
- isOctDigitChar c = (c >= ' 0' && c <= ' 7' ) ; {-# INLINE isOctDigitChar #-}
85
- isBinDigitChar c = (c == ' 0' || c == ' 1' ) ; {-# INLINE isBinDigitChar #-}
86
- isHexDigitChar c = isDecDigitChar c || (c >= ' a' && c <= ' f' ) || (c >= ' A' && c <= ' F' ) ; {-# INLINE isHexDigitChar #-}
87
- isIndentBodyChar c = (c >= ' a' && c <= ' z' ) || (c >= ' A' && c <= ' Z' ) || isDecDigitChar c || c == ' _' ; {-# INLINE isIndentBodyChar #-}
82
+ isDecDigitChar , isOctDigitChar , isBinDigitChar , isHexDigitChar , isIdentBodyChar , isVarHead , isConsHead :: Char -> Bool
83
+ isDecDigitChar c = (c >= ' 0' && c <= ' 9' ) ; {-# INLINE isDecDigitChar #-}
84
+ isOctDigitChar c = (c >= ' 0' && c <= ' 7' ) ; {-# INLINE isOctDigitChar #-}
85
+ isBinDigitChar c = (c == ' 0' || c == ' 1' ) ; {-# INLINE isBinDigitChar #-}
86
+ isHexDigitChar c = isDecDigitChar c || (c >= ' a' && c <= ' f' ) || (c >= ' A' && c <= ' F' ) ; {-# INLINE isHexDigitChar #-}
87
+ isIdentBodyChar c = Char. isAlphaNum c || c == ' _' ; {-# INLINE isIdentBodyChar #-}
88
+ isVarHead c = Char. isLower c || c == ' _' ; {-# INLINE isVarHead #-}
89
+ isConsHead = Char. isUpper ; {-# INLINE isConsHead #-}
88
90
89
91
opChars :: [Char ]
90
92
opChars = " !$%&*+-/<>?^~\\ " ; {-# INLINE opChars #-}
91
93
94
+ -- === Names === --
95
+
96
+ lexVariable :: Lexer
97
+ lexVariable = checkSpecialVar
98
+ <$> (takeWhile isIdentBodyChar
99
+ <**> (option id $ flip Text32. snoc <$> (token ' ?' <|> token ' !' ))
100
+ <**> (option id $ flip (<>) <$> takeMany1 ' \' ' ))
101
+ {-# INLINE lexVariable #-}
102
+
103
+ lexConstructor :: Lexer
104
+ lexConstructor = Cons <$> takeWhile isIdentBodyChar
105
+ {-# INLINE lexConstructor #-}
106
+
92
107
93
108
-- === Numbers === --
94
109
@@ -299,8 +314,8 @@ symmap = Vector.generate symmapSize $ \i -> let c = Char.chr i in if
299
314
| c == markerBeginChar -> lexMarker
300
315
301
316
-- Identifiers & Keywords
302
- | varHead c -> checkSpecialVar <$> varBody
303
- | consHead c -> Cons <$> consBody
317
+ | isVarHead c -> lexVariable
318
+ | isConsHead c -> lexConstructor
304
319
305
320
-- Operators
306
321
| c == ' @' -> TypeApp <$ dropToken
@@ -316,23 +331,15 @@ symmap = Vector.generate symmapSize $ \i -> let c = Char.chr i in if
316
331
| c == rawStrQuote -> rawStr
317
332
| c == fmtStrQuote -> fmtStr
318
333
| c == natStrQuote -> natStr
319
- | decHead c -> lexNumber
334
+ | isDecDigitChar c -> lexNumber
320
335
321
336
-- Meta
322
337
| c == ' #' -> handleHash =<< takeMany ' #'
323
338
324
339
-- Utils
325
340
| otherwise -> unknownCharSym c
326
341
327
- where between a l r = a >= l && a <= r
328
- decHead c = between c ' 0' ' 9'
329
- varHead c = between c ' a' ' z' || c == ' _'
330
- consHead c = between c ' A' ' Z'
331
- consBody = indentBaseBody
332
- varBody = indentBaseBody <**> (option id $ flip Text32. snoc <$> (token ' ?' <|> token ' !' ))
333
- <**> (option id $ flip (<>) <$> takeMany1 ' \' ' )
334
- indentBaseBody = takeWhile isIndentBodyChar
335
- handleColons = handleReps [BlockStart , Typed ]
342
+ where handleColons = handleReps [BlockStart , Typed ]
336
343
handleDots = handleReps [Accessor , Range , Anything ]
337
344
handleEqs = handleReps [Assignment , Operator " ==" ]
338
345
handleHash = handleRepsM [pure Disable , lexComment, lexConfig]
@@ -379,8 +386,14 @@ topEntryPoint :: Lexer
379
386
topEntryPoint = peekToken >>= lexSymChar ; {-# INLINE topEntryPoint #-}
380
387
381
388
lexSymChar :: Char -> Lexer
382
- lexSymChar c = if chord < symmapSize then Vector. unsafeIndex symmap chord else unknownCharSym c
383
- where chord = Char. ord c
389
+ lexSymChar c
390
+ -- fetch lexers for ASCII from precomputed cache
391
+ | chord < symmapSize = Vector. unsafeIndex symmap chord
392
+ -- create lexers for unicode names on the fly
393
+ | isVarHead c = lexVariable
394
+ | isConsHead c = lexConstructor
395
+ | otherwise = unknownCharSym c
396
+ where chord = Char. ord c
384
397
{-# INLINE lexSymChar #-}
385
398
386
399
lexeme :: Symbol -> Parser (Symbol , Int )
0 commit comments