-
Notifications
You must be signed in to change notification settings - Fork 2
/
pgen_grammar.txt
76 lines (53 loc) · 2.31 KB
/
pgen_grammar.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Note - This is written in normal PEG notation, for my own understanding.
# It is not an example of how pgen is used; pgen's own syntax is different.
# Operators:
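# / - Ordered choice: try to match the left side; if that fails, try to match the right side.
# & - Positive lookahead: try to parse; succeeds if it matches, but rewinds so no input is consumed.
# ! - Negative lookahead: try to parse; succeeds only if it does not match. No input is consumed.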
# ? - Optionally match. Failure to match does not cause the rule to fail.
# * - Match zero or more. Returns a list of matches.
# + - Match one or more. Returns a list of matches.
# () - Matches if all expressions within match.
# {} - Code to insert into the parser. Must return an ASTNode* or NULL to check for match.
# : - Capture the info from a match inside the node for the current rule.
# | - Register an error using the string or expression on the right, and exit parsing.
# Notes:
# Instead of using an unbalanced { or } inside a codeexpr, use the macros LB or RB.
# Instead of the string literals "{" or "}", use the macros LBSTR or RBSTR.
##########
# PARSER #
##########
# Rules
# GrammarFile: the start rule. Zero or more items, each preceded by skippable
# text (`_`). For every item the alternatives are tried in order: TokenDef,
# then Directive, then Definition. The file ends with skippable text and EOF.
# NOTE(review): EOF is not defined in this file - presumably end of input
# (`!.`); confirm.
GrammarFile <- (_ (TokenDef / Directive / Definition))* _ EOF
# _: skippable text. Any number of whitespace runs (space, tab, CR, LF, VT),
# /* block comments */ and // line comments; may match the empty string.
# A line comment stops before the newline, which is then eaten as whitespace
# on the next iteration. Because `_` consumes newlines, a rule cannot expect
# to see a '\n' immediately after a `_`.
_ <- ([ \t\r\n\v]+ / "/*" (!"*/" .)* "*/" / "//" [^\n]*)*
# Directive: '%', skippable text, a lowercase name, then everything up to and
# including the next newline. The trailing newline is mandatory.
# NOTE(review): a directive on the last line of a file with no final newline
# would not match - confirm whether that is intended.
Directive <- '%' _ LowerIdent [^\n]*[\n]
# Definition: a parser rule. A lowercase rule name, an optional variable list
# in angle brackets, the '<-' arrow, and the rule body (SlashExpr).
Definition <- LowerIdent _ Variables? _ '<-' _ SlashExpr
# Variables: '<' ... '>' holding one or more comma-separated Variable entries.
Variables <- '<' _ Variable (_ ',' _ Variable)* _ '>'
# Variable: every character up to (not including) the next ',' or '>'.
# This can be empty, and it also swallows any whitespace before the delimiter.
Variable <- (!('>' / ',') .)*
# SlashExpr: one or more alternatives separated by '/'.
SlashExpr <- ModExprList (_ '/' ModExprList)*
# ModExprList: a sequence of zero or more modified expressions, so an
# alternative may be empty.
ModExprList <- (_ ModExpr)*
# ModExpr: a single expression with its modifiers, in this fixed order:
#   1. optional capture label:  LowerIdent ':'
#   2. any number of prefix operators:  '&' or '!'
#   3. the expression itself (BaseExpr)
#   4. any number of suffix operators:  '?', '*' or '+'
#   5. optional error clause:  '|' followed by an ErrorString or a BaseExpr
ModExpr <- (LowerIdent _ ':' _)?
(('&' / '!') _)*
BaseExpr
(_ ('?' / '*' / '+'))*
(_ '|' _ (ErrorString / BaseExpr))?
# BaseExpr: an unmodified expression. Alternatives, tried in order:
#   - the keywords 'prev' / 'next' (their meaning is not defined in this file)
#   - an UpperIdent
#   - a LowerIdent that is NOT followed by '<'. A following Definition has '<'
#     right after its name (either '<-' or the '<' that opens Variables), so
#     this lookahead keeps an expression list from swallowing the name of the
#     next rule.
#   - a code block (CodeExpr)
#   - a parenthesised SlashExpr
# NOTE(review): 'prev' / 'next' are matched as plain prefixes, so a rule name
# that merely starts with one of them would be split at that point - confirm.
# NOTE(review): the UpperIdent alternative has no guard equivalent to !'<',
# so the name of a TokenDef (UpperIdent ':') that follows a Definition looks
# like it would be consumed here - confirm.
BaseExpr <- ('prev' / 'next')
/ UpperIdent
/ LowerIdent _ !'<'
/ CodeExpr
/ '(' _ SlashExpr _ ')'
# CodeExpr: a '{' ... '}' block of code. Inside, a '{' is first tried as a
# nested CodeExpr (recursively); any other character that is not '}' is
# consumed as-is. The block ends at the first '}' that is not consumed by a
# nested CodeExpr.
CodeExpr <- '{' _ (&'{' CodeExpr / !'}' .)* _ '}'
# TokenDef: a token definition. An uppercase token name, ':', then either a
# string literal (LitDef) or a state machine (SMDef), terminated by ';' or by
# the end of the line.
# The terminator is two separate alternatives on purpose. `_` consumes
# newlines and PEG repetition never gives characters back, so a shared
# leading `_` would make the newline alternative unreachable.
#   - `_ ';'`          : a ';' after any skippable text (may be on a later line)
#   - `[ \t\r]* [\n]`  : only horizontal whitespace (and CR), then the newline
TokenDef <- UpperIdent _ ':' _ (LitDef / SMDef) (_ ';' / [ \t\r]* [\n])
# LitDef: a double-quoted literal. Each element is a Char, which may carry a
# leading backslash, so an escaped quote (\") does not end the literal.
LitDef <- '"' (!'"' Char)* '"'
# SMDef: a NumSet followed by a '{' ... '}' body that holds one or more
# entries of the form `Pair -> Num`. Each entry is terminated by ';' or a
# newline, optionally preceded by spaces. Only ' ' is skipped before the
# terminator, not `_`, so the newline is still there to be matched.
# NOTE(review): presumably a state machine (a set of states plus
# (states, chars) -> state transitions) - confirm against pgen itself.
SMDef <- NumSet _ '{' ( _ Pair _ '->' _ Num ' '* (';' / [\n]))+ _'}'
# NumSet: a set of numbers, written as one of (tried in order):
#   - a single Num
#   - a parenthesised range:  ( Num - Num )
#   - a parenthesised, comma-separated list of nested NumSets
NumSet <- Num /
'(' _ Num _ "-" _ Num _ ')' /
'(' _ NumSet _ ("," _ NumSet _)* ')'
# CharSet: either a single Char in single quotes, or a bracketed class with
# an optional leading '^' followed by any number of single Chars or
# Char-Char ranges, up to the closing ']'.
CharSet <- "'" Char "'" /
'[' '^'? (!']' Char ('-' !']' Char)?)* ']'
# Pair: a parenthesised ( NumSet , CharSet ) tuple, used on the left-hand
# side of an SMDef entry.
Pair <- '(' _ NumSet _ ',' _ CharSet _ ')'
# UpperIdent: one or more uppercase ASCII letters or underscores (no digits).
UpperIdent <- [_A-Z]+
# LowerIdent: one or more lowercase ASCII letters or underscores (no digits).
# A name made only of underscores matches both identifier rules.
LowerIdent <- [_a-z]+
# ErrorString: a double-quoted string that runs to the next '"'. Unlike
# LitDef it matches raw characters (`.`) rather than Char, so a backslash
# does not escape a quote here.
ErrorString <- '"' (!'"' .)* '"'
# Num: an optional leading '-' followed by one or more decimal digits.
Num <- '-'?[0-9]+ # Convertible to int
# Char: any single character, optionally preceded by a backslash (so an
# escape sequence such as \n or \" counts as one Char).
Char <- '\\'? .