Closed
Description
Looking at the C++
grammar code I noticed that IdentCont
is not grouped (), and doing so seems to affect performance.
As it is, we get these numbers from the playground when parsing itself:
duration: 0.0457s (45700µs)
id total % success fail definition
30604 14790 15814 Total counters
48.33 51.67 % success/fail
...
But if we group IdentCont
as IdentCont <- <(IdentStart (IdentRest)*)>
then:
duration: 0.0313s (31300µs)
id total % success fail definition
19182 5611 13571 Total counters
29.25 70.75 % success/fail
...
Here is the cpp-peglib
grammar mechanically extracted from peglib.h
:
#// Setup PEG syntax parser
Grammar <- (Spacing (Definition)+ EndOfFile)
Definition <-
((Ignore IdentCont Parameters LEFTARROW
Expression (Instruction)?) /
(Ignore Identifier LEFTARROW Expression
(Instruction)?))
Expression <- (Sequence ((SLASH Sequence))*)
Sequence <- ((CUT /Prefix))*
Prefix <- (((AND /NOT))? SuffixWithLabel)
SuffixWithLabel <-
(Suffix ((LABEL Identifier))?)
Suffix <- (Primary (Loop)?)
Loop <- (QUESTION /STAR /PLUS /Repetition)
Primary <-
((Ignore IdentCont Arguments
!(LEFTARROW)) /
(Ignore Identifier
!(((Parameters)? LEFTARROW))) /
(OPEN Expression CLOSE) /
(BeginTok Expression EndTok) /
(BeginCapScope Expression EndCapScope) /
(BeginCap Expression EndCap) /BackRef /
LiteralI /Dictionary /Literal /NegatedClass /
Class /DOT)
Identifier <- (IdentCont Spacing)
IdentCont <- <(IdentStart (IdentRest)*)>
IdentStart <- (!("↑") !("⇑")
([a-zA-Z_%] / [0x0080-0xFFFF]))
IdentRest <- (IdentStart / [0-9])
Dictionary <- (LiteralD ((PIPE LiteralD))+)
lit_ope <- ((['] <(((!([']) Char))*)>
['] Spacing) /
(["] <(((!(["]) Char))*)>
["] Spacing))
Literal <- lit_ope
LiteralD <- lit_ope
LiteralI <-
((['] <(((!([']) Char))*)> "'i"
Spacing) /
(["] <(((!(["]) Char))*)> "\"i"
Spacing))
#// NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
Class <- ('[' !('^')
<(((!(']') Range))+)> ']'
Spacing)
NegatedClass <- ("[^"
<(((!(']') Range))+)> ']'
Spacing)
#// NOTE: This is different from the original Brian Ford's paper, and this
#// modification allows us to specify `[+-]` as a valid char class.
Range <- ((Char '-' !(']') Char) /Char)
Char <-
(('\\' [abefnrtv'"\[\]\\^]) /
('\\' [0-3] [0-7] [0-7]) /
('\\' [0-7] ([0-7])?) /
("\\x" [0-9a-fA-F] ([0-9a-fA-F])?) /
("\\u"
(((('0' [0-9a-fA-F]) / "10")
([0-9a-fA-F]{4,4})) /
([0-9a-fA-F]{4,5}))) /
(!('\\') .))
Repetition <-
(BeginBlacket RepetitionRange EndBlacket)
RepetitionRange <- ((Number COMMA Number) /
(Number COMMA) /Number /
(COMMA Number))
Number <- (([0-9])+ Spacing)
LEFTARROW <- (("<-" / "←") Spacing)
~SLASH <- ('/' Spacing)
~PIPE <- ('|' Spacing)
AND <- ('&' Spacing)
NOT <- ('!' Spacing)
QUESTION <- ('?' Spacing)
STAR <- ('*' Spacing)
PLUS <- ('+' Spacing)
~OPEN <- ('(' Spacing)
~CLOSE <- (')' Spacing)
DOT <- ('.' Spacing)
CUT <- ("↑" Spacing)
~LABEL <- (('^' / "⇑") Spacing)
~Spacing <- ((Space /Comment))*
Comment <-
('#' ((!(EndOfLine) .))* EndOfLine)
Space <- (' ' / '\t' /EndOfLine)
EndOfLine <- ("\r\n" / '\n' / '\r')
EndOfFile <- !(.)
~BeginTok <- ('<' Spacing)
~EndTok <- ('>' Spacing)
~BeginCapScope <- ('$' '(' Spacing)
~EndCapScope <- (')' Spacing)
BeginCap <- ('$' <(IdentCont)> '<' Spacing)
~EndCap <- ('>' Spacing)
BackRef <- ('$' <(IdentCont)> Spacing)
IGNORE <- '~'
Ignore <- (IGNORE)?
Parameters <- (OPEN Identifier
((COMMA Identifier))* CLOSE)
Arguments <- (OPEN Expression
((COMMA Expression))* CLOSE)
~COMMA <- (',' Spacing)
#// Instruction grammars
Instruction <-
(BeginBlacket
((InstructionItem ((InstructionItemSeparator
InstructionItem))*))?
EndBlacket)
InstructionItem <-
(PrecedenceClimbing /ErrorMessage /NoAstOpt)
~InstructionItemSeparator <- (';' Spacing)
~SpacesZom <- (Space)*
~SpacesOom <- (Space)+
~BeginBlacket <- ('{' Spacing)
~EndBlacket <- ('}' Spacing)
#// PrecedenceClimbing instruction
PrecedenceClimbing <-
("precedence" SpacesOom PrecedenceInfo
((SpacesOom PrecedenceInfo))* SpacesZom)
PrecedenceInfo <-
(PrecedenceAssoc
((&(SpacesOom) PrecedenceOpe))+)
PrecedenceOpe <-
(([']
<(((!((Space / ['])) Char))*)>
[']) /
(["]
<(((!((Space / ["])) Char))*)>
["]) /
<(((!((PrecedenceAssoc /Space / '}'))
.))+)>)
PrecedenceAssoc <- [LR]
#// Error message instruction
ErrorMessage <-
("message" SpacesOom LiteralD SpacesZom)
#// No Ast node optimization instruction
NoAstOpt <- ("no_ast_opt" SpacesZom)
Here is the relevant C++
grammar code:
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
Metadata
Metadata
Assignees
Labels
No labels