Skip to content

Commit

Permalink
A couple of minor optimisations for the generated lexer.
Browse files Browse the repository at this point in the history
  • Loading branch information
alecthomas committed Oct 29, 2022
1 parent 088de44 commit 4fbbf51
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 41 deletions.
68 changes: 52 additions & 16 deletions cmd/participle/gen_lexer_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"regexp/syntax"
"sort"
"text/template"
"unicode"
"unicode/utf8"

"github.com/alecthomas/participle/v2/lexer"
Expand Down Expand Up @@ -224,7 +225,11 @@ func generateRegexMatch(w io.Writer, lexerName, name, pattern string) error {
case syntax.OpLiteral: // matches Runes sequence
n := utf8.RuneCountInString(string(re.Rune))
if re.Flags&syntax.FoldCase != 0 {
fmt.Fprintf(w, "if p+%d <= len(s) && strings.EqualFold(s[p:p+%d], %q) { return p+%d }\n", n, n, string(re.Rune), n)
if n == 1 && !unicode.IsLetter(re.Rune[0]) {
fmt.Fprintf(w, "if p < len(s) && s[p] == %q { return p+1 }\n", re.Rune[0])
} else {
fmt.Fprintf(w, "if p+%d <= len(s) && strings.EqualFold(s[p:p+%d], %q) { return p+%d }\n", n, n, string(re.Rune), n)
}
} else {
if n == 1 {
fmt.Fprintf(w, "if p < len(s) && s[p] == %q { return p+1 }\n", re.Rune[0])
Expand All @@ -237,12 +242,15 @@ func generateRegexMatch(w io.Writer, lexerName, name, pattern string) error {
case syntax.OpCharClass: // matches Runes interpreted as range pair list
fmt.Fprintf(w, "if len(s) <= p { return -1 }\n")
needDecode := false
asciiSet := true
for i := 0; i < len(re.Rune); i += 2 {
l, r := re.Rune[i], re.Rune[i+1]
ln, rn := utf8.RuneLen(l), utf8.RuneLen(r)
if ln != 1 || rn != 1 {
needDecode = true
break
}
if l > 0x7f || r > 0x7f || l != r {
asciiSet = false
}
}
if needDecode {
Expand All @@ -251,25 +259,44 @@ func generateRegexMatch(w io.Writer, lexerName, name, pattern string) error {
} else {
fmt.Fprintf(w, "rn := s[p]\n")
}
fmt.Fprintf(w, "switch {\n")
for i := 0; i < len(re.Rune); i += 2 {
l, r := re.Rune[i], re.Rune[i+1]
ln, rn := utf8.RuneLen(l), utf8.RuneLen(r)
if ln == 1 && rn == 1 {
if l == r {
fmt.Fprintf(w, "case rn == %q: return p+1\n", l)
} else {
fmt.Fprintf(w, "case rn >= %q && rn <= %q: return p+1\n", l, r)
}
if asciiSet {
if len(re.Rune) == 2 {
fmt.Fprintf(w, "if rn == %q { return p+1 }\n", re.Rune[0])
} else if len(re.Rune) == 4 {
fmt.Fprintf(w, "if rn == %q || rn == %q { return p+1 }\n", re.Rune[0], re.Rune[2])
} else {
if l == r {
fmt.Fprintf(w, "case rn == %q: return p+n\n", l)
fmt.Fprintf(w, "switch rn {\n")
fmt.Fprintf(w, "case ")
for i := 0; i < len(re.Rune); i += 2 {
if i != 0 {
fmt.Fprintf(w, ",")
}
fmt.Fprintf(w, "%q", re.Rune[i])
}
fmt.Fprintf(w, ": return p+1\n")
fmt.Fprintf(w, "}\n")
}
} else {
fmt.Fprintf(w, "switch {\n")
for i := 0; i < len(re.Rune); i += 2 {
l, r := re.Rune[i], re.Rune[i+1]
ln, rn := utf8.RuneLen(l), utf8.RuneLen(r)
if ln == 1 && rn == 1 {
if l == r {
fmt.Fprintf(w, "case rn == %q: return p+1\n", l)
} else {
fmt.Fprintf(w, "case rn >= %q && rn <= %q: return p+1\n", l, r)
}
} else {
fmt.Fprintf(w, "case rn >= %q && rn <= %q: return p+n\n", l, r)
if l == r {
fmt.Fprintf(w, "case rn == %q: return p+n\n", l)
} else {
fmt.Fprintf(w, "case rn >= %q && rn <= %q: return p+n\n", l, r)
}
}
}
fmt.Fprintf(w, "}\n")
}
fmt.Fprintf(w, "}\n")
fmt.Fprintf(w, "return -1\n")

case syntax.OpAnyCharNotNL: // matches any character except newline
Expand Down Expand Up @@ -383,3 +410,12 @@ func flatten(re *syntax.Regexp) (out []*syntax.Regexp) {
out = append(out, re)
return
}

// isSimpleRuneRange reports whether every pair in runes describes exactly one
// rune (low bound equal to high bound) whose UTF-8 encoding is a single byte.
//
// runes is read as consecutive (lo, hi) pairs, so it must have even length;
// an empty slice yields true.
func isSimpleRuneRange(runes []rune) bool {
	for lo := 0; lo < len(runes); lo += 2 {
		hi := lo + 1
		// A pair with distinct bounds is a real range, not a single rune.
		if runes[lo] != runes[hi] {
			return false
		}
		// Anything that is not encoded as exactly one byte disqualifies the set.
		if utf8.RuneLen(runes[lo]) != 1 {
			return false
		}
	}
	return true
}
50 changes: 25 additions & 25 deletions lexer/internal/basiclexer.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 4fbbf51

Please sign in to comment.