From 8fc3b60b272108bac7bca1ebda2e47d357c012f9 Mon Sep 17 00:00:00 2001
From: Doug Clark
Date: Mon, 2 May 2022 13:18:05 -0500
Subject: [PATCH] fixes #49 when extracting literals from pattern copy the bytes to prevent concats from smashing the pattern later

---
 regexp_test.go   | 21 +++++++++++++++++++++
 syntax/parser.go |  3 ++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/regexp_test.go b/regexp_test.go
index 1526daa..111fa74 100644
--- a/regexp_test.go
+++ b/regexp_test.go
@@ -1220,3 +1220,24 @@ func TestFuzzBytes_Match(t *testing.T) {
 		})
 	}
 }
+
+func TestConcatAccidentalPatternCharge(t *testing.T) {
+	// originally this pattern would parse incorrectly
+	// specifically the closing group would concat the string literals
+	// together but the raw rune slice would blow over the original pattern
+	// so the final bit of pattern parsing would be wrong
+	// fixed in #49
+	r, err := Compile(`(?<=1234\.\*56).*(?=890)`, 0)
+
+	if err != nil {
+		panic(err)
+	}
+
+	m, err := r.FindStringMatch(`1234.*567890`)
+	if err != nil {
+		panic(err)
+	}
+	if m == nil {
+		t.Fatal("Expected non-nil, got nil")
+	}
+}
diff --git a/syntax/parser.go b/syntax/parser.go
index f7f0448..5b7eafa 100644
--- a/syntax/parser.go
+++ b/syntax/parser.go
@@ -2067,7 +2067,8 @@ func (p *parser) addToConcatenate(pos, cch int, isReplacement bool) {
 	}
 
 	if cch > 1 {
-		str := p.pattern[pos : pos+cch]
+		str := make([]rune, cch)
+		copy(str, p.pattern[pos:pos+cch])
 
 		if p.useOptionI() && !isReplacement {
 			// We do the ToLower character by character for consistency.  With surrogate chars, doing