Skip to content

Commit f96ba1b

Browse files
unquote should check for invalid UTF-8 code points (#3595)
Quoted tokens can contain both UTF-8 byte and code point literals, which should be interpreted when the token is quoted. However, we also need to check whether the interpreted literals are valid UTF-8. That check now happens in unquote. Signed-off-by: George Robinson <george.robinson@grafana.com>
1 parent ce6efba commit f96ba1b

File tree

2 files changed

+34
-1
lines changed

2 files changed

+34
-1
lines changed

matchers/parse/parse_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,14 @@ func TestMatchers(t *testing.T) {
107107
name: "equals unicode emoji in quotes",
108108
input: "{\"foo\"=\"🙂\"}",
109109
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
110+
}, {
111+
name: "equals unicode emoji as bytes in quotes",
112+
input: "{\"foo\"=\"\\xf0\\x9f\\x99\\x82\"}",
113+
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
114+
}, {
115+
name: "equals unicode emoji as code points in quotes",
116+
input: "{\"foo\"=\"\\U0001f642\"}",
117+
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
110118
}, {
111119
name: "equals unicode sentence in quotes",
112120
input: "{\"foo\"=\"🙂bar\"}",
@@ -199,6 +207,10 @@ func TestMatchers(t *testing.T) {
199207
name: "no unquoted escape sequences",
200208
input: "{foo=bar\\n}",
201209
error: "8:9: \\: invalid input: expected a comma or close brace",
210+
}, {
211+
name: "invalid unicode",
212+
input: "{\"foo\"=\"\\xf0\\x9f\"}",
213+
error: "7:17: \"\\xf0\\x9f\": invalid input",
202214
}}
203215

204216
for _, test := range tests {
@@ -244,6 +256,14 @@ func TestMatcher(t *testing.T) {
244256
name: "equals unicode emoji",
245257
input: "{foo=🙂}",
246258
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
259+
}, {
260+
name: "equals unicode emoji as bytes in quotes",
261+
input: "{\"foo\"=\"\\xf0\\x9f\\x99\\x82\"}",
262+
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
263+
}, {
264+
name: "equals unicode emoji as code points in quotes",
265+
input: "{\"foo\"=\"\\U0001f642\"}",
266+
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
247267
}, {
248268
name: "equals unicode sentence",
249269
input: "{foo=🙂bar}",
@@ -331,6 +351,10 @@ func TestMatcher(t *testing.T) {
331351
name: "two or more returns error",
332352
input: "foo=bar,bar=baz",
333353
error: "expected 1 matcher, found 2",
354+
}, {
355+
name: "invalid unicode",
356+
input: "foo=\"\\xf0\\x9f\"",
357+
error: "4:14: \"\\xf0\\x9f\": invalid input",
334358
}}
335359

336360
for _, test := range tests {

matchers/parse/token.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
package parse
1515

1616
import (
17+
"errors"
1718
"fmt"
1819
"strconv"
20+
"unicode/utf8"
1921
)
2022

2123
type tokenKind int
@@ -82,7 +84,14 @@ func (t token) isOneOf(kinds ...tokenKind) bool {
8284
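// unquote returns the unquoted value of a quoted token, or an error if it cannot be unquoted or is not valid UTF-8. If the token is not quoted, its value is returned unmodified.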
8385
func (t token) unquote() (string, error) {
8486
if t.kind == tokenQuoted {
85-
return strconv.Unquote(t.value)
87+
unquoted, err := strconv.Unquote(t.value)
88+
if err != nil {
89+
return "", err
90+
}
91+
if !utf8.ValidString(unquoted) {
92+
return "", errors.New("quoted string contains invalid UTF-8 code points")
93+
}
94+
return unquoted, nil
8695
}
8796
return t.value, nil
8897
}

0 commit comments

Comments
 (0)