From fcf5f6cb72978812ac36085183b5c52701c069cb Mon Sep 17 00:00:00 2001 From: Josh Humphries Date: Thu, 2 Mar 2023 19:40:35 -0500 Subject: [PATCH] encoding/prototext: allow whitespace and comments between minus sign and number in negative numeric literal The text format specification[1] indicates that whitespace and comments may appear after a minus sign and before the subsequent numeric component in negative number literals. But the Go implementation does not allow this. This brings the Go implementation info conformance with this aspect. Fixes golang/protobuf#1526 [1] https://protobuf.dev/reference/protobuf/textformat-spec/#parsing Change-Id: I3996c89ee9d37cf2b7502fc6736d6e2ed6dbcf43 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/473015 Reviewed-by: Lasse Folger Reviewed-by: Damien Neil --- encoding/prototext/decode_test.go | 7 ++-- internal/encoding/text/decode.go | 5 +-- internal/encoding/text/decode_number.go | 43 ++++++++++++++++++------- internal/encoding/text/decode_test.go | 8 +++++ 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/encoding/prototext/decode_test.go b/encoding/prototext/decode_test.go index eed64dbf1..3597d9b6f 100644 --- a/encoding/prototext/decode_test.go +++ b/encoding/prototext/decode_test.go @@ -127,9 +127,9 @@ opt_int64: 3735928559 opt_uint32: 0xff opt_uint64: 0xdeadbeef opt_sint32: -1001 -opt_sint64: -0xffff +opt_sint64: - 0xffff opt_fixed64: 64 -opt_sfixed32: -32 +opt_sfixed32: - 32 opt_float: 1.234 opt_double: 1.23e+100 opt_bytes: "\xe8\xb0\xb7\xe6\xad\x8c" @@ -164,7 +164,8 @@ s_int64: 3735928559 s_uint32: 0xff s_uint64: 0xdeadbeef s_sint32: -1001 -s_sint64: -0xffff +s_sint64: - # + 0xffff s_fixed64: 64 s_sfixed32: -32 s_float: 1.234 diff --git a/internal/encoding/text/decode.go b/internal/encoding/text/decode.go index 427c62d03..87853e786 100644 --- a/internal/encoding/text/decode.go +++ b/internal/encoding/text/decode.go @@ -412,12 +412,13 @@ func (d *Decoder) parseFieldName() (tok Token, err error) { // Field number. Identify if input is a valid number that is not negative // and is decimal integer within 32-bit range. if num := parseNumber(d.in); num.size > 0 { + str := num.string(d.in) if !num.neg && num.kind == numDec { - if _, err := strconv.ParseInt(string(d.in[:num.size]), 10, 32); err == nil { + if _, err := strconv.ParseInt(str, 10, 32); err == nil { return d.consumeToken(Name, num.size, uint8(FieldNumber)), nil } } - return Token{}, d.newSyntaxError("invalid field number: %s", d.in[:num.size]) + return Token{}, d.newSyntaxError("invalid field number: %s", str) } return Token{}, d.newSyntaxError("invalid field name: %s", errId(d.in)) diff --git a/internal/encoding/text/decode_number.go b/internal/encoding/text/decode_number.go index 81a5d8c86..3dc8e9787 100644 --- a/internal/encoding/text/decode_number.go +++ b/internal/encoding/text/decode_number.go @@ -15,17 +15,12 @@ func (d *Decoder) parseNumberValue() (Token, bool) { if num.neg { numAttrs |= isNegative } - strSize := num.size - last := num.size - 1 - if num.kind == numFloat && (d.in[last] == 'f' || d.in[last] == 'F') { - strSize = last - } tok := Token{ kind: Scalar, attrs: numberValue, pos: len(d.orig) - len(d.in), raw: d.in[:num.size], - str: string(d.in[:strSize]), + str: num.string(d.in), numAttrs: numAttrs, } d.consume(num.size) @@ -46,6 +41,27 @@ type number struct { kind uint8 neg bool size int + // if neg, this is the length of whitespace and comments between + // the minus sign and the rest fo the number literal + sep int +} + +func (num number) string(data []byte) string { + strSize := num.size + last := num.size - 1 + if num.kind == numFloat && (data[last] == 'f' || data[last] == 'F') { + strSize = last + } + if num.neg && num.sep > 0 { + // strip whitespace/comments between negative sign and the rest + strLen := strSize - num.sep + str := make([]byte, strLen) + str[0] = data[0] + copy(str[1:], data[num.sep+1:strSize]) + return string(str) + } + return string(data[:strSize]) + } // parseNumber constructs a number object from given input. It allows for the @@ -67,6 +83,7 @@ func parseNumber(input []byte) number { } // Optional - + var sep int if s[0] == '-' { neg = true s = s[1:] @@ -74,12 +91,14 @@ func parseNumber(input []byte) number { if len(s) == 0 { return number{} } + // Consume any whitespace or comments between the + // negative sign and the rest of the number + lenBefore := len(s) + s = consume(s, 0) + sep = lenBefore - len(s) + size += sep } - // C++ allows for whitespace and comments in between the negative sign and - // the rest of the number. This logic currently does not but is consistent - // with v1. - switch { case s[0] == '0': if len(s) > 1 { @@ -116,7 +135,7 @@ func parseNumber(input []byte) number { if len(s) > 0 && !isDelim(s[0]) { return number{} } - return number{kind: kind, neg: neg, size: size} + return number{kind: kind, neg: neg, size: size, sep: sep} } } s = s[1:] @@ -188,5 +207,5 @@ func parseNumber(input []byte) number { return number{} } - return number{kind: kind, neg: neg, size: size} + return number{kind: kind, neg: neg, size: size, sep: sep} } diff --git a/internal/encoding/text/decode_test.go b/internal/encoding/text/decode_test.go index a7dbad78e..528b1bc71 100644 --- a/internal/encoding/text/decode_test.go +++ b/internal/encoding/text/decode_test.go @@ -555,6 +555,14 @@ func TestDecoder(t *testing.T) { in: "-123", want: []R{{E: "invalid field number: -123"}}, }, + { + in: "- \t 123.321e6", + want: []R{{E: "invalid field number: -123.321e6"}}, + }, + { + in: "- # negative\n 123", + want: []R{{E: "invalid field number: -123"}}, + }, { // Field number > math.MaxInt32. in: "2147483648:",