From fcf5f6cb72978812ac36085183b5c52701c069cb Mon Sep 17 00:00:00 2001
From: Josh Humphries <jhumphries@buf.build>
Date: Thu, 2 Mar 2023 19:40:35 -0500
Subject: [PATCH] encoding/prototext: allow whitespace and comments between
 minus sign and number in negative numeric literal

The text format specification[1] indicates that whitespace and comments
may appear after a minus sign and before the subsequent numeric component
in negative number literals. But the Go implementation does not allow
this.

This brings the Go implementation info conformance with this aspect.

Fixes golang/protobuf#1526

[1] https://protobuf.dev/reference/protobuf/textformat-spec/#parsing

Change-Id: I3996c89ee9d37cf2b7502fc6736d6e2ed6dbcf43
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/473015
Reviewed-by: Lasse Folger <lassefolger@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
---
 encoding/prototext/decode_test.go       |  7 ++--
 internal/encoding/text/decode.go        |  5 +--
 internal/encoding/text/decode_number.go | 43 ++++++++++++++++++-------
 internal/encoding/text/decode_test.go   |  8 +++++
 4 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/encoding/prototext/decode_test.go b/encoding/prototext/decode_test.go
index eed64dbf1..3597d9b6f 100644
--- a/encoding/prototext/decode_test.go
+++ b/encoding/prototext/decode_test.go
@@ -127,9 +127,9 @@ opt_int64: 3735928559
 opt_uint32: 0xff
 opt_uint64: 0xdeadbeef
 opt_sint32: -1001
-opt_sint64: -0xffff
+opt_sint64: -   0xffff
 opt_fixed64: 64
-opt_sfixed32: -32
+opt_sfixed32: -		32
 opt_float: 1.234
 opt_double: 1.23e+100
 opt_bytes: "\xe8\xb0\xb7\xe6\xad\x8c"
@@ -164,7 +164,8 @@ s_int64: 3735928559
 s_uint32: 0xff
 s_uint64: 0xdeadbeef
 s_sint32: -1001
-s_sint64: -0xffff
+s_sint64: -  #
+             0xffff
 s_fixed64: 64
 s_sfixed32: -32
 s_float: 1.234
diff --git a/internal/encoding/text/decode.go b/internal/encoding/text/decode.go
index 427c62d03..87853e786 100644
--- a/internal/encoding/text/decode.go
+++ b/internal/encoding/text/decode.go
@@ -412,12 +412,13 @@ func (d *Decoder) parseFieldName() (tok Token, err error) {
 	// Field number. Identify if input is a valid number that is not negative
 	// and is decimal integer within 32-bit range.
 	if num := parseNumber(d.in); num.size > 0 {
+		str := num.string(d.in)
 		if !num.neg && num.kind == numDec {
-			if _, err := strconv.ParseInt(string(d.in[:num.size]), 10, 32); err == nil {
+			if _, err := strconv.ParseInt(str, 10, 32); err == nil {
 				return d.consumeToken(Name, num.size, uint8(FieldNumber)), nil
 			}
 		}
-		return Token{}, d.newSyntaxError("invalid field number: %s", d.in[:num.size])
+		return Token{}, d.newSyntaxError("invalid field number: %s", str)
 	}
 
 	return Token{}, d.newSyntaxError("invalid field name: %s", errId(d.in))
diff --git a/internal/encoding/text/decode_number.go b/internal/encoding/text/decode_number.go
index 81a5d8c86..3dc8e9787 100644
--- a/internal/encoding/text/decode_number.go
+++ b/internal/encoding/text/decode_number.go
@@ -15,17 +15,12 @@ func (d *Decoder) parseNumberValue() (Token, bool) {
 	if num.neg {
 		numAttrs |= isNegative
 	}
-	strSize := num.size
-	last := num.size - 1
-	if num.kind == numFloat && (d.in[last] == 'f' || d.in[last] == 'F') {
-		strSize = last
-	}
 	tok := Token{
 		kind:     Scalar,
 		attrs:    numberValue,
 		pos:      len(d.orig) - len(d.in),
 		raw:      d.in[:num.size],
-		str:      string(d.in[:strSize]),
+		str:      num.string(d.in),
 		numAttrs: numAttrs,
 	}
 	d.consume(num.size)
@@ -46,6 +41,27 @@ type number struct {
 	kind uint8
 	neg  bool
 	size int
+	// if neg, this is the length of whitespace and comments between
+	// the minus sign and the rest fo the number literal
+	sep int
+}
+
+func (num number) string(data []byte) string {
+	strSize := num.size
+	last := num.size - 1
+	if num.kind == numFloat && (data[last] == 'f' || data[last] == 'F') {
+		strSize = last
+	}
+	if num.neg && num.sep > 0 {
+		// strip whitespace/comments between negative sign and the rest
+		strLen := strSize - num.sep
+		str := make([]byte, strLen)
+		str[0] = data[0]
+		copy(str[1:], data[num.sep+1:strSize])
+		return string(str)
+	}
+	return string(data[:strSize])
+
 }
 
 // parseNumber constructs a number object from given input. It allows for the
@@ -67,6 +83,7 @@ func parseNumber(input []byte) number {
 	}
 
 	// Optional -
+	var sep int
 	if s[0] == '-' {
 		neg = true
 		s = s[1:]
@@ -74,12 +91,14 @@ func parseNumber(input []byte) number {
 		if len(s) == 0 {
 			return number{}
 		}
+		// Consume any whitespace or comments between the
+		// negative sign and the rest of the number
+		lenBefore := len(s)
+		s = consume(s, 0)
+		sep = lenBefore - len(s)
+		size += sep
 	}
 
-	// C++ allows for whitespace and comments in between the negative sign and
-	// the rest of the number. This logic currently does not but is consistent
-	// with v1.
-
 	switch {
 	case s[0] == '0':
 		if len(s) > 1 {
@@ -116,7 +135,7 @@ func parseNumber(input []byte) number {
 				if len(s) > 0 && !isDelim(s[0]) {
 					return number{}
 				}
-				return number{kind: kind, neg: neg, size: size}
+				return number{kind: kind, neg: neg, size: size, sep: sep}
 			}
 		}
 		s = s[1:]
@@ -188,5 +207,5 @@ func parseNumber(input []byte) number {
 		return number{}
 	}
 
-	return number{kind: kind, neg: neg, size: size}
+	return number{kind: kind, neg: neg, size: size, sep: sep}
 }
diff --git a/internal/encoding/text/decode_test.go b/internal/encoding/text/decode_test.go
index a7dbad78e..528b1bc71 100644
--- a/internal/encoding/text/decode_test.go
+++ b/internal/encoding/text/decode_test.go
@@ -555,6 +555,14 @@ func TestDecoder(t *testing.T) {
 			in:   "-123",
 			want: []R{{E: "invalid field number: -123"}},
 		},
+		{
+			in:   "- \t 123.321e6",
+			want: []R{{E: "invalid field number: -123.321e6"}},
+		},
+		{
+			in:   "- # negative\n 123",
+			want: []R{{E: "invalid field number: -123"}},
+		},
 		{
 			// Field number > math.MaxInt32.
 			in:   "2147483648:",