modfile: remove trailing newline from comment tokens

In v0.2.0, the go.mod lexer removed trailing LF bytes from comment tokens. This regressed in v0.3.0. The documentation on Comment.Token says the trailing newline should not be included.

This CL fixes the lexer to strip trailing newlines again. It now strips both LF and CRLF newlines. It also includes a test to ensure comments are attached at the right place in the syntax tree with the right content.

Fixes golang/go#39913

Change-Id: I7fba0ed3c85f0a3c23fefc6b7fecfe6df7777aea
Reviewed-on: https://go-review.googlesource.com/c/mod/+/240557
Run-TryBot: Jay Conrod <jayconrod@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Matloob <matloob@golang.org>
golang · Jul 6, 2020 · 89ce4c7 · 89ce4c7
1 parent 0b26df4
commit 89ce4c7
Show file tree

Hide file tree

Showing 2 changed files with 125 additions and 0 deletions.
diff --git a/modfile/read.go b/modfile/read.go
@@ -477,9 +477,17 @@ func (in *input) startToken() {
 
 // endToken marks the end of an input token.
 // It records the actual token string in tok.text.
+// For comment tokens, a single trailing newline (LF or CRLF) is stripped from the recorded text.
 func (in *input) endToken(kind tokenKind) {
 	in.token.kind = kind
 	text := string(in.tokenStart[:len(in.tokenStart)-len(in.remaining)])
+	if kind.isComment() { // only text is trimmed; endPos below is still taken from in.pos
+		if strings.HasSuffix(text, "\r\n") {
+			text = text[:len(text)-2] // drop the two-byte CRLF
+		} else {
+			text = strings.TrimSuffix(text, "\n") // drop a lone LF; no-op if there is none
+		}
+	}
 	in.token.text = text
 	in.token.endPos = in.pos
 }

diff --git a/modfile/read_test.go b/modfile/read_test.go
@@ -445,3 +445,120 @@ func TestGoVersion(t *testing.T) {
 		})
 	}
 }
+
+func TestComments(t *testing.T) { // checks which syntax node each comment attaches to, and its exact token text
+	for _, test := range []struct {
+		desc, input, want string // want holds one line per comment: <node> <before|suffix|after> <quoted token>
+	}{
+		{
+			desc: "comment_only",
+			input: `
+// a
+// b
+`,
+			want: `
+comments before "// a"
+comments before "// b"
+`,
+		}, {
+			desc: "line",
+			input: `
+// a
+
+// b
+module m // c
+// d
+
+// e
+`,
+			want: `
+comments before "// a"
+line before "// b"
+line suffix "// c"
+comments before "// d"
+comments before "// e"
+`,
+		}, {
+			desc: "block",
+			input: `
+// a
+
+// b
+block ( // c
+	// d
+
+	// e
+	x // f
+	// g
+
+	// h
+) // i
+// j
+
+// k
+`,
+			want: `
+comments before "// a"
+block before "// b"
+lparen suffix "// c"
+blockline before "// d"
+blockline before ""
+blockline before "// e"
+blockline suffix "// f"
+rparen before "// g"
+rparen before ""
+rparen before "// h"
+rparen suffix "// i"
+comments before "// j"
+comments before "// k"
+`,
+		}, {
+			desc:  "cr_removed",
+			input: "// a\r\r\n", // only the final CRLF is stripped; the CR before it stays in the token
+			want:  `comments before "// a\r"`,
+		},
+	} {
+		t.Run(test.desc, func(t *testing.T) {
+			f, err := ParseLax("go.mod", []byte(test.input), nil)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			buf := &bytes.Buffer{}
+			printComments := func(prefix string, cs *Comments) { // writes one line to buf per comment in cs, labelled with prefix and position
+				for _, c := range cs.Before {
+					fmt.Fprintf(buf, "%s before %q\n", prefix, c.Token)
+				}
+				for _, c := range cs.Suffix {
+					fmt.Fprintf(buf, "%s suffix %q\n", prefix, c.Token)
+				}
+				for _, c := range cs.After {
+					fmt.Fprintf(buf, "%s after %q\n", prefix, c.Token)
+				}
+			}
+
+			printComments("file", &f.Syntax.Comments) // comments held directly on f.Syntax
+			for _, stmt := range f.Syntax.Stmt {
+				switch stmt := stmt.(type) { // statement types not listed here print nothing
+				case *CommentBlock:
+					printComments("comments", stmt.Comment())
+				case *Line:
+					printComments("line", stmt.Comment())
+				case *LineBlock:
+					printComments("block", stmt.Comment())
+					printComments("lparen", stmt.LParen.Comment())
+					for _, line := range stmt.Line {
+						printComments("blockline", line.Comment())
+					}
+					printComments("rparen", stmt.RParen.Comment())
+				}
+			}
+
+			got := strings.TrimSpace(buf.String()) // both sides are trimmed, so the raw-string expectations may begin and end with newlines
+			want := strings.TrimSpace(test.want)
+			if got != want {
+				t.Errorf("got:\n%s\nwant:\n%s", got, want)
+			}
+		})
+	}
+}