Skip to content

Commit

Permalink
newline normalization in template literals
Browse files Browse the repository at this point in the history
  • Loading branch information
evanw committed Aug 12, 2020
1 parent ce281d6 commit 77143a7
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@

You can now use non-tag template literals such as `` `abc` `` and `` `a${b}c` `` with `--target=es5` and esbuild will convert them to string addition such as `"abc"` and `"a" + b + "c"` instead of reporting an error.

* Newline normalization in template literals

This fixes a bug that caused carriage-return characters to incorrectly end up in multi-line template literals when the source file used Windows-style line endings (i.e. `\r\n`). The ES6 language specification says that both lone carriage-return characters and Windows carriage-return line-feed sequences must be normalized to line-feed characters. With this change, esbuild's parsing of multi-line template literals no longer depends on which line endings the source file uses.

## 0.6.20

* Symbols are now renamed separately per chunk ([#16](https://github.com/evanw/esbuild/issues/16))
Expand Down
85 changes: 74 additions & 11 deletions internal/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,18 +294,53 @@ func (lexer *Lexer) Raw() string {
}

func (lexer *Lexer) RawTemplateContents() string {
var text string
switch lexer.Token {
case TNoSubstitutionTemplateLiteral, TTemplateTail:
// "`x`" or "}x`"
return lexer.source.Contents[lexer.start+1 : lexer.end-1]
text = lexer.source.Contents[lexer.start+1 : lexer.end-1]

case TTemplateHead, TTemplateMiddle:
// "`x${" or "}x${"
return lexer.source.Contents[lexer.start+1 : lexer.end-2]
text = lexer.source.Contents[lexer.start+1 : lexer.end-2]
}

default:
return ""
if strings.IndexByte(text, '\r') == -1 {
return text
}

// From the specification:
//
// 11.8.6.1 Static Semantics: TV and TRV
//
// TV excludes the code units of LineContinuation while TRV includes
// them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
// <LF> for both TV and TRV. An explicit EscapeSequence is needed to
// include a <CR> or <CR><LF> sequence.

bytes := []byte(text)
end := 0
i := 0

for i < len(bytes) {
c := bytes[i]
i++

if c == '\r' {
// Convert '\r\n' into '\n'
if i < len(bytes) && bytes[i] == '\n' {
i++
}

// Convert '\r' into '\n'
c = '\n'
}

bytes[end] = c
end++
}

return string(bytes[:end])
}

func (lexer *Lexer) IsIdentifierOrKeyword() bool {
Expand Down Expand Up @@ -1301,8 +1336,7 @@ func (lexer *Lexer) Next() {

case '\'', '"', '`':
quote := lexer.codePoint
hasEscape := false
isASCII := true
needsSlowPath := false
suffixLen := 1

if quote != '`' {
Expand All @@ -1318,7 +1352,7 @@ func (lexer *Lexer) Next() {
for {
switch lexer.codePoint {
case '\\':
hasEscape = true
needsSlowPath = true
lexer.step()

// Handle Windows CRLF
Expand All @@ -1333,7 +1367,16 @@ func (lexer *Lexer) Next() {
case -1: // This indicates the end of the file
lexer.SyntaxError()

case '\r', '\n':
case '\r':
if quote != '`' {
lexer.addError(ast.Loc{Start: int32(lexer.end)}, "Unterminated string literal")
panic(LexerPanic{})
}

// Template literals require newline normalization
needsSlowPath = true

case '\n':
if quote != '`' {
lexer.addError(ast.Loc{Start: int32(lexer.end)}, "Unterminated string literal")
panic(LexerPanic{})
Expand Down Expand Up @@ -1362,7 +1405,7 @@ func (lexer *Lexer) Next() {
default:
// Non-ASCII strings need the slow path
if lexer.codePoint >= 0x80 {
isASCII = false
needsSlowPath = true
} else if lexer.json.parse && lexer.codePoint < 0x20 {
lexer.SyntaxError()
}
Expand All @@ -1372,7 +1415,7 @@ func (lexer *Lexer) Next() {

text := lexer.source.Contents[lexer.start+1 : lexer.end-suffixLen]

if hasEscape || !isASCII {
if needsSlowPath {
// Slow path
lexer.StringLiteral = lexer.decodeEscapeSequences(lexer.start+1, text)
} else {
Expand Down Expand Up @@ -1970,7 +2013,27 @@ func (lexer *Lexer) decodeEscapeSequences(start int, text string) []uint16 {
c, width := utf8.DecodeRuneInString(text[i:])
i += width

if c == '\\' {
switch c {
case '\r':
// From the specification:
//
// 11.8.6.1 Static Semantics: TV and TRV
//
// TV excludes the code units of LineContinuation while TRV includes
// them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
// <LF> for both TV and TRV. An explicit EscapeSequence is needed to
// include a <CR> or <CR><LF> sequence.

// Convert '\r\n' into '\n'
if i < len(text) && text[i] == '\n' {
i++
}

// Convert '\r' into '\n'
decoded = append(decoded, '\n')
continue

case '\\':
c2, width2 := utf8.DecodeRuneInString(text[i:])
i += width2

Expand Down
54 changes: 54 additions & 0 deletions internal/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,60 @@ func TestArrow(t *testing.T) {

// TestTemplate checks how template literals are parsed and printed, with
// emphasis on newline normalization: raw "\r" and "\r\n" sequences inside
// template contents must come out as "\n", escape sequences such as "\\r"
// must be preserved, and tagged templates must keep their raw contents
// (including escapes) untouched.
func TestTemplate(t *testing.T) {
	expectPrinted(t, "`a${1 + `b${2}c` + 3}d`", "`a${1 + `b${2}c` + 3}d`;\n")

	cases := []struct {
		input  string
		output string
	}{
		// Untagged, no substitution
		{"`a\nb`", "`a\nb`;\n"},
		{"`a\rb`", "`a\nb`;\n"},
		{"`a\r\nb`", "`a\nb`;\n"},
		{"`a\\nb`", "`a\nb`;\n"},
		{"`a\\rb`", "`a\\rb`;\n"},
		{"`a\\r\\nb`", "`a\\r\nb`;\n"},
		{"`a\u2028b`", "`a\\u2028b`;\n"},
		{"`a\u2029b`", "`a\\u2029b`;\n"},

		// Untagged, template head
		{"`a\n${b}`", "`a\n${b}`;\n"},
		{"`a\r${b}`", "`a\n${b}`;\n"},
		{"`a\r\n${b}`", "`a\n${b}`;\n"},
		{"`a\\n${b}`", "`a\n${b}`;\n"},
		{"`a\\r${b}`", "`a\\r${b}`;\n"},
		{"`a\\r\\n${b}`", "`a\\r\n${b}`;\n"},
		{"`a\u2028${b}`", "`a\\u2028${b}`;\n"},
		{"`a\u2029${b}`", "`a\\u2029${b}`;\n"},

		// Untagged, template tail
		{"`${a}\nb`", "`${a}\nb`;\n"},
		{"`${a}\rb`", "`${a}\nb`;\n"},
		{"`${a}\r\nb`", "`${a}\nb`;\n"},
		{"`${a}\\nb`", "`${a}\nb`;\n"},
		{"`${a}\\rb`", "`${a}\\rb`;\n"},
		{"`${a}\\r\\nb`", "`${a}\\r\nb`;\n"},
		{"`${a}\u2028b`", "`${a}\\u2028b`;\n"},
		{"`${a}\u2029b`", "`${a}\\u2029b`;\n"},

		// Tagged, no substitution (raw contents preserved)
		{"tag`a\nb`", "tag`a\nb`;\n"},
		{"tag`a\rb`", "tag`a\nb`;\n"},
		{"tag`a\r\nb`", "tag`a\nb`;\n"},
		{"tag`a\\nb`", "tag`a\\nb`;\n"},
		{"tag`a\\rb`", "tag`a\\rb`;\n"},
		{"tag`a\\r\\nb`", "tag`a\\r\\nb`;\n"},
		{"tag`a\u2028b`", "tag`a\u2028b`;\n"},
		{"tag`a\u2029b`", "tag`a\u2029b`;\n"},

		// Tagged, template head
		{"tag`a\n${b}`", "tag`a\n${b}`;\n"},
		{"tag`a\r${b}`", "tag`a\n${b}`;\n"},
		{"tag`a\r\n${b}`", "tag`a\n${b}`;\n"},
		{"tag`a\\n${b}`", "tag`a\\n${b}`;\n"},
		{"tag`a\\r${b}`", "tag`a\\r${b}`;\n"},
		{"tag`a\\r\\n${b}`", "tag`a\\r\\n${b}`;\n"},
		{"tag`a\u2028${b}`", "tag`a\u2028${b}`;\n"},
		{"tag`a\u2029${b}`", "tag`a\u2029${b}`;\n"},

		// Tagged, template tail
		{"tag`${a}\nb`", "tag`${a}\nb`;\n"},
		{"tag`${a}\rb`", "tag`${a}\nb`;\n"},
		{"tag`${a}\r\nb`", "tag`${a}\nb`;\n"},
		{"tag`${a}\\nb`", "tag`${a}\\nb`;\n"},
		{"tag`${a}\\rb`", "tag`${a}\\rb`;\n"},
		{"tag`${a}\\r\\nb`", "tag`${a}\\r\\nb`;\n"},
		{"tag`${a}\u2028b`", "tag`${a}\u2028b`;\n"},
		{"tag`${a}\u2029b`", "tag`${a}\u2029b`;\n"},
	}

	for _, c := range cases {
		expectPrinted(t, c.input, c.output)
	}
}

func TestSwitch(t *testing.T) {
Expand Down

0 comments on commit 77143a7

Please sign in to comment.