Skip to content

Commit

Permalink
newline normalization in template literals
Browse files Browse the repository at this point in the history
  • Loading branch information
evanw committed Aug 12, 2020
1 parent ce281d6 commit 77143a7
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@

You can now use non-tag template literals such as `` `abc` `` and `` `a${b}c` `` with `--target=es5` and esbuild will convert them to string addition such as `"abc"` and `"a" + b + "c"` instead of reporting an error.

* Newline normalization in template literals

This fixes a bug that caused carriage-return characters to incorrectly end up in multi-line template literals when the source file used Windows-style line endings (i.e. `\r\n`). The ES6 language specification says that both lone carriage-return characters and Windows carriage-return line-feed sequences must be normalized to line-feed characters. With this change, esbuild's parsing of multi-line template literals no longer depends on which line endings the source file uses.

## 0.6.20

* Symbols are now renamed separately per chunk ([#16](https://github.com/evanw/esbuild/issues/16))
Expand Down
85 changes: 74 additions & 11 deletions internal/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,18 +294,53 @@ func (lexer *Lexer) Raw() string {
}

func (lexer *Lexer) RawTemplateContents() string {
var text string
switch lexer.Token {
case TNoSubstitutionTemplateLiteral, TTemplateTail:
// "`x`" or "}x`"
return lexer.source.Contents[lexer.start+1 : lexer.end-1]
text = lexer.source.Contents[lexer.start+1 : lexer.end-1]

case TTemplateHead, TTemplateMiddle:
// "`x${" or "}x${"
return lexer.source.Contents[lexer.start+1 : lexer.end-2]
text = lexer.source.Contents[lexer.start+1 : lexer.end-2]
}

default:
return ""
if strings.IndexByte(text, '\r') == -1 {
return text
}

// From the specification:
//
// 11.8.6.1 Static Semantics: TV and TRV
//
// TV excludes the code units of LineContinuation while TRV includes
// them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
// <LF> for both TV and TRV. An explicit EscapeSequence is needed to
// include a <CR> or <CR><LF> sequence.

bytes := []byte(text)
end := 0
i := 0

for i < len(bytes) {
c := bytes[i]
i++

if c == '\r' {
// Convert '\r\n' into '\n'
if i < len(bytes) && bytes[i] == '\n' {
i++
}

// Convert '\r' into '\n'
c = '\n'
}

bytes[end] = c
end++
}

return string(bytes[:end])
}

func (lexer *Lexer) IsIdentifierOrKeyword() bool {
Expand Down Expand Up @@ -1301,8 +1336,7 @@ func (lexer *Lexer) Next() {

case '\'', '"', '`':
quote := lexer.codePoint
hasEscape := false
isASCII := true
needsSlowPath := false
suffixLen := 1

if quote != '`' {
Expand All @@ -1318,7 +1352,7 @@ func (lexer *Lexer) Next() {
for {
switch lexer.codePoint {
case '\\':
hasEscape = true
needsSlowPath = true
lexer.step()

// Handle Windows CRLF
Expand All @@ -1333,7 +1367,16 @@ func (lexer *Lexer) Next() {
case -1: // This indicates the end of the file
lexer.SyntaxError()

case '\r', '\n':
case '\r':
if quote != '`' {
lexer.addError(ast.Loc{Start: int32(lexer.end)}, "Unterminated string literal")
panic(LexerPanic{})
}

// Template literals require newline normalization
needsSlowPath = true

case '\n':
if quote != '`' {
lexer.addError(ast.Loc{Start: int32(lexer.end)}, "Unterminated string literal")
panic(LexerPanic{})
Expand Down Expand Up @@ -1362,7 +1405,7 @@ func (lexer *Lexer) Next() {
default:
// Non-ASCII strings need the slow path
if lexer.codePoint >= 0x80 {
isASCII = false
needsSlowPath = true
} else if lexer.json.parse && lexer.codePoint < 0x20 {
lexer.SyntaxError()
}
Expand All @@ -1372,7 +1415,7 @@ func (lexer *Lexer) Next() {

text := lexer.source.Contents[lexer.start+1 : lexer.end-suffixLen]

if hasEscape || !isASCII {
if needsSlowPath {
// Slow path
lexer.StringLiteral = lexer.decodeEscapeSequences(lexer.start+1, text)
} else {
Expand Down Expand Up @@ -1970,7 +2013,27 @@ func (lexer *Lexer) decodeEscapeSequences(start int, text string) []uint16 {
c, width := utf8.DecodeRuneInString(text[i:])
i += width

if c == '\\' {
switch c {
case '\r':
// From the specification:
//
// 11.8.6.1 Static Semantics: TV and TRV
//
// TV excludes the code units of LineContinuation while TRV includes
// them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
// <LF> for both TV and TRV. An explicit EscapeSequence is needed to
// include a <CR> or <CR><LF> sequence.

// Convert '\r\n' into '\n'
if i < len(text) && text[i] == '\n' {
i++
}

// Convert '\r' into '\n'
decoded = append(decoded, '\n')
continue

case '\\':
c2, width2 := utf8.DecodeRuneInString(text[i:])
i += width2

Expand Down
54 changes: 54 additions & 0 deletions internal/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,60 @@ func TestArrow(t *testing.T) {

// TestTemplate checks how template literals are parsed and printed, with
// emphasis on newline normalization: raw "\r" and "\r\n" sequences inside
// template contents must come out as "\n", escape sequences such as "\\r"
// must be preserved, and tagged templates must keep their raw contents
// (including escapes) untouched.
func TestTemplate(t *testing.T) {
	expectPrinted(t, "`a${1 + `b${2}c` + 3}d`", "`a${1 + `b${2}c` + 3}d`;\n")

	cases := []struct {
		input  string
		output string
	}{
		// Untagged, no substitution
		{"`a\nb`", "`a\nb`;\n"},
		{"`a\rb`", "`a\nb`;\n"},
		{"`a\r\nb`", "`a\nb`;\n"},
		{"`a\\nb`", "`a\nb`;\n"},
		{"`a\\rb`", "`a\\rb`;\n"},
		{"`a\\r\\nb`", "`a\\r\nb`;\n"},
		{"`a\u2028b`", "`a\\u2028b`;\n"},
		{"`a\u2029b`", "`a\\u2029b`;\n"},

		// Untagged, template head
		{"`a\n${b}`", "`a\n${b}`;\n"},
		{"`a\r${b}`", "`a\n${b}`;\n"},
		{"`a\r\n${b}`", "`a\n${b}`;\n"},
		{"`a\\n${b}`", "`a\n${b}`;\n"},
		{"`a\\r${b}`", "`a\\r${b}`;\n"},
		{"`a\\r\\n${b}`", "`a\\r\n${b}`;\n"},
		{"`a\u2028${b}`", "`a\\u2028${b}`;\n"},
		{"`a\u2029${b}`", "`a\\u2029${b}`;\n"},

		// Untagged, template tail
		{"`${a}\nb`", "`${a}\nb`;\n"},
		{"`${a}\rb`", "`${a}\nb`;\n"},
		{"`${a}\r\nb`", "`${a}\nb`;\n"},
		{"`${a}\\nb`", "`${a}\nb`;\n"},
		{"`${a}\\rb`", "`${a}\\rb`;\n"},
		{"`${a}\\r\\nb`", "`${a}\\r\nb`;\n"},
		{"`${a}\u2028b`", "`${a}\\u2028b`;\n"},
		{"`${a}\u2029b`", "`${a}\\u2029b`;\n"},

		// Tagged, no substitution (raw contents preserved)
		{"tag`a\nb`", "tag`a\nb`;\n"},
		{"tag`a\rb`", "tag`a\nb`;\n"},
		{"tag`a\r\nb`", "tag`a\nb`;\n"},
		{"tag`a\\nb`", "tag`a\\nb`;\n"},
		{"tag`a\\rb`", "tag`a\\rb`;\n"},
		{"tag`a\\r\\nb`", "tag`a\\r\\nb`;\n"},
		{"tag`a\u2028b`", "tag`a\u2028b`;\n"},
		{"tag`a\u2029b`", "tag`a\u2029b`;\n"},

		// Tagged, template head
		{"tag`a\n${b}`", "tag`a\n${b}`;\n"},
		{"tag`a\r${b}`", "tag`a\n${b}`;\n"},
		{"tag`a\r\n${b}`", "tag`a\n${b}`;\n"},
		{"tag`a\\n${b}`", "tag`a\\n${b}`;\n"},
		{"tag`a\\r${b}`", "tag`a\\r${b}`;\n"},
		{"tag`a\\r\\n${b}`", "tag`a\\r\\n${b}`;\n"},
		{"tag`a\u2028${b}`", "tag`a\u2028${b}`;\n"},
		{"tag`a\u2029${b}`", "tag`a\u2029${b}`;\n"},

		// Tagged, template tail
		{"tag`${a}\nb`", "tag`${a}\nb`;\n"},
		{"tag`${a}\rb`", "tag`${a}\nb`;\n"},
		{"tag`${a}\r\nb`", "tag`${a}\nb`;\n"},
		{"tag`${a}\\nb`", "tag`${a}\\nb`;\n"},
		{"tag`${a}\\rb`", "tag`${a}\\rb`;\n"},
		{"tag`${a}\\r\\nb`", "tag`${a}\\r\\nb`;\n"},
		{"tag`${a}\u2028b`", "tag`${a}\u2028b`;\n"},
		{"tag`${a}\u2029b`", "tag`${a}\u2029b`;\n"},
	}

	for _, c := range cases {
		expectPrinted(t, c.input, c.output)
	}
}

func TestSwitch(t *testing.T) {
Expand Down

0 comments on commit 77143a7

Please sign in to comment.