Skip to content

Commit 01f055a

Browse files
committed
Add full support for the Unicode spec change
* Requires surrogate pairs, regardless of whether they are escaped * Supports braced Unicode escapes * Improves error messages and adds more tests
1 parent 17201f9 commit 01f055a

File tree

2 files changed

+271
-137
lines changed

2 files changed

+271
-137
lines changed

src/language/__tests__/lexer-test.js

Lines changed: 119 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,31 @@ describe('Lexer', () => {
263263
value: 'unicode \u1234\u5678\u90AB\uCDEF',
264264
});
265265

266+
expect(lexOne('"unicode \\u{1234}\\u{5678}\\u{90AB}\\u{CDEF}"')).to.contain(
267+
{
268+
kind: TokenKind.STRING,
269+
start: 0,
270+
end: 42,
271+
value: 'unicode \u1234\u5678\u90AB\uCDEF',
272+
},
273+
);
274+
275+
expect(
276+
lexOne('"string with unicode escape outside BMP \\u{1F600}"'),
277+
).to.contain({
278+
kind: TokenKind.STRING,
279+
start: 0,
280+
end: 50,
281+
value: 'string with unicode escape outside BMP 😀',
282+
});
283+
284+
expect(lexOne('"unicode \\u{10FFFF}"')).to.contain({
285+
kind: TokenKind.STRING,
286+
start: 0,
287+
end: 20,
288+
value: 'unicode \u{10FFFF}',
289+
});
290+
266291
expect(
267292
lexOne('"string with unicode code point outside BMP 😀"'),
268293
).to.contain({
@@ -378,55 +403,135 @@ describe('Lexer', () => {
378403
});
379404

380405
expectSyntaxError('"bad \\z esc"').to.deep.equal({
381-
message: 'Syntax Error: Invalid character escape sequence: \\z.',
406+
message: 'Syntax Error: Invalid character escape sequence: "\\z".',
382407
locations: [{ line: 1, column: 7 }],
383408
});
384409

385410
expectSyntaxError('"bad \\x esc"').to.deep.equal({
386-
message: 'Syntax Error: Invalid character escape sequence: \\x.',
411+
message: 'Syntax Error: Invalid character escape sequence: "\\x".',
387412
locations: [{ line: 1, column: 7 }],
388413
});
389414

390415
expectSyntaxError('"bad \\u1 esc"').to.deep.equal({
391-
message: 'Syntax Error: Invalid character escape sequence: \\u1 es.',
416+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u1 es".',
417+
locations: [{ line: 1, column: 7 }],
418+
});
419+
420+
expectSyntaxError('"bad \\u1"').to.deep.equal({
421+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u1".',
392422
locations: [{ line: 1, column: 7 }],
393423
});
394424

395425
expectSyntaxError('"bad \\u0XX1 esc"').to.deep.equal({
396-
message: 'Syntax Error: Invalid character escape sequence: \\u0XX1.',
426+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u0XX1".',
397427
locations: [{ line: 1, column: 7 }],
398428
});
399429

400430
expectSyntaxError('"bad \\uXXXX esc"').to.deep.equal({
401-
message: 'Syntax Error: Invalid character escape sequence: \\uXXXX.',
431+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXX".',
402432
locations: [{ line: 1, column: 7 }],
403433
});
404434

405435
expectSyntaxError('"bad \\uFXXX esc"').to.deep.equal({
406-
message: 'Syntax Error: Invalid character escape sequence: \\uFXXX.',
436+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uFXXX".',
407437
locations: [{ line: 1, column: 7 }],
408438
});
409439

410440
expectSyntaxError('"bad \\uXXXF esc"').to.deep.equal({
411-
message: 'Syntax Error: Invalid character escape sequence: \\uXXXF.',
441+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXF".',
412442
locations: [{ line: 1, column: 7 }],
413443
});
414444

415-
expectSyntaxError('"bad \\uDEAD esc"').to.deep.equal({
416-
message: 'Syntax Error: Invalid surrogate pair escape sequence: \\uDEAD.',
445+
expectSyntaxError('"bad \\u{} esc"').to.deep.equal({
446+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u{}".',
417447
locations: [{ line: 1, column: 7 }],
418448
});
419449

420-
expectSyntaxError('"bad \\uD83D\\noEscape"').to.deep.equal({
421-
message:
422-
'Syntax Error: Invalid surrogate pair escape sequence: \\uD83D\\n.',
450+
expectSyntaxError('"bad \\u{XXXF} esc"').to.deep.equal({
451+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u{XXXF}".',
452+
locations: [{ line: 1, column: 7 }],
453+
});
454+
455+
expectSyntaxError('"bad \\u{XXXF esc"').to.deep.equal({
456+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u{XXXF es".',
423457
locations: [{ line: 1, column: 7 }],
424458
});
425459

460+
expectSyntaxError('"bad \\u{X"').to.deep.equal({
461+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u{X".',
462+
locations: [{ line: 1, column: 7 }],
463+
});
464+
465+
expectSyntaxError('"bad \\u{XXXF e}scape"').to.deep.equal({
466+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u{XXXF e}".',
467+
locations: [{ line: 1, column: 7 }],
468+
});
469+
470+
expectSyntaxError('"bad \\u{110000} esc"').to.deep.equal({
471+
message: 'Syntax Error: Undefined Unicode code-point: "\\u{110000}".',
472+
locations: [{ line: 1, column: 7 }],
473+
});
474+
475+
expectSyntaxError('"bad \uDEAD esc"').to.deep.equal({
476+
message: 'Syntax Error: Invalid low surrogate within String: "\\uDEAD".',
477+
locations: [{ line: 1, column: 6 }],
478+
});
479+
480+
expectSyntaxError('"bad \\uDEAD esc"').to.deep.equal({
481+
message: 'Syntax Error: Invalid low surrogate within String: "\\uDEAD".',
482+
locations: [{ line: 1, column: 6 }],
483+
});
484+
485+
expectSyntaxError('"bad \\u{DEAD} esc"').to.deep.equal({
486+
message: 'Syntax Error: Invalid low surrogate within String: "\\uDEAD".',
487+
locations: [{ line: 1, column: 6 }],
488+
});
489+
490+
expectSyntaxError('"bad \uD83D esc"').to.deep.equal({
491+
message:
492+
'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate " " in String.',
493+
locations: [{ line: 1, column: 6 }],
494+
});
495+
496+
expectSyntaxError('"bad \\uD83D esc"').to.deep.equal({
497+
message:
498+
'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate " " in String.',
499+
locations: [{ line: 1, column: 6 }],
500+
});
501+
502+
expectSyntaxError('"bad \\u{D83D} esc"').to.deep.equal({
503+
message:
504+
'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate " " in String.',
505+
locations: [{ line: 1, column: 6 }],
506+
});
507+
508+
expectSyntaxError('"bad \uD83D\uDBFF esc"').to.deep.equal({
509+
message:
510+
'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate "\\uDBFF" in String.',
511+
locations: [{ line: 1, column: 6 }],
512+
});
513+
426514
expectSyntaxError('"bad \\uD83D\\uDBFF esc"').to.deep.equal({
427515
message:
428-
'Syntax Error: Invalid surrogate pair escape sequence: \\uD83D\\uDBFF.',
429-
locations: [{ line: 1, column: 7 }],
516+
'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate "\\uDBFF" in String.',
517+
locations: [{ line: 1, column: 6 }],
518+
});
519+
520+
expectSyntaxError('"bad \uD83D\\uDBFF esc"').to.deep.equal({
521+
message:
522+
'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate "\\uDBFF" in String.',
523+
locations: [{ line: 1, column: 6 }],
524+
});
525+
526+
expectSyntaxError('"bad \\uD83D\uDBFF esc"').to.deep.equal({
527+
message:
528+
'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate "\\uDBFF" in String.',
529+
locations: [{ line: 1, column: 6 }],
530+
});
531+
532+
expectSyntaxError('"bad \\uD83D\\escape"').to.deep.equal({
533+
message: 'Syntax Error: Invalid character escape sequence: "\\e".',
534+
locations: [{ line: 1, column: 13 }],
430535
});
431536
});
432537

0 commit comments

Comments
 (0)