@@ -28,13 +28,6 @@ function expectSyntaxError(text: string) {
28
28
}
29
29
30
30
describe ( 'Lexer' , ( ) => {
31
- it ( 'disallows uncommon control characters' , ( ) => {
32
- expectSyntaxError ( '\u0007' ) . to . deep . equal ( {
33
- message : 'Syntax Error: Invalid character: U+0007.' ,
34
- locations : [ { line : 1 , column : 1 } ] ,
35
- } ) ;
36
- } ) ;
37
-
38
31
it ( 'ignores BOM header' , ( ) => {
39
32
expect ( lexOne ( '\uFEFF foo' ) ) . to . contain ( {
40
33
kind : TokenKind . NAME ,
@@ -269,12 +262,98 @@ describe('Lexer', () => {
269
262
value : 'slashes \\ /' ,
270
263
} ) ;
271
264
265
+ expect ( lexOne ( '"unescaped unicode outside BMP \u{1f600}"' ) ) . to . contain ( {
266
+ kind : TokenKind . STRING ,
267
+ start : 0 ,
268
+ end : 34 ,
269
+ value : 'unescaped unicode outside BMP \u{1f600}' ,
270
+ } ) ;
271
+
272
+ expect (
273
+ lexOne ( '"unescaped maximal unicode outside BMP \u{10ffff}"' ) ,
274
+ ) . to . contain ( {
275
+ kind : TokenKind . STRING ,
276
+ start : 0 ,
277
+ end : 42 ,
278
+ value : 'unescaped maximal unicode outside BMP \u{10ffff}' ,
279
+ } ) ;
280
+
272
281
expect ( lexOne ( '"unicode \\u1234\\u5678\\u90AB\\uCDEF"' ) ) . to . contain ( {
273
282
kind : TokenKind . STRING ,
274
283
start : 0 ,
275
284
end : 34 ,
276
285
value : 'unicode \u1234\u5678\u90AB\uCDEF' ,
277
286
} ) ;
287
+
288
+ expect ( lexOne ( '"unicode \\u{1234}\\u{5678}\\u{90AB}\\u{CDEF}"' ) ) . to . contain (
289
+ {
290
+ kind : TokenKind . STRING ,
291
+ start : 0 ,
292
+ end : 42 ,
293
+ value : 'unicode \u1234\u5678\u90AB\uCDEF' ,
294
+ } ,
295
+ ) ;
296
+
297
+ expect (
298
+ lexOne ( '"string with unicode escape outside BMP \\u{1F600}"' ) ,
299
+ ) . to . contain ( {
300
+ kind : TokenKind . STRING ,
301
+ start : 0 ,
302
+ end : 50 ,
303
+ value : 'string with unicode escape outside BMP \u{1f600}' ,
304
+ } ) ;
305
+
306
+ expect ( lexOne ( '"string with minimal unicode escape \\u{0}"' ) ) . to . contain ( {
307
+ kind : TokenKind . STRING ,
308
+ start : 0 ,
309
+ end : 42 ,
310
+ value : 'string with minimal unicode escape \u{0}' ,
311
+ } ) ;
312
+
313
+ expect (
314
+ lexOne ( '"string with maximal unicode escape \\u{10FFFF}"' ) ,
315
+ ) . to . contain ( {
316
+ kind : TokenKind . STRING ,
317
+ start : 0 ,
318
+ end : 47 ,
319
+ value : 'string with maximal unicode escape \u{10FFFF}' ,
320
+ } ) ;
321
+
322
+ expect (
323
+ lexOne ( '"string with maximal minimal unicode escape \\u{00000000}"' ) ,
324
+ ) . to . contain ( {
325
+ kind : TokenKind . STRING ,
326
+ start : 0 ,
327
+ end : 57 ,
328
+ value : 'string with maximal minimal unicode escape \u{0}' ,
329
+ } ) ;
330
+
331
+ expect (
332
+ lexOne ( '"string with unicode surrogate pair escape \\uD83D\\uDE00"' ) ,
333
+ ) . to . contain ( {
334
+ kind : TokenKind . STRING ,
335
+ start : 0 ,
336
+ end : 56 ,
337
+ value : 'string with unicode surrogate pair escape \u{1f600}' ,
338
+ } ) ;
339
+
340
+ expect (
341
+ lexOne ( '"string with minimal surrogate pair escape \\uD800\\uDC00"' ) ,
342
+ ) . to . contain ( {
343
+ kind : TokenKind . STRING ,
344
+ start : 0 ,
345
+ end : 56 ,
346
+ value : 'string with minimal surrogate pair escape \u{10000}' ,
347
+ } ) ;
348
+
349
+ expect (
350
+ lexOne ( '"string with maximal surrogate pair escape \\uDBFF\\uDFFF"' ) ,
351
+ ) . to . contain ( {
352
+ kind : TokenKind . STRING ,
353
+ start : 0 ,
354
+ end : 56 ,
355
+ value : 'string with maximal surrogate pair escape \u{10FFFF}' ,
356
+ } ) ;
278
357
} ) ;
279
358
280
359
it ( 'lex reports useful string errors' , ( ) => {
@@ -304,16 +383,19 @@ describe('Lexer', () => {
304
383
locations : [ { line : 1 , column : 1 } ] ,
305
384
} ) ;
306
385
307
- expectSyntaxError ( '"contains unescaped \u0007 control char"' ) . to . deep . equal (
308
- {
309
- message : 'Syntax Error: Invalid character within String: U+0007.' ,
310
- locations : [ { line : 1 , column : 21 } ] ,
311
- } ,
312
- ) ;
386
+ expectSyntaxError ( '"bad surrogate \uDEAD"' ) . to . deep . equal ( {
387
+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
388
+ locations : [ { line : 1 , column : 16 } ] ,
389
+ } ) ;
390
+
391
+ expectSyntaxError ( '"bad high surrogate pair \uDEAD\uDEAD"' ) . to . deep . equal ( {
392
+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
393
+ locations : [ { line : 1 , column : 26 } ] ,
394
+ } ) ;
313
395
314
- expectSyntaxError ( '"null-byte is not \u0000 end of file "' ) . to . deep . equal ( {
315
- message : 'Syntax Error: Invalid character within String: U+0000 .' ,
316
- locations : [ { line : 1 , column : 19 } ] ,
396
+ expectSyntaxError ( '"bad low surrogate pair \uD800\uD800 "' ) . to . deep . equal ( {
397
+ message : 'Syntax Error: Invalid character within String: U+D800 .' ,
398
+ locations : [ { line : 1 , column : 25 } ] ,
317
399
} ) ;
318
400
319
401
expectSyntaxError ( '"multi\nline"' ) . to . deep . equal ( {
@@ -360,6 +442,93 @@ describe('Lexer', () => {
360
442
message : 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXF".' ,
361
443
locations : [ { line : 1 , column : 6 } ] ,
362
444
} ) ;
445
+
446
+ expectSyntaxError ( '"bad \\u{} esc"' ) . to . deep . equal ( {
447
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{}".' ,
448
+ locations : [ { line : 1 , column : 6 } ] ,
449
+ } ) ;
450
+
451
+ expectSyntaxError ( '"bad \\u{FXXX} esc"' ) . to . deep . equal ( {
452
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FX".' ,
453
+ locations : [ { line : 1 , column : 6 } ] ,
454
+ } ) ;
455
+
456
+ expectSyntaxError ( '"bad \\u{FFFF esc"' ) . to . deep . equal ( {
457
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FFFF ".' ,
458
+ locations : [ { line : 1 , column : 6 } ] ,
459
+ } ) ;
460
+
461
+ expectSyntaxError ( '"bad \\u{FFFF"' ) . to . deep . equal ( {
462
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FFFF"".' ,
463
+ locations : [ { line : 1 , column : 6 } ] ,
464
+ } ) ;
465
+
466
+ expectSyntaxError ( '"too high \\u{110000} esc"' ) . to . deep . equal ( {
467
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{110000}".' ,
468
+ locations : [ { line : 1 , column : 11 } ] ,
469
+ } ) ;
470
+
471
+ expectSyntaxError ( '"way too high \\u{12345678} esc"' ) . to . deep . equal ( {
472
+ message :
473
+ 'Syntax Error: Invalid Unicode escape sequence: "\\u{12345678}".' ,
474
+ locations : [ { line : 1 , column : 15 } ] ,
475
+ } ) ;
476
+
477
+ expectSyntaxError ( '"too long \\u{000000000} esc"' ) . to . deep . equal ( {
478
+ message :
479
+ 'Syntax Error: Invalid Unicode escape sequence: "\\u{000000000".' ,
480
+ locations : [ { line : 1 , column : 11 } ] ,
481
+ } ) ;
482
+
483
+ expectSyntaxError ( '"bad surrogate \\uDEAD esc"' ) . to . deep . equal ( {
484
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uDEAD".' ,
485
+ locations : [ { line : 1 , column : 16 } ] ,
486
+ } ) ;
487
+
488
+ expectSyntaxError ( '"bad surrogate \\u{DEAD} esc"' ) . to . deep . equal ( {
489
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{DEAD}".' ,
490
+ locations : [ { line : 1 , column : 16 } ] ,
491
+ } ) ;
492
+
493
+ expectSyntaxError (
494
+ '"cannot use braces for surrogate pair \\u{D83D}\\u{DE00} esc"' ,
495
+ ) . to . deep . equal ( {
496
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{D83D}".' ,
497
+ locations : [ { line : 1 , column : 39 } ] ,
498
+ } ) ;
499
+
500
+ expectSyntaxError (
501
+ '"bad high surrogate pair \\uDEAD\\uDEAD esc"' ,
502
+ ) . to . deep . equal ( {
503
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uDEAD".' ,
504
+ locations : [ { line : 1 , column : 26 } ] ,
505
+ } ) ;
506
+
507
+ expectSyntaxError (
508
+ '"bad low surrogate pair \\uD800\\uD800 esc"' ,
509
+ ) . to . deep . equal ( {
510
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD800".' ,
511
+ locations : [ { line : 1 , column : 25 } ] ,
512
+ } ) ;
513
+
514
+ expectSyntaxError (
515
+ '"cannot escape half a pair \uD83D\\uDE00 esc"' ,
516
+ ) . to . deep . equal ( {
517
+ message : 'Syntax Error: Invalid character within String: U+D83D.' ,
518
+ locations : [ { line : 1 , column : 28 } ] ,
519
+ } ) ;
520
+
521
+ expectSyntaxError (
522
+ '"cannot escape half a pair \\uD83D\uDE00 esc"' ,
523
+ ) . to . deep . equal ( {
524
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD83D".' ,
525
+ locations : [ { line : 1 , column : 28 } ] ,
526
+ } ) ;
527
+
528
+ expectSyntaxError ( '"bad \\uD83D\\not an escape"' ) . to . deep . equal ( {
529
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD83D".' ,
530
+ locations : [ { line : 1 , column : 6 } ] ,
531
+ } ) ;
363
532
} ) ;
364
533
365
534
it ( 'lexes block strings' , ( ) => {
@@ -419,6 +588,13 @@ describe('Lexer', () => {
419
588
value : 'unescaped \\n\\r\\b\\t\\f\\u1234' ,
420
589
} ) ;
421
590
591
+ expect ( lexOne ( '"""unescaped unicode outside BMP \u{1f600}"""' ) ) . to . contain ( {
592
+ kind : TokenKind . BLOCK_STRING ,
593
+ start : 0 ,
594
+ end : 38 ,
595
+ value : 'unescaped unicode outside BMP \u{1f600}' ,
596
+ } ) ;
597
+
422
598
expect ( lexOne ( '"""slashes \\\\ \\/"""' ) ) . to . contain ( {
423
599
kind : TokenKind . BLOCK_STRING ,
424
600
start : 0 ,
@@ -491,18 +667,9 @@ describe('Lexer', () => {
491
667
locations : [ { line : 1 , column : 16 } ] ,
492
668
} ) ;
493
669
494
- expectSyntaxError (
495
- '"""contains unescaped \u0007 control char"""' ,
496
- ) . to . deep . equal ( {
497
- message : 'Syntax Error: Invalid character within String: U+0007.' ,
498
- locations : [ { line : 1 , column : 23 } ] ,
499
- } ) ;
500
-
501
- expectSyntaxError (
502
- '"""null-byte is not \u0000 end of file"""' ,
503
- ) . to . deep . equal ( {
504
- message : 'Syntax Error: Invalid character within String: U+0000.' ,
505
- locations : [ { line : 1 , column : 21 } ] ,
670
+ expectSyntaxError ( '"""contains invalid surrogate \uDEAD"""' ) . to . deep . equal ( {
671
+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
672
+ locations : [ { line : 1 , column : 31 } ] ,
506
673
} ) ;
507
674
} ) ;
508
675
@@ -842,6 +1009,16 @@ describe('Lexer', () => {
842
1009
locations : [ { line : 1 , column : 1 } ] ,
843
1010
} ) ;
844
1011
1012
+ expectSyntaxError ( '\x00' ) . to . deep . equal ( {
1013
+ message : 'Syntax Error: Unexpected character: U+0000.' ,
1014
+ locations : [ { line : 1 , column : 1 } ] ,
1015
+ } ) ;
1016
+
1017
+ expectSyntaxError ( '\b' ) . to . deep . equal ( {
1018
+ message : 'Syntax Error: Unexpected character: U+0008.' ,
1019
+ locations : [ { line : 1 , column : 1 } ] ,
1020
+ } ) ;
1021
+
845
1022
expectSyntaxError ( '\u00AA' ) . to . deep . equal ( {
846
1023
message : 'Syntax Error: Unexpected character: U+00AA.' ,
847
1024
locations : [ { line : 1 , column : 1 } ] ,
@@ -856,6 +1033,16 @@ describe('Lexer', () => {
856
1033
message : 'Syntax Error: Unexpected character: U+203B.' ,
857
1034
locations : [ { line : 1 , column : 1 } ] ,
858
1035
} ) ;
1036
+
1037
+ expectSyntaxError ( '\u{1f600}' ) . to . deep . equal ( {
1038
+ message : 'Syntax Error: Unexpected character: U+1F600.' ,
1039
+ locations : [ { line : 1 , column : 1 } ] ,
1040
+ } ) ;
1041
+
1042
+ expectSyntaxError ( '\uDEAD' ) . to . deep . equal ( {
1043
+ message : 'Syntax Error: Invalid character: U+DEAD.' ,
1044
+ locations : [ { line : 1 , column : 1 } ] ,
1045
+ } ) ;
859
1046
} ) ;
860
1047
861
1048
it ( 'lex reports useful information for dashes in names' , ( ) => {
@@ -936,9 +1123,15 @@ describe('Lexer', () => {
936
1123
end : 9 ,
937
1124
value : ' Comment' ,
938
1125
} ) ;
939
- expectSyntaxError ( '# \u0007' ) . to . deep . equal ( {
940
- message : 'Syntax Error: Invalid character: U+0007.' ,
941
- locations : [ { line : 1 , column : 3 } ] ,
1126
+ expect ( lexOne ( '# Comment \u{1f600}' ) . prev ) . to . contain ( {
1127
+ kind : TokenKind . COMMENT ,
1128
+ start : 0 ,
1129
+ end : 12 ,
1130
+ value : ' Comment \u{1f600}' ,
1131
+ } ) ;
1132
+ expectSyntaxError ( '# Invalid surrogate \uDEAD' ) . to . deep . equal ( {
1133
+ message : 'Syntax Error: Invalid character: U+DEAD.' ,
1134
+ locations : [ { line : 1 , column : 21 } ] ,
942
1135
} ) ;
943
1136
} ) ;
944
1137
} ) ;
0 commit comments