@@ -28,13 +28,6 @@ function expectSyntaxError(text: string) {
28
28
}
29
29
30
30
describe ( 'Lexer' , ( ) => {
31
- it ( 'disallows uncommon control characters' , ( ) => {
32
- expectSyntaxError ( '\u0007' ) . to . deep . equal ( {
33
- message : 'Syntax Error: Invalid character: U+0007.' ,
34
- locations : [ { line : 1 , column : 1 } ] ,
35
- } ) ;
36
- } ) ;
37
-
38
31
it ( 'ignores BOM header' , ( ) => {
39
32
expect ( lexOne ( '\uFEFF foo' ) ) . to . contain ( {
40
33
kind : TokenKind . NAME ,
@@ -264,12 +257,98 @@ describe('Lexer', () => {
264
257
value : 'slashes \\ /' ,
265
258
} ) ;
266
259
260
+ expect ( lexOne ( '"unescaped unicode outside BMP \u{1f600}"' ) ) . to . contain ( {
261
+ kind : TokenKind . STRING ,
262
+ start : 0 ,
263
+ end : 34 ,
264
+ value : 'unescaped unicode outside BMP \u{1f600}' ,
265
+ } ) ;
266
+
267
+ expect (
268
+ lexOne ( '"unescaped maximal unicode outside BMP \u{10ffff}"' ) ,
269
+ ) . to . contain ( {
270
+ kind : TokenKind . STRING ,
271
+ start : 0 ,
272
+ end : 42 ,
273
+ value : 'unescaped maximal unicode outside BMP \u{10ffff}' ,
274
+ } ) ;
275
+
267
276
expect ( lexOne ( '"unicode \\u1234\\u5678\\u90AB\\uCDEF"' ) ) . to . contain ( {
268
277
kind : TokenKind . STRING ,
269
278
start : 0 ,
270
279
end : 34 ,
271
280
value : 'unicode \u1234\u5678\u90AB\uCDEF' ,
272
281
} ) ;
282
+
283
+ expect ( lexOne ( '"unicode \\u{1234}\\u{5678}\\u{90AB}\\u{CDEF}"' ) ) . to . contain (
284
+ {
285
+ kind : TokenKind . STRING ,
286
+ start : 0 ,
287
+ end : 42 ,
288
+ value : 'unicode \u1234\u5678\u90AB\uCDEF' ,
289
+ } ,
290
+ ) ;
291
+
292
+ expect (
293
+ lexOne ( '"string with unicode escape outside BMP \\u{1F600}"' ) ,
294
+ ) . to . contain ( {
295
+ kind : TokenKind . STRING ,
296
+ start : 0 ,
297
+ end : 50 ,
298
+ value : 'string with unicode escape outside BMP \u{1f600}' ,
299
+ } ) ;
300
+
301
+ expect ( lexOne ( '"string with minimal unicode escape \\u{0}"' ) ) . to . contain ( {
302
+ kind : TokenKind . STRING ,
303
+ start : 0 ,
304
+ end : 42 ,
305
+ value : 'string with minimal unicode escape \u{0}' ,
306
+ } ) ;
307
+
308
+ expect (
309
+ lexOne ( '"string with maximal unicode escape \\u{10FFFF}"' ) ,
310
+ ) . to . contain ( {
311
+ kind : TokenKind . STRING ,
312
+ start : 0 ,
313
+ end : 47 ,
314
+ value : 'string with maximal unicode escape \u{10FFFF}' ,
315
+ } ) ;
316
+
317
+ expect (
318
+ lexOne ( '"string with maximal minimal unicode escape \\u{00000000}"' ) ,
319
+ ) . to . contain ( {
320
+ kind : TokenKind . STRING ,
321
+ start : 0 ,
322
+ end : 57 ,
323
+ value : 'string with maximal minimal unicode escape \u{0}' ,
324
+ } ) ;
325
+
326
+ expect (
327
+ lexOne ( '"string with unicode surrogate pair escape \\uD83D\\uDE00"' ) ,
328
+ ) . to . contain ( {
329
+ kind : TokenKind . STRING ,
330
+ start : 0 ,
331
+ end : 56 ,
332
+ value : 'string with unicode surrogate pair escape \u{1f600}' ,
333
+ } ) ;
334
+
335
+ expect (
336
+ lexOne ( '"string with minimal surrogate pair escape \\uD800\\uDC00"' ) ,
337
+ ) . to . contain ( {
338
+ kind : TokenKind . STRING ,
339
+ start : 0 ,
340
+ end : 56 ,
341
+ value : 'string with minimal surrogate pair escape \u{10000}' ,
342
+ } ) ;
343
+
344
+ expect (
345
+ lexOne ( '"string with maximal surrogate pair escape \\uDBFF\\uDFFF"' ) ,
346
+ ) . to . contain ( {
347
+ kind : TokenKind . STRING ,
348
+ start : 0 ,
349
+ end : 56 ,
350
+ value : 'string with maximal surrogate pair escape \u{10FFFF}' ,
351
+ } ) ;
273
352
} ) ;
274
353
275
354
it ( 'lex reports useful string errors' , ( ) => {
@@ -299,16 +378,19 @@ describe('Lexer', () => {
299
378
locations : [ { line : 1 , column : 1 } ] ,
300
379
} ) ;
301
380
302
- expectSyntaxError ( '"contains unescaped \u0007 control char"' ) . to . deep . equal (
303
- {
304
- message : 'Syntax Error: Invalid character within String: U+0007.' ,
305
- locations : [ { line : 1 , column : 21 } ] ,
306
- } ,
307
- ) ;
381
+ expectSyntaxError ( '"bad surrogate \uDEAD"' ) . to . deep . equal ( {
382
+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
383
+ locations : [ { line : 1 , column : 16 } ] ,
384
+ } ) ;
385
+
386
+ expectSyntaxError ( '"bad high surrogate pair \uDEAD\uDEAD"' ) . to . deep . equal ( {
387
+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
388
+ locations : [ { line : 1 , column : 26 } ] ,
389
+ } ) ;
308
390
309
- expectSyntaxError ( '"null-byte is not \u0000 end of file "' ) . to . deep . equal ( {
310
- message : 'Syntax Error: Invalid character within String: U+0000 .' ,
311
- locations : [ { line : 1 , column : 19 } ] ,
391
+ expectSyntaxError ( '"bad low surrogate pair \uD800\uD800 "' ) . to . deep . equal ( {
392
+ message : 'Syntax Error: Invalid character within String: U+D800 .' ,
393
+ locations : [ { line : 1 , column : 25 } ] ,
312
394
} ) ;
313
395
314
396
expectSyntaxError ( '"multi\nline"' ) . to . deep . equal ( {
@@ -355,6 +437,93 @@ describe('Lexer', () => {
355
437
message : 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXF".' ,
356
438
locations : [ { line : 1 , column : 6 } ] ,
357
439
} ) ;
440
+
441
+ expectSyntaxError ( '"bad \\u{} esc"' ) . to . deep . equal ( {
442
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{}".' ,
443
+ locations : [ { line : 1 , column : 6 } ] ,
444
+ } ) ;
445
+
446
+ expectSyntaxError ( '"bad \\u{FXXX} esc"' ) . to . deep . equal ( {
447
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FX".' ,
448
+ locations : [ { line : 1 , column : 6 } ] ,
449
+ } ) ;
450
+
451
+ expectSyntaxError ( '"bad \\u{FFFF esc"' ) . to . deep . equal ( {
452
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FFFF ".' ,
453
+ locations : [ { line : 1 , column : 6 } ] ,
454
+ } ) ;
455
+
456
+ expectSyntaxError ( '"bad \\u{FFFF"' ) . to . deep . equal ( {
457
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FFFF"".' ,
458
+ locations : [ { line : 1 , column : 6 } ] ,
459
+ } ) ;
460
+
461
+ expectSyntaxError ( '"too high \\u{110000} esc"' ) . to . deep . equal ( {
462
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{110000}".' ,
463
+ locations : [ { line : 1 , column : 11 } ] ,
464
+ } ) ;
465
+
466
+ expectSyntaxError ( '"way too high \\u{12345678} esc"' ) . to . deep . equal ( {
467
+ message :
468
+ 'Syntax Error: Invalid Unicode escape sequence: "\\u{12345678}".' ,
469
+ locations : [ { line : 1 , column : 15 } ] ,
470
+ } ) ;
471
+
472
+ expectSyntaxError ( '"too long \\u{000000000} esc"' ) . to . deep . equal ( {
473
+ message :
474
+ 'Syntax Error: Invalid Unicode escape sequence: "\\u{000000000".' ,
475
+ locations : [ { line : 1 , column : 11 } ] ,
476
+ } ) ;
477
+
478
+ expectSyntaxError ( '"bad surrogate \\uDEAD esc"' ) . to . deep . equal ( {
479
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uDEAD".' ,
480
+ locations : [ { line : 1 , column : 16 } ] ,
481
+ } ) ;
482
+
483
+ expectSyntaxError ( '"bad surrogate \\u{DEAD} esc"' ) . to . deep . equal ( {
484
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{DEAD}".' ,
485
+ locations : [ { line : 1 , column : 16 } ] ,
486
+ } ) ;
487
+
488
+ expectSyntaxError (
489
+ '"cannot use braces for surrogate pair \\u{D83D}\\u{DE00} esc"' ,
490
+ ) . to . deep . equal ( {
491
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{D83D}".' ,
492
+ locations : [ { line : 1 , column : 39 } ] ,
493
+ } ) ;
494
+
495
+ expectSyntaxError (
496
+ '"bad high surrogate pair \\uDEAD\\uDEAD esc"' ,
497
+ ) . to . deep . equal ( {
498
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uDEAD".' ,
499
+ locations : [ { line : 1 , column : 26 } ] ,
500
+ } ) ;
501
+
502
+ expectSyntaxError (
503
+ '"bad low surrogate pair \\uD800\\uD800 esc"' ,
504
+ ) . to . deep . equal ( {
505
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD800".' ,
506
+ locations : [ { line : 1 , column : 25 } ] ,
507
+ } ) ;
508
+
509
+ expectSyntaxError (
510
+ '"cannot escape half a pair \uD83D\\uDE00 esc"' ,
511
+ ) . to . deep . equal ( {
512
+ message : 'Syntax Error: Invalid character within String: U+D83D.' ,
513
+ locations : [ { line : 1 , column : 28 } ] ,
514
+ } ) ;
515
+
516
+ expectSyntaxError (
517
+ '"cannot escape half a pair \\uD83D\uDE00 esc"' ,
518
+ ) . to . deep . equal ( {
519
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD83D".' ,
520
+ locations : [ { line : 1 , column : 28 } ] ,
521
+ } ) ;
522
+
523
+ expectSyntaxError ( '"bad \\uD83D\\not an escape"' ) . to . deep . equal ( {
524
+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD83D".' ,
525
+ locations : [ { line : 1 , column : 6 } ] ,
526
+ } ) ;
358
527
} ) ;
359
528
360
529
it ( 'lexes block strings' , ( ) => {
@@ -414,6 +583,13 @@ describe('Lexer', () => {
414
583
value : 'unescaped \\n\\r\\b\\t\\f\\u1234' ,
415
584
} ) ;
416
585
586
+ expect ( lexOne ( '"""unescaped unicode outside BMP \u{1f600}"""' ) ) . to . contain ( {
587
+ kind : TokenKind . BLOCK_STRING ,
588
+ start : 0 ,
589
+ end : 38 ,
590
+ value : 'unescaped unicode outside BMP \u{1f600}' ,
591
+ } ) ;
592
+
417
593
expect ( lexOne ( '"""slashes \\\\ \\/"""' ) ) . to . contain ( {
418
594
kind : TokenKind . BLOCK_STRING ,
419
595
start : 0 ,
@@ -486,18 +662,9 @@ describe('Lexer', () => {
486
662
locations : [ { line : 1 , column : 16 } ] ,
487
663
} ) ;
488
664
489
- expectSyntaxError (
490
- '"""contains unescaped \u0007 control char"""' ,
491
- ) . to . deep . equal ( {
492
- message : 'Syntax Error: Invalid character within String: U+0007.' ,
493
- locations : [ { line : 1 , column : 23 } ] ,
494
- } ) ;
495
-
496
- expectSyntaxError (
497
- '"""null-byte is not \u0000 end of file"""' ,
498
- ) . to . deep . equal ( {
499
- message : 'Syntax Error: Invalid character within String: U+0000.' ,
500
- locations : [ { line : 1 , column : 21 } ] ,
665
+ expectSyntaxError ( '"""contains invalid surrogate \uDEAD"""' ) . to . deep . equal ( {
666
+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
667
+ locations : [ { line : 1 , column : 31 } ] ,
501
668
} ) ;
502
669
} ) ;
503
670
@@ -837,6 +1004,16 @@ describe('Lexer', () => {
837
1004
locations : [ { line : 1 , column : 1 } ] ,
838
1005
} ) ;
839
1006
1007
+ expectSyntaxError ( '\x00' ) . to . deep . equal ( {
1008
+ message : 'Syntax Error: Unexpected character: U+0000.' ,
1009
+ locations : [ { line : 1 , column : 1 } ] ,
1010
+ } ) ;
1011
+
1012
+ expectSyntaxError ( '\b' ) . to . deep . equal ( {
1013
+ message : 'Syntax Error: Unexpected character: U+0008.' ,
1014
+ locations : [ { line : 1 , column : 1 } ] ,
1015
+ } ) ;
1016
+
840
1017
expectSyntaxError ( '\u00AA' ) . to . deep . equal ( {
841
1018
message : 'Syntax Error: Unexpected character: U+00AA.' ,
842
1019
locations : [ { line : 1 , column : 1 } ] ,
@@ -851,6 +1028,16 @@ describe('Lexer', () => {
851
1028
message : 'Syntax Error: Unexpected character: U+203B.' ,
852
1029
locations : [ { line : 1 , column : 1 } ] ,
853
1030
} ) ;
1031
+
1032
+ expectSyntaxError ( '\u{1f600}' ) . to . deep . equal ( {
1033
+ message : 'Syntax Error: Unexpected character: U+1F600.' ,
1034
+ locations : [ { line : 1 , column : 1 } ] ,
1035
+ } ) ;
1036
+
1037
+ expectSyntaxError ( '\uDEAD' ) . to . deep . equal ( {
1038
+ message : 'Syntax Error: Invalid character: U+DEAD.' ,
1039
+ locations : [ { line : 1 , column : 1 } ] ,
1040
+ } ) ;
854
1041
} ) ;
855
1042
856
1043
it ( 'lex reports useful information for dashes in names' , ( ) => {
@@ -931,9 +1118,15 @@ describe('Lexer', () => {
931
1118
end : 9 ,
932
1119
value : ' Comment' ,
933
1120
} ) ;
934
- expectSyntaxError ( '# \u0007' ) . to . deep . equal ( {
935
- message : 'Syntax Error: Invalid character: U+0007.' ,
936
- locations : [ { line : 1 , column : 3 } ] ,
1121
+ expect ( lexOne ( '# Comment \u{1f600}' ) . prev ) . to . contain ( {
1122
+ kind : TokenKind . COMMENT ,
1123
+ start : 0 ,
1124
+ end : 12 ,
1125
+ value : ' Comment \u{1f600}' ,
1126
+ } ) ;
1127
+ expectSyntaxError ( '# Invalid surrogate \uDEAD' ) . to . deep . equal ( {
1128
+ message : 'Syntax Error: Invalid character: U+DEAD.' ,
1129
+ locations : [ { line : 1 , column : 21 } ] ,
937
1130
} ) ;
938
1131
} ) ;
939
1132
} ) ;
0 commit comments