Skip to content

Commit 09def45

Browse files
committed
Refactor Lexer
The lexer needed some cleanup. I found myself doing this as part of a Unicode RFC, but I am factoring it out separately to make the Unicode RFC PR easier to follow. * Always use hexadecimal form for code values. * Remove use of `isNaN` for checking source over-reads. * Define `isSourceCharacter`. * Add more documentation and comments, and replace regex with lexical grammar. * Simplify error messages. * Add additional tests.
1 parent 9a4a228 commit 09def45

File tree

4 files changed

+551
-459
lines changed

4 files changed

+551
-459
lines changed

src/language/__tests__/lexer-test.js

Lines changed: 66 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@ function expectSyntaxError(text: string) {
2929
describe('Lexer', () => {
3030
it('disallows uncommon control characters', () => {
3131
expectSyntaxError('\u0007').to.deep.equal({
32-
message: 'Syntax Error: Cannot contain the invalid character "\\u0007".',
32+
message: 'Syntax Error: Invalid character: U+0007.',
3333
locations: [{ line: 1, column: 1 }],
3434
});
3535
});
3636

37-
it('accepts BOM header', () => {
37+
it('ignores BOM header', () => {
3838
expect(lexOne('\uFEFF foo')).to.contain({
3939
kind: TokenKind.NAME,
4040
start: 2,
@@ -138,6 +138,13 @@ describe('Lexer', () => {
138138
value: 'foo',
139139
});
140140

141+
expect(lexOne('\t\tfoo\t\t')).to.contain({
142+
kind: TokenKind.NAME,
143+
start: 2,
144+
end: 5,
145+
value: 'foo',
146+
});
147+
141148
expect(
142149
lexOne(`
143150
#comment
@@ -166,7 +173,7 @@ describe('Lexer', () => {
166173
caughtError = error;
167174
}
168175
expect(String(caughtError)).to.equal(dedent`
169-
Syntax Error: Cannot parse the unexpected character "?".
176+
Syntax Error: Unexpected character: "?".
170177
171178
GraphQL request:3:5
172179
2 |
@@ -186,7 +193,7 @@ describe('Lexer', () => {
186193
caughtError = error;
187194
}
188195
expect(String(caughtError)).to.equal(dedent`
189-
Syntax Error: Cannot parse the unexpected character "?".
196+
Syntax Error: Unexpected character: "?".
190197
191198
foo.js:13:6
192199
12 |
@@ -205,7 +212,7 @@ describe('Lexer', () => {
205212
caughtError = error;
206213
}
207214
expect(String(caughtError)).to.equal(dedent`
208-
Syntax Error: Cannot parse the unexpected character "?".
215+
Syntax Error: Unexpected character: "?".
209216
210217
foo.js:1:5
211218
1 | ?
@@ -293,13 +300,13 @@ describe('Lexer', () => {
293300

294301
expectSyntaxError('"contains unescaped \u0007 control char"').to.deep.equal(
295302
{
296-
message: 'Syntax Error: Invalid character within String: "\\u0007".',
303+
message: 'Syntax Error: Invalid character within String: U+0007.',
297304
locations: [{ line: 1, column: 21 }],
298305
},
299306
);
300307

301308
expectSyntaxError('"null-byte is not \u0000 end of file"').to.deep.equal({
302-
message: 'Syntax Error: Invalid character within String: "\\u0000".',
309+
message: 'Syntax Error: Invalid character within String: U+0000.',
303310
locations: [{ line: 1, column: 19 }],
304311
});
305312

@@ -314,38 +321,38 @@ describe('Lexer', () => {
314321
});
315322

316323
expectSyntaxError('"bad \\z esc"').to.deep.equal({
317-
message: 'Syntax Error: Invalid character escape sequence: \\z.',
318-
locations: [{ line: 1, column: 7 }],
324+
message: 'Syntax Error: Invalid character escape sequence: "\\z".',
325+
locations: [{ line: 1, column: 6 }],
319326
});
320327

321328
expectSyntaxError('"bad \\x esc"').to.deep.equal({
322-
message: 'Syntax Error: Invalid character escape sequence: \\x.',
323-
locations: [{ line: 1, column: 7 }],
329+
message: 'Syntax Error: Invalid character escape sequence: "\\x".',
330+
locations: [{ line: 1, column: 6 }],
324331
});
325332

326333
expectSyntaxError('"bad \\u1 esc"').to.deep.equal({
327-
message: 'Syntax Error: Invalid character escape sequence: \\u1 es.',
328-
locations: [{ line: 1, column: 7 }],
334+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u1 es".',
335+
locations: [{ line: 1, column: 6 }],
329336
});
330337

331338
expectSyntaxError('"bad \\u0XX1 esc"').to.deep.equal({
332-
message: 'Syntax Error: Invalid character escape sequence: \\u0XX1.',
333-
locations: [{ line: 1, column: 7 }],
339+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u0XX1".',
340+
locations: [{ line: 1, column: 6 }],
334341
});
335342

336343
expectSyntaxError('"bad \\uXXXX esc"').to.deep.equal({
337-
message: 'Syntax Error: Invalid character escape sequence: \\uXXXX.',
338-
locations: [{ line: 1, column: 7 }],
344+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXX".',
345+
locations: [{ line: 1, column: 6 }],
339346
});
340347

341348
expectSyntaxError('"bad \\uFXXX esc"').to.deep.equal({
342-
message: 'Syntax Error: Invalid character escape sequence: \\uFXXX.',
343-
locations: [{ line: 1, column: 7 }],
349+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uFXXX".',
350+
locations: [{ line: 1, column: 6 }],
344351
});
345352

346353
expectSyntaxError('"bad \\uXXXF esc"').to.deep.equal({
347-
message: 'Syntax Error: Invalid character escape sequence: \\uXXXF.',
348-
locations: [{ line: 1, column: 7 }],
354+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXF".',
355+
locations: [{ line: 1, column: 6 }],
349356
});
350357
});
351358

@@ -481,14 +488,14 @@ describe('Lexer', () => {
481488
expectSyntaxError(
482489
'"""contains unescaped \u0007 control char"""',
483490
).to.deep.equal({
484-
message: 'Syntax Error: Invalid character within String: "\\u0007".',
491+
message: 'Syntax Error: Invalid character within String: U+0007.',
485492
locations: [{ line: 1, column: 23 }],
486493
});
487494

488495
expectSyntaxError(
489496
'"""null-byte is not \u0000 end of file"""',
490497
).to.deep.equal({
491-
message: 'Syntax Error: Invalid character within String: "\\u0000".',
498+
message: 'Syntax Error: Invalid character within String: U+0000.',
492499
locations: [{ line: 1, column: 21 }],
493500
});
494501
});
@@ -624,7 +631,7 @@ describe('Lexer', () => {
624631
});
625632

626633
expectSyntaxError('+1').to.deep.equal({
627-
message: 'Syntax Error: Cannot parse the unexpected character "+".',
634+
message: 'Syntax Error: Unexpected character: "+".',
628635
locations: [{ line: 1, column: 1 }],
629636
});
630637

@@ -649,7 +656,7 @@ describe('Lexer', () => {
649656
});
650657

651658
expectSyntaxError('.123').to.deep.equal({
652-
message: 'Syntax Error: Cannot parse the unexpected character ".".',
659+
message: 'Syntax Error: Unexpected character: ".".',
653660
locations: [{ line: 1, column: 1 }],
654661
});
655662

@@ -673,6 +680,11 @@ describe('Lexer', () => {
673680
locations: [{ line: 1, column: 5 }],
674681
});
675682

683+
expectSyntaxError('1.0e"').to.deep.equal({
684+
message: "Syntax Error: Invalid number, expected digit but got: '\"'.",
685+
locations: [{ line: 1, column: 5 }],
686+
});
687+
676688
expectSyntaxError('1.2e3e').to.deep.equal({
677689
message: 'Syntax Error: Invalid number, expected digit but got: "e".',
678690
locations: [{ line: 1, column: 6 }],
@@ -707,7 +719,7 @@ describe('Lexer', () => {
707719
locations: [{ line: 1, column: 2 }],
708720
});
709721
expectSyntaxError('1\u00DF').to.deep.equal({
710-
message: 'Syntax Error: Cannot parse the unexpected character "\\u00DF".',
722+
message: 'Syntax Error: Unexpected character: U+00DF.',
711723
locations: [{ line: 1, column: 2 }],
712724
});
713725
expectSyntaxError('1.23f').to.deep.equal({
@@ -815,22 +827,17 @@ describe('Lexer', () => {
815827

816828
it('lex reports useful unknown character error', () => {
817829
expectSyntaxError('..').to.deep.equal({
818-
message: 'Syntax Error: Cannot parse the unexpected character ".".',
830+
message: 'Syntax Error: Unexpected character: ".".',
819831
locations: [{ line: 1, column: 1 }],
820832
});
821833

822834
expectSyntaxError('?').to.deep.equal({
823-
message: 'Syntax Error: Cannot parse the unexpected character "?".',
835+
message: 'Syntax Error: Unexpected character: "?".',
824836
locations: [{ line: 1, column: 1 }],
825837
});
826838

827839
expectSyntaxError('\u203B').to.deep.equal({
828-
message: 'Syntax Error: Cannot parse the unexpected character "\\u203B".',
829-
locations: [{ line: 1, column: 1 }],
830-
});
831-
832-
expectSyntaxError('\u200b').to.deep.equal({
833-
message: 'Syntax Error: Cannot parse the unexpected character "\\u200B".',
840+
message: 'Syntax Error: Unexpected character: U+203B.',
834841
locations: [{ line: 1, column: 1 }],
835842
});
836843
});
@@ -893,6 +900,31 @@ describe('Lexer', () => {
893900
TokenKind.EOF,
894901
]);
895902
});
903+
904+
it('lexes comments', () => {
905+
expect(lexOne('# Comment').prev).to.contain({
906+
kind: TokenKind.COMMENT,
907+
start: 0,
908+
end: 9,
909+
value: ' Comment',
910+
});
911+
expect(lexOne('# Comment\nAnother line').prev).to.contain({
912+
kind: TokenKind.COMMENT,
913+
start: 0,
914+
end: 9,
915+
value: ' Comment',
916+
});
917+
expect(lexOne('# Comment\r\nAnother line').prev).to.contain({
918+
kind: TokenKind.COMMENT,
919+
start: 0,
920+
end: 9,
921+
value: ' Comment',
922+
});
923+
expectSyntaxError('# \u0007').to.deep.equal({
924+
message: 'Syntax Error: Invalid character: U+0007.',
925+
locations: [{ line: 1, column: 3 }],
926+
});
927+
});
896928
});
897929

898930
describe('isPunctuatorTokenKind', () => {

src/language/ast.d.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ export class Token {
8585
end: number,
8686
line: number,
8787
column: number,
88-
prev: Token | null,
8988
value?: string,
9089
);
9190

src/language/ast.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ export class Token {
9393
end: number,
9494
line: number,
9595
column: number,
96-
prev: Token | null,
9796
value?: string,
9897
) {
9998
this.kind = kind;
@@ -102,7 +101,7 @@ export class Token {
102101
this.line = line;
103102
this.column = column;
104103
this.value = value;
105-
this.prev = prev;
104+
this.prev = null;
106105
this.next = null;
107106
}
108107

0 commit comments

Comments
 (0)