From 1e7bca6d184d956435610c4e18173e65a3ac4a4d Mon Sep 17 00:00:00 2001
From: Christoph Zwerschke
Date: Sat, 21 Dec 2019 11:36:11 +0100
Subject: [PATCH] RFC: Number lexer lookahead restriction

Implements and adds the tests described by
https://github.com/graphql/graphql-spec/pull/601

Replicates graphql/graphql-js@ca1c1dfe0699549c75e1a6c0017bfca11497f654
---
 README.md                     |  2 +-
 src/graphql/language/lexer.py |  9 +++++++--
 tests/language/test_lexer.py  | 30 ++++++++++++++++++++++++++++--
 3 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 2a6b9fa9..a18a766b 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ The current stable version 3.0.1 of GraphQL-core is up-to-date
 with GraphQL.js version 14.5.8.
 
 All parts of the API are covered by an extensive test suite
-of currently 1992 unit tests.
+of currently 1993 unit tests.
 
 
 ## Documentation
diff --git a/src/graphql/language/lexer.py b/src/graphql/language/lexer.py
index f311e736..75b17a4a 100644
--- a/src/graphql/language/lexer.py
+++ b/src/graphql/language/lexer.py
@@ -215,8 +215,8 @@ def read_number(
             position = self.read_digits(position, char)
             char = body[position : position + 1]
 
-        # Numbers cannot be followed by . or e
-        if char and char in ".eE":
+        # Numbers cannot be followed by . or NameStart
+        if char and (char == "." or is_name_start(char)):
             raise GraphQLSyntaxError(
                 source,
                 position,
@@ -436,3 +436,8 @@ def char2hex(a: str):
     elif "a" <= a <= "f":  # a-f
         return ord(a) - 87
     return -1
+
+
+def is_name_start(char: str) -> bool:
+    """Check whether char is an underscore or a plain ASCII letter"""
+    return char == "_" or "A" <= char <= "Z" or "a" <= char <= "z"
diff --git a/tests/language/test_lexer.py b/tests/language/test_lexer.py
index f69c9d67..cfb3e61e 100644
--- a/tests/language/test_lexer.py
+++ b/tests/language/test_lexer.py
@@ -19,9 +19,9 @@ def lex_second(s: str) -> Token:
     return lexer.advance()
 
 
-def assert_syntax_error(text, message, location):
+def assert_syntax_error(text, message, location, second=False):
     with raises(GraphQLSyntaxError) as exc_info:
-        lex_one(text)
+        lex_second(text) if second else lex_one(text)
     error = exc_info.value
     assert error.message == f"Syntax Error: {message}"
     assert error.locations == [location]
@@ -330,6 +330,32 @@ def lex_reports_useful_number_errors():
             "1.23.4", "Invalid number, expected digit but got: '.'.", (1, 5)
         )
 
+    def lex_does_not_allow_name_start_after_a_number():
+        assert_syntax_error(
+            "0xF1", "Invalid number, expected digit but got: 'x'.", (1, 2)
+        )
+        assert_syntax_error(
+            "0b10", "Invalid number, expected digit but got: 'b'.", (1, 2)
+        )
+        assert_syntax_error(
+            "123abc", "Invalid number, expected digit but got: 'a'.", (1, 4)
+        )
+        assert_syntax_error(
+            "1_1234", "Invalid number, expected digit but got: '_'.", (1, 2)
+        )
+        assert_syntax_error(
+            "1_1234", "Invalid number, expected digit but got: '_'.", (1, 2)
+        )
+        assert_syntax_error(
+            "1ß", "Cannot parse the unexpected character 'ß'.", (1, 2), second=True,
+        )
+        assert_syntax_error(
+            "1.23f", "Invalid number, expected digit but got: 'f'.", (1, 5)
+        )
+        assert_syntax_error(
+            "12ß", "Cannot parse the unexpected character 'ß'.", (1, 3), second=True,
+        )
+
     # noinspection PyArgumentEqualDefault
     def lexes_punctuation():
         assert lex_one("!") == Token(TokenKind.BANG, 0, 1, 1, 1, None, None)