Skip to content

Stop parsing continuation lines in number literals #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 44 additions & 27 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ typedef struct {
bool in_line_continuation;
} Scanner;

// Tri-state scan result. The numeric order False < True < Error is relied on
// by bool_or_err_max below, so the values are spelled out explicitly.
typedef enum {
    False = 0,
    True = 1,
    Error = 2,
} BoolOrErr;

// Combine two scan results, keeping whichever ranks higher in the order
// False < True < Error. When both are equal, lhs is returned.
static BoolOrErr bool_or_err_max(BoolOrErr lhs, BoolOrErr rhs) {
    if (rhs > lhs) {
        return rhs;
    }
    return lhs;
}

// Thin wrapper over the lexer's own advance callback, called with the second
// argument set to false so the current character is consumed into the
// current token before moving on.
static inline void advance(TSLexer *lexer) {
    lexer->advance(lexer, false);
}

Expand Down Expand Up @@ -54,61 +64,63 @@ static bool is_exp_sentinel(char chr) {
}
}

// Scan a run of decimal digits starting at lexer->lookahead.
//
// NOTE(review): this listing is a rendered diff, so the old `bool` lines and
// the new `BoolOrErr` lines appear together without +/- markers. The summary
// below describes the BoolOrErr variant; confirm against the merged file.
//
// Returns False when the lookahead is not a digit, Error when the digits are
// directly followed by '&', and True otherwise.
static bool scan_int(TSLexer *lexer) {
static BoolOrErr scan_int(TSLexer *lexer) {
if (!iswdigit(lexer->lookahead)) {
return false;
return False;
}
// consume digits
while (iswdigit(lexer->lookahead)) {
advance(lexer); // store all digits
}

// handle line continuations
lexer->mark_end(lexer);
// Return an error if a line continuation is found. This scanner cannot
// handle line continuations, particularly in cases like:
//
// ```fortran
// b = 6& ! foo
// &7;
// ```
//
// Here, the scanner would need to return multiple tokens, but tree-sitter
// expects only a single token.
if (lexer->lookahead == '&') {
skip(lexer);
while (iswspace(lexer->lookahead)) {
skip(lexer);
}
// second '&' required to continue the literal
if (lexer->lookahead == '&') {
skip(lexer);
// don't return here, as we may have finished literal on first
// line but still have second '&'
scan_int(lexer);
}
return Error;
}

lexer->mark_end(lexer);
return true;
return True;
}

/// Scan a number of the forms 1XXX, 1.0XXX, 0.1XXX, 1.XDX, etc.
static bool scan_number(TSLexer *lexer) {
static BoolOrErr scan_number(TSLexer *lexer) {
lexer->result_symbol = INTEGER_LITERAL;
bool digits = scan_int(lexer);
BoolOrErr digits = scan_int(lexer);
if (lexer->lookahead == '.') {
advance(lexer);
// Exclude the decimal point if it is followed by any letter other than d/D
// or e/E. If no leading digits are present and a non-digit follows the
// decimal point, it's a nonmatch.
if (digits && !iswalnum(lexer->lookahead)) {
if ((digits == True) && !iswalnum(lexer->lookahead)) {
lexer->mark_end(lexer); // add decimal to token
}
lexer->result_symbol = FLOAT_LITERAL;
}
// If the next char isn't a digit, return, since we handle exp
// notation and precision identifiers separately. If there are
// no leading digits, it's a nonmatch.
digits = scan_int(lexer) || digits;
if (digits) {
digits = bool_or_err_max(scan_int(lexer), digits);
if (digits == True) {
// process exp notation
if (is_exp_sentinel(lexer->lookahead)) {
advance(lexer);
if (lexer->lookahead == '+' || lexer->lookahead == '-') {
advance(lexer);
}
if (!scan_int(lexer)) {
return true; // valid number token with junk after it
switch (scan_int(lexer)) {
case False:
return True; // valid number token with junk after it
case True:
break;
case Error:
return Error;
}
lexer->mark_end(lexer);
lexer->result_symbol = FLOAT_LITERAL;
Expand Down Expand Up @@ -429,8 +441,13 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
if (valid_symbols[INTEGER_LITERAL] || valid_symbols[FLOAT_LITERAL] ||
valid_symbols[BOZ_LITERAL]) {
// extract out root number from expression
if (scan_number(lexer)) {
return true;
switch (scan_number(lexer)) {
case False:
break;
case True:
return true;
case Error:
return false;
}
if (scan_boz(lexer)) {
return true;
Expand Down
25 changes: 13 additions & 12 deletions test/corpus/expressions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,8 @@ end program
(type_qualifier)
(init_declarator
(identifier)
(ERROR
(number_literal))
(number_literal)))
(end_of_statement)
(variable_declaration
Expand All @@ -1040,18 +1042,17 @@ end program
(init_declarator
(identifier)
(array_literal
(number_literal)
(number_literal))))
(end_of_statement)
(variable_declaration
(intrinsic_type)
(type_qualifier
(argument_list
(number_literal)))
(init_declarator
(identifier)
(array_literal
(number_literal)
(ERROR
(number_literal))
(identifier)
(call_expression
(identifier)
(argument_list
(number_literal)))
(ERROR
(ERROR
(number_literal))
(number_literal))
(number_literal))))
(end_of_statement)
(end_program_statement
Expand Down