From 6dc3d5db755f186745fbee4a01982617b64251fe Mon Sep 17 00:00:00 2001 From: Max Horn Date: Fri, 22 May 2020 17:43:25 +0200 Subject: [PATCH] Remove support for float literals starting with a period Before this patch, writing .0 instead of 0.0 was allowed, as in C, C++, D, Java, Go, Perl, Python and many other languages. This required a hack in the form of an additional entry point for our scanner. We now get rid of it, matching languages like Ada, Haskell, OCaml, Rust, Swift. --- src/read.c | 7 -- src/scanner.c | 123 +++++++++++---------------- src/scanner.h | 13 --- tst/testbugfix/2013-08-21-t00295.tst | 5 +- tst/testinstall/float.tst | 2 +- tst/testinstall/longnumber.tst | 25 +++--- 6 files changed, 62 insertions(+), 113 deletions(-) diff --git a/src/read.c b/src/read.c index a85ffe0c59..13b74f524d 100644 --- a/src/read.c +++ b/src/read.c @@ -1495,13 +1495,6 @@ static void ReadFuncExprAbbrevSingle(ReaderState * rs, TypSymbolSet follow) */ static void ReadLiteral(ReaderState * rs, TypSymbolSet follow, Char mode) { - if (rs->s.Symbol == S_DOT) { - // HACK: The only way a dot could turn up here is in a floating point - // literal that starts with '.'. Call back to the scanner to deal - // with this. - ScanForFloatAfterDotHACK(&rs->s); - } - switch (rs->s.Symbol) { /* */ diff --git a/src/scanner.c b/src/scanner.c index 6292ae2188..c0fed54ebf 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -366,10 +366,6 @@ static UInt GetIdent(ScannerState * s, Int i, Char c) ** ** When 's->Value' is completely filled, then a GAP string object is ** created in 's->ValueObj' and all data is stored there. -** -** The argument is used to signal if a decimal point was already read, -** or whether we are starting from scratch.. 
-** */ static UInt AddCharToBuf(Obj * string, Char * buf, UInt bufsize, UInt pos, Char c) { @@ -386,89 +382,79 @@ static UInt AddCharToValue(ScannerState * s, UInt pos, Char c) return AddCharToBuf(&s->ValueObj, s->Value, MAX_VALUE_LEN - 1, pos, c); } -static UInt GetNumber(ScannerState * s, Int readDecimalPoint, Char c) +static UInt GetNumber(ScannerState * s, Char c) { UInt symbol = S_ILLEGAL; UInt i = 0; - BOOL seenADigit = FALSE; s->ValueObj = 0; - if (readDecimalPoint) { - s->Value[i++] = '.'; + GAP_ASSERT(IsDigit(c)); + + // read initial sequence of digits into 'Value' + while (IsDigit(c)) { + i = AddCharToValue(s, i, c); + c = GET_NEXT_CHAR(); } - else { - // read initial sequence of digits into 'Value' - while (IsDigit(c)) { - i = AddCharToValue(s, i, c); - seenADigit = TRUE; - c = GET_NEXT_CHAR(); - } - // maybe we saw an identifier character and realised that this is an - // identifier we are reading - if (IsIdent(c) || c == '\\') { - // if necessary, copy back from s->ValueObj to s->Value - if (s->ValueObj) { - i = GET_LEN_STRING(s->ValueObj); - GAP_ASSERT(i >= MAX_VALUE_LEN - 1); - memcpy(s->Value, CONST_CSTR_STRING(s->ValueObj), - MAX_VALUE_LEN); - s->ValueObj = 0; - } - // this looks like an identifier, scan the rest of it - return GetIdent(s, i, c); + // maybe we saw an identifier character and realised that this is an + // identifier we are reading + if (IsIdent(c) || c == '\\') { + // if necessary, copy back from s->ValueObj to s->Value + if (s->ValueObj) { + i = GET_LEN_STRING(s->ValueObj); + GAP_ASSERT(i >= MAX_VALUE_LEN - 1); + memcpy(s->Value, CONST_CSTR_STRING(s->ValueObj), + MAX_VALUE_LEN); + s->ValueObj = 0; } + // this looks like an identifier, scan the rest of it + return GetIdent(s, i, c); + } - // Or maybe we saw a '.' which could indicate one of three things: - // - a float literal: 12.345 - // - S_DOT, i.e., '.' used to access a record entry: r.12.345 - // - S_DDOT, i.e., '..' 
in a range expression: [12..345] - if (c == '.') { - GAP_ASSERT(i < MAX_VALUE_LEN - 1); - - // If the symbol before this integer was S_DOT then we must be in - // a nested record element expression, so don't look for a float. - // This is a bit fragile - if (s->Symbol == S_DOT || s->Symbol == S_BDOT) { - symbol = S_INT; - goto finish; - } - - // peek ahead to decide if we are looking at a range expression - if (PEEK_NEXT_CHAR(s->input) == '.') { - // we are looking at '..' and are probably inside a range - // expression - symbol = S_INT; - goto finish; - } + // Or maybe we saw a '.' which could indicate one of three things: + // - a float literal: 12.345 + // - S_DOT, i.e., '.' used to access a record entry: r.12.345 + // - S_DDOT, i.e., '..' in a range expression: [12..345] + if (c == '.') { + GAP_ASSERT(i < MAX_VALUE_LEN - 1); - // Now the '.' must be part of our number; store it and move on - i = AddCharToValue(s, i, '.'); - c = GET_NEXT_CHAR(); + // If the symbol before this integer was S_DOT then we must be in + // a nested record element expression, so don't look for a float. + // This is a bit fragile + if (s->Symbol == S_DOT || s->Symbol == S_BDOT) { + symbol = S_INT; + goto finish; } - else { - // Anything else we see tells us that the token is done + + // peek ahead to decide if we are looking at a range expression + if (PEEK_NEXT_CHAR(s->input) == '.') { + // we are looking at '..' and are probably inside a range + // expression symbol = S_INT; goto finish; } + + // Now the '.' must be part of our number; store it and move on + i = AddCharToValue(s, i, '.'); + c = GET_NEXT_CHAR(); + } + else { + // Anything else we see tells us that the token is done + symbol = S_INT; + goto finish; } - // When we get here we have read possibly some digits, a . 
and possibly + // When we get here we have read some digits and a dot and possibly // some more digits, but not an e,E,d,D,q or Q // In any case, from now on, we know we are dealing with a float literal symbol = S_FLOAT; - // read digits + // read digits after dot while (IsDigit(c)) { i = AddCharToValue(s, i, c); - seenADigit = TRUE; c = GET_NEXT_CHAR(); } - if (!seenADigit) - SyntaxError(s, - "Badly formed number: need a digit before or after the " - "decimal point"); if (c == '\\') SyntaxError(s, "Badly formed number"); @@ -529,17 +515,6 @@ static UInt GetNumber(ScannerState * s, Int readDecimalPoint, Char c) } -/**************************************************************************** -** -*F ScanForFloatAfterDotHACK() -** -*/ -void ScanForFloatAfterDotHACK(ScannerState * s) -{ - s->Symbol = GetNumber(s, 1, PEEK_CURR_CHAR(s->input)); -} - - /**************************************************************************** ** *F GetOctalDigits() @@ -970,7 +945,7 @@ static UInt NextSymbol(ScannerState * s) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - return GetNumber(s, 0, c); + return GetNumber(s, c); case '\377': symbol = S_EOF; FlushRestOfInputLine(s->input); break; diff --git a/src/scanner.h b/src/scanner.h index a305931d3a..5ebfca39a4 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -342,17 +342,4 @@ void Match(ScannerState * s, TypSymbolSet skipto); -/**************************************************************************** -** -*F ScanForFloatAfterDotHACK() -** -** This function is called by 'ReadLiteral' if it encounters a single dot in -** form the of the symbol 'S_DOT'. The only legal way this could happen is -** if the dot is the start of a float literal like '.123'. As the scanner -** cannot detect this without being context aware, we must provide this -** function to allow the reader to signal to the scanner about this. 
-*/ -void ScanForFloatAfterDotHACK(ScannerState * s); - - #endif // GAP_SCANNER_H diff --git a/tst/testbugfix/2013-08-21-t00295.tst b/tst/testbugfix/2013-08-21-t00295.tst index 27cf6494b8..55b1973c4d 100644 --- a/tst/testbugfix/2013-08-21-t00295.tst +++ b/tst/testbugfix/2013-08-21-t00295.tst @@ -1,7 +1,6 @@ # 2013/08/21 (MH) gap> . . . . -Syntax error: Badly formed number: need a digit before or after the decimal po\ -int in stream:1 +Syntax error: literal expected in stream:1 . . . . ^ -Syntax error: Record component name expected in stream:2 +Syntax error: ; expected in stream:2 diff --git a/tst/testinstall/float.tst b/tst/testinstall/float.tst index 81166648aa..decd7f265f 100644 --- a/tst/testinstall/float.tst +++ b/tst/testinstall/float.tst @@ -775,7 +775,7 @@ gap> ComplexConjugate(1.3); # gap> Display(1.3); 1.3 -gap> Display(-.4e6); +gap> Display(-0.4e6); -400000. gap> PrintObj(1.3); Print("Q\n"); 1.3Q diff --git a/tst/testinstall/longnumber.tst b/tst/testinstall/longnumber.tst index df3ca6d849..b0bee07b42 100644 --- a/tst/testinstall/longnumber.tst +++ b/tst/testinstall/longnumber.tst @@ -152,8 +152,6 @@ gap> 1.; 1. gap> 0.; 0. 
-gap> .1; -0.1 gap> 0.1; 0.1 gap> 1111111111111111111111111111111111111.1; @@ -161,31 +159,28 @@ gap> 1111111111111111111111111111111111111.1; gap> 1.11111111111111111111111111111111111111; 1.11111 gap> .; -Syntax error: Badly formed number: need a digit before or after the decimal po\ -int in stream:1 +Syntax error: literal expected in stream:1 .; ^ gap> .n; -Syntax error: Badly formed number: need a digit before or after the decimal po\ -int in stream:1 +Syntax error: literal expected in stream:1 .n; ^ gap> .q; -Syntax error: Badly formed number: need a digit before or after the decimal po\ -int in stream:1 +Syntax error: literal expected in stream:1 .q; ^ gap> .0n; -Error, failed to convert float literal +Syntax error: literal expected in stream:1 +.0n; +^ gap> .0q; -Syntax error: Badly formed number: need at least one digit in the exponent in \ -stream:1 +Syntax error: literal expected in stream:1 .0q; -^^^ +^ gap> .0qn; -Syntax error: Badly formed number: need at least one digit in the exponent in \ -stream:1 +Syntax error: literal expected in stream:1 .0qn; -^^^ +^ gap> Unbind(x); gap> STOP_TEST( "longnumber.tst", 1);