From 6dc3d5db755f186745fbee4a01982617b64251fe Mon Sep 17 00:00:00 2001
From: Max Horn <max@quendi.de>
Date: Fri, 22 May 2020 17:43:25 +0200
Subject: [PATCH] Remove support for float literals starting with a period

Before this patch, writing .0 instead of 0.0 was allowed, as in C, C++, D,
Java, Go, Perl, Python and many other languages. This required a hack in the
form of an additional entry point into our scanner. We now get rid of it,
matching languages like Ada, Haskell, OCaml, Rust and Swift.
---
 src/read.c                           |   7 --
 src/scanner.c                        | 123 +++++++++++----------------
 src/scanner.h                        |  13 ---
 tst/testbugfix/2013-08-21-t00295.tst |   5 +-
 tst/testinstall/float.tst            |   2 +-
 tst/testinstall/longnumber.tst       |  25 +++---
 6 files changed, 62 insertions(+), 113 deletions(-)
diff --git a/src/read.c b/src/read.c
index a85ffe0c59..13b74f524d 100644
--- a/src/read.c
+++ b/src/read.c
@@ -1495,13 +1495,6 @@ static void ReadFuncExprAbbrevSingle(ReaderState * rs, TypSymbolSet follow)
 */
 static void ReadLiteral(ReaderState * rs, TypSymbolSet follow, Char mode)
 {
-    if (rs->s.Symbol == S_DOT) {
-        // HACK: The only way a dot could turn up here is in a floating point
-        // literal that starts with '.'. Call back to the scanner to deal
-        // with this.
-        ScanForFloatAfterDotHACK(&rs->s);
-    }
-
     switch (rs->s.Symbol) {
 
     /* <Int>                                                               */
diff --git a/src/scanner.c b/src/scanner.c
index 6292ae2188..c0fed54ebf 100644
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -366,10 +366,6 @@ static UInt GetIdent(ScannerState * s, Int i, Char c)
 **
 **  When 's->Value' is completely filled, then a GAP string object is
 **  created in 's->ValueObj' and all data is stored there.
-**
-**  The argument is used to signal if a decimal point was already read,
-**  or whether we are starting from scratch..
-**
 */
 static UInt AddCharToBuf(Obj * string, Char * buf, UInt bufsize, UInt pos, Char c)
 {
@@ -386,89 +382,79 @@ static UInt AddCharToValue(ScannerState * s, UInt pos, Char c)
     return AddCharToBuf(&s->ValueObj, s->Value, MAX_VALUE_LEN - 1, pos, c);
 }
 
-static UInt GetNumber(ScannerState * s, Int readDecimalPoint, Char c)
+static UInt GetNumber(ScannerState * s, Char c)
 {
     UInt symbol = S_ILLEGAL;
     UInt i = 0;
-    BOOL seenADigit = FALSE;
 
     s->ValueObj = 0;
 
-    if (readDecimalPoint) {
-        s->Value[i++] = '.';
+    GAP_ASSERT(IsDigit(c));
+
+    // read initial sequence of digits into 'Value'
+    while (IsDigit(c)) {
+        i = AddCharToValue(s, i, c);
+        c = GET_NEXT_CHAR();
     }
-    else {
-        // read initial sequence of digits into 'Value'
-        while (IsDigit(c)) {
-            i = AddCharToValue(s, i, c);
-            seenADigit = TRUE;
-            c = GET_NEXT_CHAR();
-        }
 
-        // maybe we saw an identifier character and realised that this is an
-        // identifier we are reading
-        if (IsIdent(c) || c == '\\') {
-            // if necessary, copy back from s->ValueObj to s->Value
-            if (s->ValueObj) {
-                i = GET_LEN_STRING(s->ValueObj);
-                GAP_ASSERT(i >= MAX_VALUE_LEN - 1);
-                memcpy(s->Value, CONST_CSTR_STRING(s->ValueObj),
-                       MAX_VALUE_LEN);
-                s->ValueObj = 0;
-            }
-            // this looks like an identifier, scan the rest of it
-            return GetIdent(s, i, c);
+    // maybe we saw an identifier character and realised that this is an
+    // identifier we are reading
+    if (IsIdent(c) || c == '\\') {
+        // if necessary, copy back from s->ValueObj to s->Value
+        if (s->ValueObj) {
+            i = GET_LEN_STRING(s->ValueObj);
+            GAP_ASSERT(i >= MAX_VALUE_LEN - 1);
+            memcpy(s->Value, CONST_CSTR_STRING(s->ValueObj),
+                   MAX_VALUE_LEN);
+            s->ValueObj = 0;
         }
+        // this looks like an identifier, scan the rest of it
+        return GetIdent(s, i, c);
+    }
 
-        // Or maybe we saw a '.' which could indicate one of three things:
-        // - a float literal: 12.345
-        // - S_DOT, i.e., '.' used to access a record entry: r.12.345
-        // - S_DDOT, i.e., '..' in a range expression:  [12..345]
-        if (c == '.') {
-            GAP_ASSERT(i < MAX_VALUE_LEN - 1);
-
-            // If the symbol before this integer was S_DOT then we must be in
-            // a nested record element expression, so don't look for a float.
-            // This is a bit fragile
-            if (s->Symbol == S_DOT || s->Symbol == S_BDOT) {
-                symbol = S_INT;
-                goto finish;
-            }
-
-            // peek ahead to decide if we are looking at a range expression
-            if (PEEK_NEXT_CHAR(s->input) == '.') {
-                // we are looking at '..' and are probably inside a range
-                // expression
-                symbol = S_INT;
-                goto finish;
-            }
+    // Or maybe we saw a '.' which could indicate one of three things:
+    // - a float literal: 12.345
+    // - S_DOT, i.e., '.' used to access a record entry: r.12.345
+    // - S_DDOT, i.e., '..' in a range expression:  [12..345]
+    if (c == '.') {
+        GAP_ASSERT(i < MAX_VALUE_LEN - 1);
 
-            // Now the '.' must be part of our number; store it and move on
-            i = AddCharToValue(s, i, '.');
-            c = GET_NEXT_CHAR();
+        // If the symbol before this integer was S_DOT then we must be in
+        // a nested record element expression, so don't look for a float.
+        // This is a bit fragile
+        if (s->Symbol == S_DOT || s->Symbol == S_BDOT) {
+            symbol = S_INT;
+            goto finish;
         }
-        else {
-            // Anything else we see tells us that the token is done
+
+        // peek ahead to decide if we are looking at a range expression
+        if (PEEK_NEXT_CHAR(s->input) == '.') {
+            // we are looking at '..' and are probably inside a range
+            // expression
             symbol = S_INT;
             goto finish;
         }
+
+        // Now the '.' must be part of our number; store it and move on
+        i = AddCharToValue(s, i, '.');
+        c = GET_NEXT_CHAR();
+    }
+    else {
+        // Anything else we see tells us that the token is done
+        symbol = S_INT;
+        goto finish;
     }
 
-    // When we get here we have read possibly some digits, a . and possibly
+    // When we get here we have read some digits and a dot and possibly
     // some more digits, but not an e,E,d,D,q or Q
     // In any case, from now on, we know we are dealing with a float literal
     symbol = S_FLOAT;
 
-    // read digits
+    // read digits after dot
     while (IsDigit(c)) {
         i = AddCharToValue(s, i, c);
-        seenADigit = TRUE;
         c = GET_NEXT_CHAR();
     }
-    if (!seenADigit)
-        SyntaxError(s,
-                    "Badly formed number: need a digit before or after the "
-                    "decimal point");
     if (c == '\\')
         SyntaxError(s, "Badly formed number");
 
@@ -529,17 +515,6 @@ static UInt GetNumber(ScannerState * s, Int readDecimalPoint, Char c)
 }
 
 
-/****************************************************************************
-**
-*F  ScanForFloatAfterDotHACK()
-**
-*/
-void ScanForFloatAfterDotHACK(ScannerState * s)
-{
-    s->Symbol = GetNumber(s, 1, PEEK_CURR_CHAR(s->input));
-}
-
-
 /****************************************************************************
 **
 *F  GetOctalDigits()
@@ -970,7 +945,7 @@ static UInt NextSymbol(ScannerState * s)
 
     case '0': case '1': case '2': case '3': case '4':
     case '5': case '6': case '7': case '8': case '9':
-                      return GetNumber(s, 0, c);
+                      return GetNumber(s, c);
 
     case '\377':      symbol = S_EOF;  FlushRestOfInputLine(s->input); break;
 
diff --git a/src/scanner.h b/src/scanner.h
index a305931d3a..5ebfca39a4 100644
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -342,17 +342,4 @@ void Match(ScannerState * s,
            TypSymbolSet   skipto);
 
 
-/****************************************************************************
-**
-*F  ScanForFloatAfterDotHACK()
-**
-**  This function is called by 'ReadLiteral' if it encounters a single dot in
-**  form the of the symbol 'S_DOT'. The only legal way this could happen is
-**  if the dot is the start of a float literal like '.123'. As the scanner
-**  cannot detect this without being context aware, we must provide this
-**  function to allow the reader to signal to the scanner about this.
-*/
-void ScanForFloatAfterDotHACK(ScannerState * s);
-
-
 #endif // GAP_SCANNER_H
diff --git a/tst/testbugfix/2013-08-21-t00295.tst b/tst/testbugfix/2013-08-21-t00295.tst
index 27cf6494b8..55b1973c4d 100644
--- a/tst/testbugfix/2013-08-21-t00295.tst
+++ b/tst/testbugfix/2013-08-21-t00295.tst
@@ -1,7 +1,6 @@
 # 2013/08/21 (MH)
 gap> . . . .
-Syntax error: Badly formed number: need a digit before or after the decimal po\
-int in stream:1
+Syntax error: literal expected in stream:1
 . . . .
 ^
-Syntax error: Record component name expected in stream:2
+Syntax error: ; expected in stream:2
diff --git a/tst/testinstall/float.tst b/tst/testinstall/float.tst
index 81166648aa..decd7f265f 100644
--- a/tst/testinstall/float.tst
+++ b/tst/testinstall/float.tst
@@ -775,7 +775,7 @@ gap> ComplexConjugate(1.3);
 #
 gap> Display(1.3);
 1.3
-gap> Display(-.4e6);
+gap> Display(-0.4e6);
 -400000.
 gap> PrintObj(1.3); Print("Q\n");
 1.3Q
diff --git a/tst/testinstall/longnumber.tst b/tst/testinstall/longnumber.tst
index df3ca6d849..b0bee07b42 100644
--- a/tst/testinstall/longnumber.tst
+++ b/tst/testinstall/longnumber.tst
@@ -152,8 +152,6 @@ gap> 1.;
 1.
 gap> 0.;
 0.
-gap> .1;
-0.1
 gap> 0.1;
 0.1
 gap> 1111111111111111111111111111111111111.1;
@@ -161,31 +159,28 @@ gap> 1111111111111111111111111111111111111.1;
 gap> 1.11111111111111111111111111111111111111;
 1.11111
 gap> .;
-Syntax error: Badly formed number: need a digit before or after the decimal po\
-int in stream:1
+Syntax error: literal expected in stream:1
 .;
 ^
 gap> .n;
-Syntax error: Badly formed number: need a digit before or after the decimal po\
-int in stream:1
+Syntax error: literal expected in stream:1
 .n;
 ^
 gap> .q;
-Syntax error: Badly formed number: need a digit before or after the decimal po\
-int in stream:1
+Syntax error: literal expected in stream:1
 .q;
 ^
 gap> .0n;
-Error, failed to convert float literal
+Syntax error: literal expected in stream:1
+.0n;
+^
 gap> .0q;
-Syntax error: Badly formed number: need at least one digit in the exponent in \
-stream:1
+Syntax error: literal expected in stream:1
 .0q;
-^^^
+^
 gap> .0qn;
-Syntax error: Badly formed number: need at least one digit in the exponent in \
-stream:1
+Syntax error: literal expected in stream:1
 .0qn;
-^^^
+^
 gap> Unbind(x);
 gap> STOP_TEST( "longnumber.tst", 1);