(parser) Preserve exact floating point representation (#412)

The idea is to avoid losing the exact floating point representation as given by the user when constructing the parsed syntax tree. This will be useful in the compilation part (#409), where we want to avoid losing precision because of round-tripping to `double` and back to `string`.
perlang-org · Oct 17, 2023 · 28f6dae · 28f6dae
1 parent 69367d3
commit 28f6dae
Show file tree

Hide file tree

Showing 7 changed files with 91 additions and 60 deletions.
diff --git a/release-notes/v0.4.0.md b/release-notes/v0.4.0.md
@@ -6,6 +6,7 @@
 ### Added
 #### Experimental compilation
 - Add C++-based stdlib project [[#407][407]]
+- Preserve exact floating point representation [[#412][412]]
 
 ### Changed
 #### Data types
@@ -37,3 +38,4 @@
 [389]: https://github.com/perlang-org/perlang/pull/389
 [407]: https://github.com/perlang-org/perlang/pull/407
 [410]: https://github.com/perlang-org/perlang/pull/410
+[412]: https://github.com/perlang-org/perlang/pull/412
diff --git a/src/Perlang.Parser/FloatingPointLiteral.cs b/src/Perlang.Parser/FloatingPointLiteral.cs
@@ -3,21 +3,21 @@
 
 namespace Perlang.Parser;
 
-internal readonly struct FloatingPointLiteral<T> : INumericLiteral
+internal readonly struct FloatingPointLiteral<T> : IFloatingPointLiteral, INumericLiteral
     where T : notnull
 {
-    internal T Value { get; }
+    public object Value { get; }
+    public string NumberCharacters { get; }
 
     /// <inheritdoc cref="INumericLiteral.BitsUsed"/>
     public long BitsUsed { get; }
 
     public bool IsPositive { get; }
 
-    object INumericLiteral.Value => Value;
-
-    public FloatingPointLiteral(T value)
+    public FloatingPointLiteral(T value, string numberCharacters)
     {
         Value = value;
+        NumberCharacters = numberCharacters;
 
         BitsUsed = value switch
         {

diff --git a/src/Perlang.Parser/IFloatingPointLiteral.cs b/src/Perlang.Parser/IFloatingPointLiteral.cs
@@ -0,0 +1,12 @@
+#nullable  enable
+namespace Perlang.Parser;
+
+public interface IFloatingPointLiteral
+{
+    /// <summary>
+    /// Gets a string representation of this floating point literal. This is to ensure we avoid precision loss while
+    /// carrying the value over to the compiler, since `float`/`double` `ToString()` and back are not necessarily
+    /// round-trip safe.
+    /// </summary>
+    public string NumberCharacters { get; }
+}
diff --git a/src/Perlang.Parser/NumberParser.cs b/src/Perlang.Parser/NumberParser.cs
@@ -1,9 +1,9 @@
+#nullable enable
 using System;
 using System.Globalization;
 using System.Numerics;
 using Perlang.Internal.Extensions;
 
-#nullable enable
 namespace Perlang.Parser;
 
 internal static class NumberParser
@@ -22,16 +22,21 @@ public static INumericLiteral Parse(NumericToken numericToken)
                     {
                         // The explicit IFormatProvider is required to ensure we use 123.45 format, regardless of host OS
                         // language/region settings. See #263 for more details.
+                        //
+                        // An interesting detail: we preserve the `numberCharacters` here (the unparsed floating point
+                        // value), since we might otherwise lose valuable precision. Round-tripping via ToString() will
+                        // otherwise risk losing precision. This is particularly important to get proper semantics for
+                        // `double + float` operations in compiled mode.
                         float value = Single.Parse(numberCharacters, CultureInfo.InvariantCulture);
-                        return new FloatingPointLiteral<float>(value);
+                        return new FloatingPointLiteral<float>(value, numberCharacters);
                     }
 
                     case 'd':
                     {
                         // The explicit IFormatProvider is required to ensure we use 123.45 format, regardless of host OS
                         // language/region settings. See #263 for more details.
                         double value = Double.Parse(numberCharacters, CultureInfo.InvariantCulture);
-                        return new FloatingPointLiteral<double>(value);
+                        return new FloatingPointLiteral<double>(value, numberCharacters);
                     }
 
                     default:
@@ -40,9 +45,9 @@ public static INumericLiteral Parse(NumericToken numericToken)
             }
             else
             {
-                // No suffix provided => use `double` precision by default, just like C#
+                // No suffix provided => use `double` precision by default, just like C++, Java and C#.
                 double value = Double.Parse(numberCharacters, CultureInfo.InvariantCulture);
-                return new FloatingPointLiteral<double>(value);
+                return new FloatingPointLiteral<double>(value, numberCharacters);
             }
         }
         else
@@ -104,72 +109,72 @@ public static INumericLiteral Parse(NumericToken numericToken)
         }
     }
 
-    public static object MakeNegative(object value)
+    public static object MakeNegative(INumericLiteral numericLiteral)
     {
-        if (value is INumericLiteral numericLiteral)
+        if (numericLiteral is IFloatingPointLiteral floatingPointLiteral)
         {
             if (numericLiteral.Value is float floatValue)
             {
-                return new FloatingPointLiteral<float>(-floatValue);
+                return new FloatingPointLiteral<float>(-floatValue, "-" + floatingPointLiteral.NumberCharacters);
             }
             else if (numericLiteral.Value is double doubleValue)
             {
-                return new FloatingPointLiteral<double>(-doubleValue);
+                return new FloatingPointLiteral<double>(-doubleValue, "-" + floatingPointLiteral.NumberCharacters);
             }
-            else if (numericLiteral.Value is int intValue)
+            else
             {
-                return new IntegerLiteral<int>(-intValue);
+                throw new ArgumentException($"Type {numericLiteral.Value.GetType().ToTypeKeyword()} not supported");
             }
-            else if (numericLiteral.Value is uint uintValue)
-            {
-                long negativeValue = -uintValue;
+        }
+        else if (numericLiteral.Value is int intValue)
+        {
+            return new IntegerLiteral<int>(-intValue);
+        }
+        else if (numericLiteral.Value is uint uintValue)
+        {
+            long negativeValue = -uintValue;
 
-                // This is a special hack to ensure that the value -2147483648 gets returned as an `int` and not a `long`.
-                // Some details available in #302, summarized here in brief:
-                //
-                // The value 2147483648 is too large for an `int` => gets parsed into a `ulong` where it will fit. Once it
-                // has been made negative, the value -2147483648 is again small enough to fit in an `int` => the code below
-                // will narrow it down to comply with the "smallest type possible" design principle.
-                //
-                // Rationale: Two's complement: https://en.wikipedia.org/wiki/Two%27s_complement
-                if (negativeValue >= Int32.MinValue)
-                {
-                    return new IntegerLiteral<int>((int)negativeValue);
-                }
-                else
-                {
-                    return new IntegerLiteral<long>(negativeValue);
-                }
-            }
-            else if (numericLiteral.Value is long longValue)
+            // This is a special hack to ensure that the value -2147483648 gets returned as an `int` and not a `long`.
+            // Some details available in #302, summarized here in brief:
+            //
+            // The value 2147483648 is too large for an `int` => gets parsed into a `uint` where it will fit. Once it
+            // has been made negative, the value -2147483648 is again small enough to fit in an `int` => the code below
+            // will narrow it down to comply with the "smallest type possible" design principle.
+            //
+            // Rationale: Two's complement: https://en.wikipedia.org/wiki/Two%27s_complement
+            if (negativeValue >= Int32.MinValue)
             {
-                return new IntegerLiteral<long>(-longValue);
+                return new IntegerLiteral<int>((int)negativeValue);
             }
-            else if (numericLiteral.Value is ulong ulongValue)
+            else
             {
-                // Again, this needs to be handled specially to ensure that numbers that fit in a `long` doesn't use
-                // BigInteger unnecessarily.
-                BigInteger negativeValue = -new BigInteger(ulongValue);
+                return new IntegerLiteral<long>(negativeValue);
+            }
+        }
+        else if (numericLiteral.Value is long longValue)
+        {
+            return new IntegerLiteral<long>(-longValue);
+        }
+        else if (numericLiteral.Value is ulong ulongValue)
+        {
+            // Again, this needs to be handled specially to ensure that numbers that fit in a `long` don't use
+            // BigInteger unnecessarily.
+            BigInteger negativeValue = -new BigInteger(ulongValue);
 
-                if (negativeValue >= Int64.MinValue)
-                {
-                    return new IntegerLiteral<long>((long)negativeValue);
-                }
-                else
-                {
-                    // All negative numbers that are too big to fit in any of the smaller signed integer types will go
-                    // through this code path.
-                    return new IntegerLiteral<BigInteger>(negativeValue);
-                }
+            if (negativeValue >= Int64.MinValue)
+            {
+                return new IntegerLiteral<long>((long)negativeValue);
             }
             else
             {
-                throw new ArgumentException($"Type {numericLiteral.Value.GetType().ToTypeKeyword()} not supported");
+                // All negative numbers that are too big to fit in any of the smaller signed integer types will go
+                // through this code path.
+                return new IntegerLiteral<BigInteger>(negativeValue);
             }
         }
         else
         {
-            throw new ArgumentException($"Type {value.GetType().ToTypeKeyword()} not supported");
+            throw new ArgumentException($"Type {numericLiteral.Value.GetType().ToTypeKeyword()} not supported");
         }
     }
 }
diff --git a/src/Perlang.Parser/PerlangParser.cs b/src/Perlang.Parser/PerlangParser.cs
@@ -10,6 +10,7 @@
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Linq;
+using Perlang.Internal.Extensions;
 using static Perlang.Internal.Utils;
 using static Perlang.TokenType;
 
@@ -570,7 +571,20 @@ private Expr UnaryPrefix()
                 // changed.
                 if (@operator.Type == MINUS && right is Expr.Literal rightLiteral)
                 {
-                    return new Expr.Literal(NumberParser.MakeNegative(rightLiteral.Value!));
+                    if (rightLiteral.Value is INumericLiteral numericLiteral)
+                    {
+                        return new Expr.Literal(NumberParser.MakeNegative(numericLiteral));
+                    }
+                    else if (rightLiteral.Value is null)
+                    {
+                        Error(Peek(), "Unary minus operator does not support null operand");
+                        return new Expr.Literal(null);
+                    }
+                    else
+                    {
+                        // TODO: Call Error() here to produce a context-aware error instead of just throwing a raw exception
+                        throw new ArgumentException($"Type {rightLiteral.Value.GetType().ToTypeKeyword()} not supported");
+                    }
                 }
                 else
                 {

diff --git a/src/Perlang.Stdlib/Internal/Utils.cs b/src/Perlang.Stdlib/Internal/Utils.cs
@@ -1,6 +1,5 @@
 using System;
 using System.Globalization;
-using System.Numerics;
 using Perlang.Internal.Extensions;
 using Perlang.Lang;
 using String = Perlang.Lang.String;
@@ -12,15 +11,15 @@ namespace Perlang.Internal
     /// </summary>
     public static class Utils
     {
-        private static readonly Lang.String NullString = AsciiString.from("null");
+        private static readonly String NullString = AsciiString.from("null");
 
-        public static Lang.String Stringify(object @object)
+        public static String Stringify(object @object)
         {
             if (@object == null)
             {
                 return NullString;
             }
-            else if (@object is Lang.String nativeString)
+            else if (@object is String nativeString)
             {
                 return nativeString;
             }

diff --git a/src/Perlang.Tests.Integration/Operator/Binary/Comparison.cs b/src/Perlang.Tests.Integration/Operator/Binary/Comparison.cs
@@ -1,4 +1,3 @@
-using System;
 using System.Collections.Generic;
 using System.Globalization;
 using System.Linq;