gap-system · fingolfin · Jul 27, 2022 · Jul 27, 2022
diff --git a/doc/ref/floats.xml b/doc/ref/floats.xml
@@ -106,7 +106,7 @@ or floating-point numbers. Floating-point numbers may
 also be created, in any floating-point representation, using
 <Ref Constr="NewFloat"/> as in <C>NewFloat(IsIEEE754FloatRep,355/113)</C>,
 by supplying the category filter of the desired new floating-point number;
-or using <Ref Oper="MakeFloat"/> as in <C>NewFloat(1.0,355/113)</C>,
+or using <Ref Oper="MakeFloat"/> as in <C>MakeFloat(1.0,355/113)</C>,
 by supplying a sample floating-point number.
 <P/>
 

diff --git a/lib/float.gi b/lib/float.gi
@@ -109,9 +109,8 @@ BindGlobal("INSTALLFLOATCONSTRUCTORS", function(arg)
         filter := arg[1];
     fi;
 
-    InstallMethod(NewFloat, [filter,IsRat], -1, function(filter,obj)
-        return NewFloat(filter,NumeratorRat(obj))/NewFloat(filter,DenominatorRat(obj));
-    end);
+    # we intentionally provide no default method for converting rationals, as
+    # implementing this accurately depends a lot on the specific floatean type
 
     InstallMethod(NewFloat, [filter,IsInfinity], -1, function(filter,obj)
         return Inverse(NewFloat(filter,0));
@@ -137,10 +136,6 @@ BindGlobal("INSTALLFLOATCONSTRUCTORS", function(arg)
         return obj; # floats are immutable, no harm to return the same one
     end);
 
-    InstallMethod(MakeFloat, [filter,IsRat], -1, function(filter,obj)
-        return MakeFloat(filter,NumeratorRat(obj))/MakeFloat(filter,DenominatorRat(obj));
-    end);
-
     InstallMethod(MakeFloat, [filter,IsInfinity], -1, function(filter,obj)
         return Inverse(MakeFloat(filter,0));
     end);

diff --git a/lib/ieee754.g b/lib/ieee754.g
@@ -160,4 +160,28 @@ if IsHPCGAP then
     MakeReadOnlyObj( IEEE754FLOAT );
 fi;
 
+InstallMethod(NewFloat, [IsIEEE754FloatRep,IsRat], -1, function(filter,obj)
+    local num, den, extra, N;
+    num := NumeratorRat(obj);
+    den := DenominatorRat(obj);
+    extra := QuoInt(num, den);
+    num := RemInt(num, den);
+    N := Log2Int(den);
+    # Avoid overflows in the conversion of numerator and denominator: if they
+    # are too big, shift them down until they (barely) fit. This hardcodes
+    # assumptions about the precision of IsIEEE754FloatRep. It also does not
+    # try to minimize the numerical error of the computation, but it should be
+    # at least reasonably close overall.
+    if N >= 1023 then
+        num := QuoInt(num, 2^(N-1022));
+        den := QuoInt(den, 2^(N-1022));
+    fi;
+    return NewFloat(filter, extra) + NewFloat(filter, num) / NewFloat(filter, den);
+end);
+
+InstallMethod(MakeFloat, [IsIEEE754FloatRep,IsRat], -1, function(filter,obj)
+    return NewFloat(IsIEEE754FloatRep, obj);
+end);
+
+
 SetFloats(IEEE754FLOAT);
diff --git a/tst/testinstall/float.tst b/tst/testinstall/float.tst
@@ -1,4 +1,4 @@
-#@local neginf,posinf,r,nan,l,f,g
+#@local neginf,posinf,r,nan,l,f,g,a,b,e1,e2
 gap> START_TEST("float.tst");
 
 # make sure we are testing the built-in machine floats
@@ -32,6 +32,44 @@ inf
 gap> Float(-infinity);
 -inf
 
+#
+# Test converting rationals to machine floats
+#
+gap> f:=function(r, expected)
+>   local testExp;
+>   testExp:=function(actual, expected)
+>     if (actual - expected) > FLOAT.EPSILON then
+>       Error("expected ", expected, " but got ", actual);
+>     fi;
+>   end;
+>   testExp(Float(r), expected);
+>   testExp(Float(-r), -expected);
+>   testExp(NewFloat(IsIEEE754FloatRep, r), expected);
+>   testExp(NewFloat(IsIEEE754FloatRep, -r), -expected);
+>   testExp(MakeFloat(0.0, r), expected);
+>   testExp(MakeFloat(0.0, -r), -expected);
+> end;;
+gap> for a in [-1..1] do
+>      for b in [-1..1] do
+>        f( (10^309 + a) / (10^308 + b), 10.);
+>      od;
+>    od;
+gap> for a in [-1..1] do
+>      for b in [-1..1] do
+>        f( (10^309 + a) / (10^309 + b), 1.);
+>      od;
+>    od;
+gap> for e1 in [306..309] do for e2 in [306..309] do
+>      for a in [-1..1] do for b in [-1..1] do
+>        f( (10^e1 + a) / (10^e2 + b), 10.^(e1-e2));
+>      od; od;
+>    od; od;
+gap> for e1 in [1020..1025] do for e2 in [1020..1025] do
+>      for a in [-1..1] do for b in [-1..1] do
+>        f( (2^e1 + a) / (2^e2 + b), 2.^(e1-e2));
+>      od; od;
+>    od; od;
+
 #
 # input floats directly
 #