Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid overflows when converting rationals with large denominator and/or numerator to floating point #4144

Merged
merged 1 commit into from
Jul 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/ref/floats.xml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ or floating-point numbers. Floating-point numbers may
also be created, in any floating-point representation, using
<Ref Constr="NewFloat"/> as in <C>NewFloat(IsIEEE754FloatRep,355/113)</C>,
by supplying the category filter of the desired new floating-point number;
or using <Ref Oper="MakeFloat"/> as in <C>NewFloat(1.0,355/113)</C>,
or using <Ref Oper="MakeFloat"/> as in <C>MakeFloat(1.0,355/113)</C>,
by supplying a sample floating-point number.
<P/>

Expand Down
9 changes: 2 additions & 7 deletions lib/float.gi
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,8 @@ BindGlobal("INSTALLFLOATCONSTRUCTORS", function(arg)
filter := arg[1];
fi;

InstallMethod(NewFloat, [filter,IsRat], -1, function(filter,obj)
return NewFloat(filter,NumeratorRat(obj))/NewFloat(filter,DenominatorRat(obj));
end);
# we intentionally provide no default method for converting rationals, as
# implementing this accurately depends a lot on the specific floatean type

InstallMethod(NewFloat, [filter,IsInfinity], -1, function(filter,obj)
return Inverse(NewFloat(filter,0));
Expand All @@ -137,10 +136,6 @@ BindGlobal("INSTALLFLOATCONSTRUCTORS", function(arg)
return obj; # floats are immutable, no harm to return the same one
end);

InstallMethod(MakeFloat, [filter,IsRat], -1, function(filter,obj)
return MakeFloat(filter,NumeratorRat(obj))/MakeFloat(filter,DenominatorRat(obj));
end);

InstallMethod(MakeFloat, [filter,IsInfinity], -1, function(filter,obj)
return Inverse(MakeFloat(filter,0));
end);
Expand Down
24 changes: 24 additions & 0 deletions lib/ieee754.g
Original file line number Diff line number Diff line change
Expand Up @@ -160,4 +160,28 @@ if IsHPCGAP then
MakeReadOnlyObj( IEEE754FLOAT );
fi;

InstallMethod(NewFloat, [IsIEEE754FloatRep,IsRat], -1, function(filter,obj)
local num, den, extra, N;
num := NumeratorRat(obj);
den := DenominatorRat(obj);
extra := QuoInt(num, den);
num := RemInt(num, den);
N := Log2Int(den);
# Avoid overflows in the conversion of numerator and denominator: if they
# are too big, shift them down until they (barely) fit. This hardcodes
# assumptions about the precision of IsIEEE754FloatRep. It also does not
# try to minimize the numerical error of the computation, but it should be
# at least reasonably close overall.
if N >= 1023 then
num := QuoInt(num, 2^(N-1022));
den := QuoInt(den, 2^(N-1022));
fi;
return NewFloat(filter, extra) + NewFloat(filter, num) / NewFloat(filter, den);
end);

InstallMethod(MakeFloat, [IsIEEE754FloatRep,IsRat], -1, function(filter,obj)
return NewFloat(IsIEEE754FloatRep, obj);
end);


SetFloats(IEEE754FLOAT);
40 changes: 39 additions & 1 deletion tst/testinstall/float.tst
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#@local neginf,posinf,r,nan,l,f,g
#@local neginf,posinf,r,nan,l,f,g,a,b,e1,e2
gap> START_TEST("float.tst");

# make sure we are testing the built-in machine floats
Expand Down Expand Up @@ -32,6 +32,44 @@ inf
gap> Float(-infinity);
-inf

#
# Test converting rationals to machine floats
#
gap> f:=function(r, expected)
> local testExp;
> testExp:=function(actual, expected)
> if (actual - expected) > FLOAT.EPSILON then
> Error("expected ", expected, " but got ", actual);
> fi;
> end;
> testExp(Float(r), expected);
> testExp(Float(-r), -expected);
> testExp(NewFloat(IsIEEE754FloatRep, r), expected);
> testExp(NewFloat(IsIEEE754FloatRep, -r), -expected);
> testExp(MakeFloat(0.0, r), expected);
> testExp(MakeFloat(0.0, -r), -expected);
> end;;
gap> for a in [-1..1] do
> for b in [-1..1] do
> f( (10^309 + a) / (10^308 + b), 10.);
> od;
> od;
gap> for a in [-1..1] do
> for b in [-1..1] do
> f( (10^309 + a) / (10^309 + b), 1.);
> od;
> od;
gap> for e1 in [306..309] do for e2 in [306..309] do
> for a in [-1..1] do for b in [-1..1] do
> f( (10^e1 + a) / (10^e2 + b), 10.^(e1-e2));
> od; od;
> od; od;
gap> for e1 in [1020..1025] do for e2 in [1020..1025] do
> for a in [-1..1] do for b in [-1..1] do
> f( (2^e1 + a) / (2^e2 + b), 2.^(e1-e2));
> od; od;
> od; od;

#
# input floats directly
#
Expand Down