Skip to content

Fix sig::add_or_sub and IeeeFloat::normalize #21

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 110 additions & 23 deletions src/ieee.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2051,7 +2051,8 @@ impl<S: Semantics> IeeeFloat<S> {
// Before rounding normalize the exponent of Category::Normal numbers.
let mut omsb = sig::omsb(&self.sig);

if omsb > 0 {
// Only skip this `if` if the value is exactly zero.
if omsb > 0 || loss != Loss::ExactlyZero {
// OMSB is numbered from 1. We want to place it in the integer
// bit numbered PRECISION if possible, with a compensating change in
// the exponent.
Expand Down Expand Up @@ -3008,37 +3009,60 @@ mod sig {
// an addition or subtraction.
// Subtraction is more subtle than one might naively expect.
if *a_sign ^ b_sign {
let loss;

if bits == 0 {
loss = Loss::ExactlyZero;
let (mut loss, loss_is_from_b) = if bits == 0 {
(Loss::ExactlyZero, false)
} else if bits > 0 {
loss = shift_right(b_sig, &mut 0, (bits - 1) as usize);
shift_left(a_sig, a_exp, 1);
(shift_right(b_sig, &mut 0, (bits - 1) as usize), true)
} else {
loss = shift_right(a_sig, a_exp, (-bits - 1) as usize);
shift_left(b_sig, &mut 0, 1);
}

let borrow = (loss != Loss::ExactlyZero) as Limb;

// Should we reverse the subtraction.
if cmp(a_sig, b_sig) == Ordering::Less {
// The code above is intended to ensure that no borrow is necessary.
assert_eq!(sub(b_sig, a_sig, borrow), 0);
a_sig.copy_from_slice(b_sig);
*a_sign = !*a_sign;
} else {
// The code above is intended to ensure that no borrow is necessary.
assert_eq!(sub(a_sig, b_sig, borrow), 0);
}
(shift_right(a_sig, a_exp, (-bits - 1) as usize), false)
};

// Invert the lost fraction - it was on the RHS and subtracted.
match loss {
let invert_loss = |loss| match loss {
Loss::LessThanHalf => Loss::MoreThanHalf,
Loss::MoreThanHalf => Loss::LessThanHalf,
_ => loss,
};

// Should we reverse the subtraction.
match cmp(a_sig, b_sig) {
Ordering::Less => {
let borrow = if loss != Loss::ExactlyZero && !loss_is_from_b {
// The loss is being subtracted, borrow from the significand and invert
// `loss`.
loss = invert_loss(loss);
1
} else {
0
};
// The code above is intended to ensure that no borrow is necessary.
assert_eq!(sub(b_sig, a_sig, borrow), 0);
a_sig.copy_from_slice(b_sig);
*a_sign = !*a_sign;
}
Ordering::Greater => {
let borrow = if loss != Loss::ExactlyZero && loss_is_from_b {
// The loss is being subtracted, borrow from the significand and invert
// `loss`.
loss = invert_loss(loss);
1
} else {
0
};
// The code above is intended to ensure that no borrow is necessary.
assert_eq!(sub(a_sig, b_sig, borrow), 0);
}
Ordering::Equal => {
a_sig.fill(0);
if loss != Loss::ExactlyZero && loss_is_from_b {
// b is slightly larger due to the loss, flip the sign.
*a_sign = !*a_sign;
}
}
}

loss
} else {
let loss = if bits > 0 {
shift_right(b_sig, &mut 0, bits as usize)
Expand All @@ -3051,6 +3075,69 @@ mod sig {
}
}

#[test]
fn test_add_or_sub() {
#[track_caller]
fn run_test(
subtract: bool,
mut lhs_sign: bool,
mut lhs_exponent: ExpInt,
mut lhs_significand: Limb,
rhs_sign: bool,
rhs_exponent: ExpInt,
mut rhs_significand: Limb,
expected_sign: bool,
expected_exponent: ExpInt,
expected_significand: Limb,
expected_loss: Loss,
) {
let loss = add_or_sub(
core::array::from_mut(&mut lhs_significand),
&mut lhs_exponent,
&mut lhs_sign,
core::array::from_mut(&mut rhs_significand),
rhs_exponent,
rhs_sign ^ subtract,
);
assert_eq!(loss, expected_loss);
assert_eq!(lhs_sign, expected_sign);
assert_eq!(lhs_exponent, expected_exponent);
assert_eq!(lhs_significand, expected_significand);
}

// Test cases are all combinations of:
// {equal exponents, LHS larger exponent, RHS larger exponent}
// {equal significands, LHS larger significand, RHS larger significand}
// {no loss, loss}

// Equal exponents (loss cannot occur as their is no shifting)
run_test(true, false, 1, 0x10, false, 1, 0x5, false, 1, 0xb, Loss::ExactlyZero);
run_test(false, false, -2, 0x20, true, -2, 0x20, false, -2, 0, Loss::ExactlyZero);
run_test(false, true, 3, 0x20, false, 3, 0x30, false, 3, 0x10, Loss::ExactlyZero);

// LHS larger exponent
// LHS significand greater after shitfing
run_test(true, false, 7, 0x100, false, 3, 0x100, false, 6, 0x1e0, Loss::ExactlyZero);
run_test(true, false, 7, 0x100, false, 3, 0x101, false, 6, 0x1df, Loss::MoreThanHalf);
// Significands equal after shitfing
run_test(true, false, 7, 0x100, false, 3, 0x1000, false, 6, 0, Loss::ExactlyZero);
run_test(true, false, 7, 0x100, false, 3, 0x1001, true, 6, 0, Loss::LessThanHalf);
// RHS significand greater after shitfing
run_test(true, false, 7, 0x100, false, 3, 0x10000, true, 6, 0x1e00, Loss::ExactlyZero);
run_test(true, false, 7, 0x100, false, 3, 0x10001, true, 6, 0x1e00, Loss::LessThanHalf);

// RHS larger exponent
// RHS significand greater after shitfing
run_test(true, false, 3, 0x100, false, 7, 0x100, true, 6, 0x1e0, Loss::ExactlyZero);
run_test(true, false, 3, 0x101, false, 7, 0x100, true, 6, 0x1df, Loss::MoreThanHalf);
// Significands equal after shitfing
run_test(true, false, 3, 0x1000, false, 7, 0x100, false, 6, 0, Loss::ExactlyZero);
run_test(true, false, 3, 0x1001, false, 7, 0x100, false, 6, 0, Loss::LessThanHalf);
// LHS significand greater after shitfing
run_test(true, false, 3, 0x10000, false, 7, 0x100, false, 6, 0x1e00, Loss::ExactlyZero);
run_test(true, false, 3, 0x10001, false, 7, 0x100, false, 6, 0x1e00, Loss::LessThanHalf);
}

/// `[low, high] = a * b`.
///
/// This cannot overflow, because
Expand Down
95 changes: 95 additions & 0 deletions tests/ieee.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,101 @@ fn fma() {
assert_eq!(-8.85242279E-41, f1.to_f32());
}

// `sig::add_or_sub` can be considered to have 9 possible cases
// when subtracting: all combinations of `Ordering::{Less, Greater, Equal} and
// {no loss, loss from lhs, loss from rhs}. Test each reachable case here.

// Regression test for failing the assertions in `sig::add_or_sub` and normalizing
// the exponent even when the significand is zero if there is a lost fraction.
// This tests `Ordering::Equal`, loss from lhs
{
let mut f1 = Single::from_f32(-1.4728589E-38);
let f2 = Single::from_f32(3.7105144E-6);
let f3 = Single::from_f32(5.5E-44);
f1 = f1.mul_add(f2, f3).value;
assert_eq!(-0.0, f1.to_f32());
}

// Test `Ordering::Greater`, no loss
{
let mut f1 = Single::from_f32(2.0);
let f2 = Single::from_f32(2.0);
let f3 = Single::from_f32(-3.5);
f1 = f1.mul_add(f2, f3).value;
assert_eq!(0.5, f1.to_f32());
}

// Test `Ordering::Less`, no loss
{
let mut f1 = Single::from_f32(2.0);
let f2 = Single::from_f32(2.0);
let f3 = Single::from_f32(-4.5);
f1 = f1.mul_add(f2, f3).value;
assert_eq!(-0.5, f1.to_f32());
}

// Test `Ordering::Equal`, no loss
{
let mut f1 = Single::from_f32(2.0);
let f2 = Single::from_f32(2.0);
let f3 = Single::from_f32(-4.0);
f1 = f1.mul_add(f2, f3).value;
assert_eq!(0.0, f1.to_f32());
}

// Test `Ordering::Less`, loss from lhs
{
let mut f1 = Single::from_f32(2.0000002);
let f2 = Single::from_f32(2.0000002);
let f3 = Single::from_f32(-32.0);
f1 = f1.mul_add(f2, f3).value;
assert_eq!(-27.999998, f1.to_f32());
}

// Test `Ordering::Greater`, loss from rhs
{
let mut f1 = Single::from_f32(1e10);
let f2 = Single::from_f32(1e10);
let f3 = Single::from_f32(-2.0000002);
f1 = f1.mul_add(f2, f3).value;
assert_eq!(1e20, f1.to_f32());
}

// Test `Ordering::Greater`, loss from lhs
{
let mut f1 = Single::from_f32(1e-36);
let f2 = Single::from_f32(0.0019531252);
let f3 = Single::from_f32(-1e-45);
f1 = f1.mul_add(f2, f3).value;
assert_eq!(1.953124e-39, f1.to_f32());
}

// `Ordering::{Equal, Less}` with loss from rhs can't occur for the usage in
// `mul_add` as `mul_add_r` normalises the MSB of lhs to one bit below the top.

// Test cases from llvm/llvm-project#104984
{
let mut f1 = Single::from_f32(0.24999998);
let f2 = Single::from_f32(2.3509885e-38);
let f3 = Single::from_f32(-1e-45);
f1 = f1.mul_add(f2, f3).value;
assert_eq!(5.87747e-39, f1.to_f32());
}
{
let mut f1 = Single::from_f32(4.4501477170144023e-308);
let f2 = Single::from_f32(0.24999999999999997);
let f3 = Single::from_f32(-8.475904604373977e-309);
f1 = f1.mul_add(f2, f3).value;
assert_eq!(2.64946468816203e-309, f1.to_f32());
}
{
let mut f1 = Half::from_bits(0x8fff);
let f2 = Half::from_bits(0x2bff);
let f3 = Half::from_bits(0x0172);
f1 = f1.mul_add(f2, f3).value;
assert_eq!(0x808e, f1.to_bits());
}

// Test using only a single instance of APFloat.
{
let mut f = Double::from_f64(1.5);
Expand Down