Skip to content

Commit 7fcc0b6

Browse files
authored
Merge pull request #19 from mhseiden/fix_hash_impl
Fix skew in hash computation
2 parents b14e460 + 7c98a21 commit 7fcc0b6

File tree

2 files changed

+94
-15
lines changed

2 files changed

+94
-15
lines changed

src/lib.rs

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,24 @@ extern crate unreachable;
88

99
use std::cmp::Ordering;
1010
use std::error::Error;
11-
use std::ops::{Add, AddAssign, Deref, DerefMut, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign};
11+
use std::ops::{Add, AddAssign, Deref, DerefMut, Div, DivAssign, Mul, MulAssign, Neg, Rem,
12+
RemAssign, Sub, SubAssign};
1213
use std::hash::{Hash, Hasher};
1314
use std::fmt;
1415
use std::io;
16+
use std::mem;
1517
use unreachable::unreachable;
1618
use num_traits::Float;
1719

20+
// masks for the parts of the IEEE 754 float
21+
const SIGN_MASK: u64 = 0x8000000000000000u64;
22+
const EXP_MASK: u64 = 0x7ff0000000000000u64;
23+
const MAN_MASK: u64 = 0x000fffffffffffffu64;
24+
25+
// canonical raw bit patterns (for hashing)
26+
const CANONICAL_NAN_BITS: u64 = 0x7ff8000000000000u64;
27+
const CANONICAL_ZERO_BITS: u64 = 0x0u64;
28+
1829
/// A wrapper around Floats providing an implementation of Ord and Hash.
1930
///
2031
/// NaN is sorted as *greater* than all other values and *equal*
@@ -66,11 +77,7 @@ impl<T: Float + PartialOrd> Ord for OrderedFloat<T> {
6677
impl<T: Float + PartialEq> PartialEq for OrderedFloat<T> {
6778
fn eq(&self, other: &OrderedFloat<T>) -> bool {
6879
if self.as_ref().is_nan() {
69-
if other.as_ref().is_nan() {
70-
true
71-
} else {
72-
false
73-
}
80+
if other.as_ref().is_nan() { true } else { false }
7481
} else if other.as_ref().is_nan() {
7582
false
7683
} else {
@@ -128,7 +135,7 @@ impl<T: Float> DerefMut for OrderedFloat<T> {
128135
}
129136
}
130137

131-
impl<T: Float + PartialEq> Eq for OrderedFloat<T> { }
138+
impl<T: Float + PartialEq> Eq for OrderedFloat<T> {}
132139

133140
/// A wrapper around Floats providing an implementation of Ord and Hash.
134141
///
@@ -496,7 +503,7 @@ pub struct FloatIsNaN;
496503

497504
impl Error for FloatIsNaN {
498505
fn description(&self) -> &str {
499-
return "NotNaN constructed with NaN"
506+
return "NotNaN constructed with NaN";
500507
}
501508
}
502509

@@ -514,14 +521,23 @@ impl Into<io::Error> for FloatIsNaN {
514521

515522
#[inline]
516523
fn hash_float<F: Float, H: Hasher>(f: &F, state: &mut H) {
524+
raw_double_bits(f).hash(state);
525+
}
526+
527+
#[inline]
528+
fn raw_double_bits<F: Float>(f: &F) -> u64 {
529+
if f.is_nan() {
530+
return CANONICAL_NAN_BITS;
531+
}
532+
517533
let (man, exp, sign) = f.integer_decode();
518534
if man == 0 {
519-
// Consolidate the representation of zero, whether signed or not
520-
// The IEEE standard considers positive and negative zero to be equal
521-
0
522-
} else {
523-
(man ^ ((exp as u64) << 48) ^ sign as u64)
524-
}.hash(state)
535+
return CANONICAL_ZERO_BITS;
536+
}
537+
538+
let exp_u64 = unsafe { mem::transmute::<i16, u16>(exp) } as u64;
539+
let sign_u64 = if sign > 0 { 1u64 } else { 0u64 };
540+
(man & MAN_MASK) | ((exp_u64 << 52) & EXP_MASK) | ((sign_u64 << 63) & SIGN_MASK)
525541
}
526542

527543
#[cfg(feature = "rustc-serialize")]
@@ -585,7 +601,10 @@ mod impl_serde {
585601

586602
impl<T: Float + Deserialize> Deserialize for NotNaN<T> {
587603
fn deserialize<D: Deserializer>(d: &mut D) -> Result<Self, D::Error> {
588-
T::deserialize(d).and_then(|v| NotNaN::new(v).map_err(|_| <D::Error as Error>::invalid_value("value cannot be NaN")))
604+
T::deserialize(d).and_then(|v| {
605+
NotNaN::new(v)
606+
.map_err(|_| <D::Error as Error>::invalid_value("value cannot be NaN"))
607+
})
589608
}
590609
}
591610
}

tests/test.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ pub use num_traits::Float;
99
pub use std::cmp::Ordering::*;
1010
pub use std::{f32, f64, panic};
1111

12+
pub use std::collections::HashSet;
13+
pub use std::collections::hash_map::RandomState;
14+
pub use std::hash::*;
15+
1216
describe! ordered_float32 {
1317
it "should compare regular floats" {
1418
assert_eq!(OrderedFloat(7.0f32).cmp(&OrderedFloat(7.0)), Equal);
@@ -164,3 +168,59 @@ describe! not_nan64 {
164168
assert!(panic::catch_unwind(|| {let mut tmp = NotNaN::from(0.0f64); tmp %= f64::NAN;}).is_err());
165169
}
166170
}
171+
172+
describe! hashing {
173+
it "should hash zero and neg-zero to the same hc" {
174+
let state = RandomState::new();
175+
let mut h1 = state.build_hasher();
176+
let mut h2 = state.build_hasher();
177+
OrderedFloat::from(0f64).hash(&mut h1);
178+
OrderedFloat::from(-0f64).hash(&mut h2);
179+
assert_eq!(h1.finish(), h2.finish());
180+
}
181+
182+
it "should hash inf and neg-inf to different hcs" {
183+
let state = RandomState::new();
184+
let mut h1 = state.build_hasher();
185+
let mut h2 = state.build_hasher();
186+
OrderedFloat::from(f64::INFINITY).hash(&mut h1);
187+
OrderedFloat::from(f64::NEG_INFINITY).hash(&mut h2);
188+
assert!(h1.finish() != h2.finish());
189+
}
190+
191+
it "should have a good hash function for whole numbers" {
192+
let state = RandomState::new();
193+
let limit = 10000;
194+
195+
let mut set = ::std::collections::HashSet::with_capacity(limit);
196+
for i in 0..limit {
197+
let mut h = state.build_hasher();
198+
OrderedFloat::from(i as f64).hash(&mut h);
199+
set.insert(h.finish());
200+
}
201+
202+
// This allows 100 collisions, which is far too
203+
// many, but should guard against transient issues
204+
// that will result from using RandomState
205+
let pct_unique = set.len() as f64 / limit as f64;
206+
assert!(0.99f64 < pct_unique, "percent-unique={}", pct_unique);
207+
}
208+
209+
it "should have a good hash function for fractional numbers" {
210+
let state = RandomState::new();
211+
let limit = 10000;
212+
213+
let mut set = ::std::collections::HashSet::with_capacity(limit);
214+
for i in 0..limit {
215+
let mut h = state.build_hasher();
216+
OrderedFloat::from(i as f64 * (1f64 / limit as f64)).hash(&mut h);
217+
set.insert(h.finish());
218+
}
219+
220+
// This allows 100 collisions, which is far too
221+
// many, but should guard against transient issues
222+
// that will result from using RandomState
223+
let pct_unique = set.len() as f64 / limit as f64;
224+
assert!(0.99f64 < pct_unique, "percent-unique={}", pct_unique);
225+
}
226+
}

0 commit comments

Comments
 (0)