From 8bceb976df4908b776c7b7e5afddf90b02878590 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 12 Sep 2024 08:31:53 -0700 Subject: [PATCH 1/3] Wean `HashSet` from the raw-entry API This changes `get_or_insert`, `get_or_insert_with`, and `bitxor_assign` to poke directly at the `RawTable` instead of using `raw_entry_mut()`. `HashSet::get_or_insert_with` also asserts that the converted value is actually equivalent after conversion, so we can ensure set consistency. `HashSet::get_or_insert_owned` is removed for now, since it offers no value over the `_with` method, as we would need to assert that too. --- ci/run.sh | 4 +-- src/set.rs | 102 ++++++++++++++++++++++++----------------------------- 2 files changed, 48 insertions(+), 58 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index fc8755c8f..39cf49c42 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -43,8 +43,8 @@ if [ "${CHANNEL}" = "nightly" ]; then fi # Make sure we can compile without the default hasher -"${CARGO}" -vv build --target="${TARGET}" --no-default-features --features raw-entry -"${CARGO}" -vv build --target="${TARGET}" --release --no-default-features --features raw-entry +"${CARGO}" -vv build --target="${TARGET}" --no-default-features +"${CARGO}" -vv build --target="${TARGET}" --release --no-default-features "${CARGO}" -vv ${OP} --target="${TARGET}" "${CARGO}" -vv ${OP} --target="${TARGET}" --features "${FEATURES}" diff --git a/src/set.rs b/src/set.rs index 9b818ed69..a93c0928f 100644 --- a/src/set.rs +++ b/src/set.rs @@ -1,5 +1,4 @@ use crate::{Equivalent, TryReserveError}; -use alloc::borrow::ToOwned; use core::hash::{BuildHasher, Hash}; use core::iter::{Chain, FusedIterator}; use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign}; @@ -911,45 +910,16 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn get_or_insert(&mut self, value: T) -> &T { - // Although the raw entry gives us `&mut T`, we only return `&T` to be consistent with - // `get`. Key mutation is "raw" because you're not supposed to affect `Eq` or `Hash`. - self.map - .raw_entry_mut() - .from_key(&value) - .or_insert(value, ()) - .0 - } - - /// Inserts an owned copy of the given `value` into the set if it is not - /// present, then returns a reference to the value in the set. - /// - /// # Examples - /// - /// ``` - /// use hashbrown::HashSet; - /// - /// let mut set: HashSet = ["cat", "dog", "horse"] - /// .iter().map(|&pet| pet.to_owned()).collect(); - /// - /// assert_eq!(set.len(), 3); - /// for &pet in &["cat", "dog", "fish"] { - /// let value = set.get_or_insert_owned(pet); - /// assert_eq!(value, pet); - /// } - /// assert_eq!(set.len(), 4); // a new "fish" was inserted - /// ``` - #[inline] - pub fn get_or_insert_owned(&mut self, value: &Q) -> &T - where - Q: Hash + Equivalent + ToOwned + ?Sized, - { - // Although the raw entry gives us `&mut T`, we only return `&T` to be consistent with - // `get`. Key mutation is "raw" because you're not supposed to affect `Eq` or `Hash`. - self.map - .raw_entry_mut() - .from_key(value) - .or_insert_with(|| (value.to_owned(), ())) - .0 + let hash = make_hash(&self.map.hash_builder, &value); + let bucket = match self.map.table.find_or_find_insert_slot( + hash, + equivalent_key(&value), + make_hasher(&self.map.hash_builder), + ) { + Ok(bucket) => bucket, + Err(slot) => unsafe { self.map.table.insert_in_slot(hash, slot, (value, ())) }, + }; + unsafe { &bucket.as_ref().0 } } /// Inserts a value computed from `f` into the set if the given `value` is @@ -970,19 +940,33 @@ where /// } /// assert_eq!(set.len(), 4); // a new "fish" was inserted /// ``` + /// + /// The following example will panic because the new value doesn't match. + /// + /// ```should_panic + /// let mut set = hashbrown::HashSet::new(); + /// set.get_or_insert_with("rust", |_| String::new()); + /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn get_or_insert_with(&mut self, value: &Q, f: F) -> &T where Q: Hash + Equivalent + ?Sized, F: FnOnce(&Q) -> T, { - // Although the raw entry gives us `&mut T`, we only return `&T` to be consistent with - // `get`. Key mutation is "raw" because you're not supposed to affect `Eq` or `Hash`. - self.map - .raw_entry_mut() - .from_key(value) - .or_insert_with(|| (f(value), ())) - .0 + let hash = make_hash(&self.map.hash_builder, &value); + let bucket = match self.map.table.find_or_find_insert_slot( + hash, + equivalent_key(value), + make_hasher(&self.map.hash_builder), + ) { + Ok(bucket) => bucket, + Err(slot) => { + let new = f(value); + assert!(value.equivalent(&new), "new value is not equivalent"); + unsafe { self.map.table.insert_in_slot(hash, slot, (new, ())) } + } + }; + unsafe { &bucket.as_ref().0 } } /// Gets the given value's corresponding entry in the set for in-place manipulation. @@ -1607,15 +1591,21 @@ where /// ``` fn bitxor_assign(&mut self, rhs: &HashSet) { for item in rhs { - let entry = self.map.raw_entry_mut().from_key(item); - match entry { - map::RawEntryMut::Occupied(e) => { - e.remove(); - } - map::RawEntryMut::Vacant(e) => { - e.insert(item.to_owned(), ()); - } - }; + let hash = make_hash(&self.map.hash_builder, &item); + match self.map.table.find_or_find_insert_slot( + hash, + equivalent_key(item), + make_hasher(&self.map.hash_builder), + ) { + Ok(bucket) => unsafe { + self.map.table.remove(bucket); + }, + Err(slot) => unsafe { + self.map + .table + .insert_in_slot(hash, slot, (item.clone(), ())); + }, + } } } } From ad3b4306825a0929330f233db8ec2353e5f5d062 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 12 Sep 2024 08:47:07 -0700 Subject: [PATCH 2/3] Add tests from #400 Co-authored-by: JustForFun88 --- src/set.rs | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/src/set.rs b/src/set.rs index a93c0928f..9490d0e2f 100644 --- a/src/set.rs +++ b/src/set.rs @@ -2588,7 +2588,7 @@ fn assert_covariance() { #[cfg(test)] mod test_set { - use super::HashSet; + use super::{make_hash, Equivalent, HashSet}; use crate::DefaultHashBuilder; use std::vec::Vec; @@ -3062,4 +3062,57 @@ mod test_set { assert_eq!(HashSet::::new().allocation_size(), 0); assert!(HashSet::::with_capacity(1).allocation_size() > core::mem::size_of::()); } + + #[test] + fn duplicate_insert() { + let mut set = HashSet::new(); + set.insert(1); + set.get_or_insert_with(&1, |_| 1); + set.get_or_insert_with(&1, |_| 1); + assert!([1].iter().eq(set.iter())); + } + + #[test] + #[should_panic] + fn some_invalid_equivalent() { + use core::hash::{Hash, Hasher}; + struct Invalid { + count: u32, + other: u32, + } + + struct InvalidRef { + count: u32, + other: u32, + } + + impl PartialEq for Invalid { + fn eq(&self, other: &Self) -> bool { + self.count == other.count && self.other == other.other + } + } + impl Eq for Invalid {} + + impl Equivalent for InvalidRef { + fn equivalent(&self, key: &Invalid) -> bool { + self.count == key.count && self.other == key.other + } + } + impl Hash for Invalid { + fn hash(&self, state: &mut H) { + self.count.hash(state); + } + } + impl Hash for InvalidRef { + fn hash(&self, state: &mut H) { + self.count.hash(state); + } + } + let mut set: HashSet = HashSet::new(); + let key = InvalidRef { count: 1, other: 1 }; + let value = Invalid { count: 1, other: 2 }; + if make_hash(set.hasher(), &key) == make_hash(set.hasher(), &value) { + set.get_or_insert_with(&key, |_| value); + } + } } From 6420cfd61780877cdacc2377605c7c60858e9c26 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Tue, 17 Sep 2024 17:02:34 -0700 Subject: [PATCH 3/3] Simplify some common `find_or_find_insert_slot` --- src/map.rs | 22 +++++++++++++++++----- src/set.rs | 30 +++++++----------------------- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/src/map.rs b/src/map.rs index e8cc26d2e..c2946aa0f 100644 --- a/src/map.rs +++ b/src/map.rs @@ -1741,11 +1741,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn insert(&mut self, k: K, v: V) -> Option { let hash = make_hash::(&self.hash_builder, &k); - let hasher = make_hasher::<_, V, S>(&self.hash_builder); - match self - .table - .find_or_find_insert_slot(hash, equivalent_key(&k), hasher) - { + match self.find_or_find_insert_slot(hash, &k) { Ok(bucket) => Some(mem::replace(unsafe { &mut bucket.as_mut().1 }, v)), Err(slot) => { unsafe { @@ -1756,6 +1752,22 @@ where } } + #[cfg_attr(feature = "inline-more", inline)] + pub(crate) fn find_or_find_insert_slot( + &mut self, + hash: u64, + key: &Q, + ) -> Result, crate::raw::InsertSlot> + where + Q: Equivalent + ?Sized, + { + self.table.find_or_find_insert_slot( + hash, + equivalent_key(key), + make_hasher(&self.hash_builder), + ) + } + /// Insert a key-value pair into the map without checking /// if the key already exists in the map. /// diff --git a/src/set.rs b/src/set.rs index 9490d0e2f..6bd461d55 100644 --- a/src/set.rs +++ b/src/set.rs @@ -3,7 +3,7 @@ use core::hash::{BuildHasher, Hash}; use core::iter::{Chain, FusedIterator}; use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign}; use core::{fmt, mem}; -use map::{equivalent_key, make_hash, make_hasher}; +use map::make_hash; use super::map::{self, HashMap, Keys}; use crate::raw::{Allocator, Global, RawExtractIf}; @@ -911,11 +911,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn get_or_insert(&mut self, value: T) -> &T { let hash = make_hash(&self.map.hash_builder, &value); - let bucket = match self.map.table.find_or_find_insert_slot( - hash, - equivalent_key(&value), - make_hasher(&self.map.hash_builder), - ) { + let bucket = match self.map.find_or_find_insert_slot(hash, &value) { Ok(bucket) => bucket, Err(slot) => unsafe { self.map.table.insert_in_slot(hash, slot, (value, ())) }, }; @@ -953,12 +949,8 @@ where Q: Hash + Equivalent + ?Sized, F: FnOnce(&Q) -> T, { - let hash = make_hash(&self.map.hash_builder, &value); - let bucket = match self.map.table.find_or_find_insert_slot( - hash, - equivalent_key(value), - make_hasher(&self.map.hash_builder), - ) { + let hash = make_hash(&self.map.hash_builder, value); + let bucket = match self.map.find_or_find_insert_slot(hash, value) { Ok(bucket) => bucket, Err(slot) => { let new = f(value); @@ -1141,11 +1133,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn replace(&mut self, value: T) -> Option { let hash = make_hash(&self.map.hash_builder, &value); - match self.map.table.find_or_find_insert_slot( - hash, - equivalent_key(&value), - make_hasher(&self.map.hash_builder), - ) { + match self.map.find_or_find_insert_slot(hash, &value) { Ok(bucket) => Some(mem::replace(unsafe { &mut bucket.as_mut().0 }, value)), Err(slot) => { unsafe { @@ -1591,12 +1579,8 @@ where /// ``` fn bitxor_assign(&mut self, rhs: &HashSet) { for item in rhs { - let hash = make_hash(&self.map.hash_builder, &item); - match self.map.table.find_or_find_insert_slot( - hash, - equivalent_key(item), - make_hasher(&self.map.hash_builder), - ) { + let hash = make_hash(&self.map.hash_builder, item); + match self.map.find_or_find_insert_slot(hash, item) { Ok(bucket) => unsafe { self.map.table.remove(bucket); },