Skip to content

Commit 763ccf4

Browse files
committed
Add low-level HashTable API
The primary use case for this type over [`HashMap`] or [`HashSet`] is to support types that do not implement the [`Hash`] and [`Eq`] traits, but instead require additional data not contained in the key itself to compute a hash and compare two elements for equality. `HashTable` has some similarities with `RawTable`, but has a completely safe API. It is intended as a replacement for the existing raw entry API, with the intent of deprecating the latter and eventually removing it. Examples of when this can be useful include: - An `IndexMap` implementation where indices into a `Vec` are stored as elements in a `HashTable<usize>`. Hashing and comparing the elements requires indexing the associated `Vec` to get the actual value referred to by the index. - Avoiding re-computing a hash when it is already known. - Mutating the key of an element in a way that doesn't affect its hash. To achieve this, `HashTable` methods that search for an element in the table require a hash value and equality function to be explicitly passed in as arguments. The method will then iterate over the elements with the given hash and call the equality function on each of them, until a match is found.
1 parent f9e4900 commit 763ccf4

File tree

7 files changed

+2096
-35
lines changed

7 files changed

+2096
-35
lines changed

src/external_trait_impls/rayon/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ mod helpers;
22
pub(crate) mod map;
33
pub(crate) mod raw;
44
pub(crate) mod set;
5+
pub(crate) mod table;
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
//! Rayon extensions for `HashTable`.
2+
3+
use super::raw::{RawIntoParIter, RawParDrain, RawParIter};
4+
use crate::hash_table::HashTable;
5+
use crate::raw::{Allocator, Global};
6+
use core::fmt;
7+
use core::marker::PhantomData;
8+
use rayon::iter::plumbing::UnindexedConsumer;
9+
use rayon::iter::{IntoParallelIterator, ParallelIterator};
10+
11+
/// Parallel iterator over shared references to entries in a table.
12+
///
13+
/// This iterator is created by the [`par_iter`] method on [`HashTable`]
14+
/// (provided by the [`IntoParallelRefIterator`] trait).
15+
/// See its documentation for more.
16+
///
17+
/// [`par_iter`]: /hashbrown/struct.HashTable.html#method.par_iter
18+
/// [`HashTable`]: /hashbrown/struct.HashTable.html
19+
/// [`IntoParallelRefIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefIterator.html
20+
pub struct ParIter<'a, T> {
21+
inner: RawParIter<T>,
22+
marker: PhantomData<&'a T>,
23+
}
24+
25+
impl<'a, T: Sync> ParallelIterator for ParIter<'a, T> {
26+
type Item = &'a T;
27+
28+
#[cfg_attr(feature = "inline-more", inline)]
29+
fn drive_unindexed<C>(self, consumer: C) -> C::Result
30+
where
31+
C: UnindexedConsumer<Self::Item>,
32+
{
33+
self.inner
34+
.map(|x| unsafe { x.as_ref() })
35+
.drive_unindexed(consumer)
36+
}
37+
}
38+
39+
impl<T> Clone for ParIter<'_, T> {
40+
#[cfg_attr(feature = "inline-more", inline)]
41+
fn clone(&self) -> Self {
42+
Self {
43+
inner: self.inner.clone(),
44+
marker: PhantomData,
45+
}
46+
}
47+
}
48+
49+
impl<T: fmt::Debug> fmt::Debug for ParIter<'_, T> {
50+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51+
let iter = unsafe { self.inner.iter() }.map(|x| unsafe { x.as_ref() });
52+
f.debug_list().entries(iter).finish()
53+
}
54+
}
55+
56+
/// Parallel iterator over mutable references to entries in a table.
57+
///
58+
/// This iterator is created by the [`par_iter_mut`] method on [`HashTable`]
59+
/// (provided by the [`IntoParallelRefMutIterator`] trait).
60+
/// See its documentation for more.
61+
///
62+
/// [`par_iter_mut`]: /hashbrown/struct.HashTable.html#method.par_iter_mut
63+
/// [`HashTable`]: /hashbrown/struct.HashTable.html
64+
/// [`IntoParallelRefMutIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefMutIterator.html
65+
pub struct ParIterMut<'a, T> {
66+
inner: RawParIter<T>,
67+
marker: PhantomData<&'a mut T>,
68+
}
69+
70+
impl<'a, T: Send> ParallelIterator for ParIterMut<'a, T> {
71+
type Item = &'a mut T;
72+
73+
#[cfg_attr(feature = "inline-more", inline)]
74+
fn drive_unindexed<C>(self, consumer: C) -> C::Result
75+
where
76+
C: UnindexedConsumer<Self::Item>,
77+
{
78+
self.inner
79+
.map(|x| unsafe { x.as_mut() })
80+
.drive_unindexed(consumer)
81+
}
82+
}
83+
84+
impl<T: fmt::Debug> fmt::Debug for ParIterMut<'_, T> {
85+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
86+
ParIter {
87+
inner: self.inner.clone(),
88+
marker: PhantomData,
89+
}
90+
.fmt(f)
91+
}
92+
}
93+
94+
/// Parallel iterator over entries of a consumed table.
95+
///
96+
/// This iterator is created by the [`into_par_iter`] method on [`HashTable`]
97+
/// (provided by the [`IntoParallelIterator`] trait).
98+
/// See its documentation for more.
99+
///
100+
/// [`into_par_iter`]: /hashbrown/struct.HashTable.html#method.into_par_iter
101+
/// [`HashTable`]: /hashbrown/struct.HashTable.html
102+
/// [`IntoParallelIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelIterator.html
103+
pub struct IntoParIter<T, A: Allocator + Clone = Global> {
104+
inner: RawIntoParIter<T, A>,
105+
}
106+
107+
impl<T: Send, A: Allocator + Clone + Send> ParallelIterator for IntoParIter<T, A> {
108+
type Item = T;
109+
110+
#[cfg_attr(feature = "inline-more", inline)]
111+
fn drive_unindexed<C>(self, consumer: C) -> C::Result
112+
where
113+
C: UnindexedConsumer<Self::Item>,
114+
{
115+
self.inner.drive_unindexed(consumer)
116+
}
117+
}
118+
119+
impl<T: fmt::Debug, A: Allocator + Clone> fmt::Debug for IntoParIter<T, A> {
120+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121+
ParIter {
122+
inner: unsafe { self.inner.par_iter() },
123+
marker: PhantomData,
124+
}
125+
.fmt(f)
126+
}
127+
}
128+
129+
/// Parallel draining iterator over entries of a table.
130+
///
131+
/// This iterator is created by the [`par_drain`] method on [`HashTable`].
132+
/// See its documentation for more.
133+
///
134+
/// [`par_drain`]: /hashbrown/struct.HashTable.html#method.par_drain
135+
/// [`HashTable`]: /hashbrown/struct.HashTable.html
136+
pub struct ParDrain<'a, T, A: Allocator + Clone = Global> {
137+
inner: RawParDrain<'a, T, A>,
138+
}
139+
140+
impl<T: Send, A: Allocator + Clone + Sync> ParallelIterator for ParDrain<'_, T, A> {
141+
type Item = T;
142+
143+
#[cfg_attr(feature = "inline-more", inline)]
144+
fn drive_unindexed<C>(self, consumer: C) -> C::Result
145+
where
146+
C: UnindexedConsumer<Self::Item>,
147+
{
148+
self.inner.drive_unindexed(consumer)
149+
}
150+
}
151+
152+
impl<T: fmt::Debug, A: Allocator + Clone> fmt::Debug for ParDrain<'_, T, A> {
153+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
154+
ParIter {
155+
inner: unsafe { self.inner.par_iter() },
156+
marker: PhantomData,
157+
}
158+
.fmt(f)
159+
}
160+
}
161+
162+
impl<T: Send, A: Allocator + Clone> HashTable<T, A> {
163+
/// Consumes (potentially in parallel) all values in an arbitrary order,
164+
/// while preserving the table's allocated memory for reuse.
165+
#[cfg_attr(feature = "inline-more", inline)]
166+
pub fn par_drain(&mut self) -> ParDrain<'_, T, A> {
167+
ParDrain {
168+
inner: self.table.par_drain(),
169+
}
170+
}
171+
}
172+
173+
impl<T: Send, A: Allocator + Clone + Send> IntoParallelIterator for HashTable<T, A> {
174+
type Item = T;
175+
type Iter = IntoParIter<T, A>;
176+
177+
#[cfg_attr(feature = "inline-more", inline)]
178+
fn into_par_iter(self) -> Self::Iter {
179+
IntoParIter {
180+
inner: self.table.into_par_iter(),
181+
}
182+
}
183+
}
184+
185+
impl<'a, T: Sync, A: Allocator + Clone> IntoParallelIterator for &'a HashTable<T, A> {
186+
type Item = &'a T;
187+
type Iter = ParIter<'a, T>;
188+
189+
#[cfg_attr(feature = "inline-more", inline)]
190+
fn into_par_iter(self) -> Self::Iter {
191+
ParIter {
192+
inner: unsafe { self.table.par_iter() },
193+
marker: PhantomData,
194+
}
195+
}
196+
}
197+
198+
impl<'a, T: Send, A: Allocator + Clone> IntoParallelIterator for &'a mut HashTable<T, A> {
199+
type Item = &'a mut T;
200+
type Iter = ParIterMut<'a, T>;
201+
202+
#[cfg_attr(feature = "inline-more", inline)]
203+
fn into_par_iter(self) -> Self::Iter {
204+
ParIterMut {
205+
inner: unsafe { self.table.par_iter() },
206+
marker: PhantomData,
207+
}
208+
}
209+
}
210+
211+
#[cfg(test)]
212+
mod test_par_table {
213+
use alloc::vec::Vec;
214+
use core::sync::atomic::{AtomicUsize, Ordering};
215+
216+
use rayon::prelude::*;
217+
218+
use crate::{
219+
hash_map::{make_hash, DefaultHashBuilder},
220+
hash_table::HashTable,
221+
};
222+
223+
#[test]
224+
fn test_iterate() {
225+
let hasher = DefaultHashBuilder::default();
226+
let mut a = HashTable::new();
227+
for i in 0..32 {
228+
a.insert_unchecked(make_hash(&hasher, &i), i, |x| make_hash(&hasher, x));
229+
}
230+
let observed = AtomicUsize::new(0);
231+
a.par_iter().for_each(|k| {
232+
observed.fetch_or(1 << *k, Ordering::Relaxed);
233+
});
234+
assert_eq!(observed.into_inner(), 0xFFFF_FFFF);
235+
}
236+
237+
#[test]
238+
fn test_move_iter() {
239+
let hasher = DefaultHashBuilder::default();
240+
let hs = {
241+
let mut hs = HashTable::new();
242+
243+
hs.insert_unchecked(make_hash(&hasher, &'a'), 'a', |x| make_hash(&hasher, x));
244+
hs.insert_unchecked(make_hash(&hasher, &'b'), 'b', |x| make_hash(&hasher, x));
245+
246+
hs
247+
};
248+
249+
let v = hs.into_par_iter().collect::<Vec<char>>();
250+
assert!(v == ['a', 'b'] || v == ['b', 'a']);
251+
}
252+
}

src/lib.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ mod map;
8181
mod rustc_entry;
8282
mod scopeguard;
8383
mod set;
84+
mod table;
8485

8586
pub mod hash_map {
8687
//! A hash map implemented with quadratic probing and SIMD lookup.
@@ -113,9 +114,24 @@ pub mod hash_set {
113114
pub use crate::external_trait_impls::rayon::set::*;
114115
}
115116
}
117+
pub mod hash_table {
118+
//! A hash table implemented with quadratic probing and SIMD lookup.
119+
pub use crate::table::*;
120+
121+
#[cfg(feature = "rayon")]
122+
/// [rayon]-based parallel iterator types for hash tables.
123+
/// You will rarely need to interact with it directly unless you have need
124+
/// to name one of the iterator types.
125+
///
126+
/// [rayon]: https://docs.rs/rayon/1.0/rayon
127+
pub mod rayon {
128+
pub use crate::external_trait_impls::rayon::table::*;
129+
}
130+
}
116131

117132
pub use crate::map::HashMap;
118133
pub use crate::set::HashSet;
134+
pub use crate::table::HashTable;
119135

120136
#[cfg(feature = "equivalent")]
121137
pub use equivalent::Equivalent;

src/map.rs

Lines changed: 6 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
use crate::raw::{Allocator, Bucket, Global, RawDrain, RawIntoIter, RawIter, RawTable};
1+
use crate::raw::{
2+
Allocator, Bucket, Global, RawDrain, RawExtractIf, RawIntoIter, RawIter, RawTable,
3+
};
24
use crate::{Equivalent, TryReserveError};
35
use core::borrow::Borrow;
46
use core::fmt::{self, Debug};
@@ -977,7 +979,7 @@ impl<K, V, S, A: Allocator + Clone> HashMap<K, V, S, A> {
977979
{
978980
ExtractIf {
979981
f,
980-
inner: ExtractIfInner {
982+
inner: RawExtractIf {
981983
iter: unsafe { self.table.iter() },
982984
table: &mut self.table,
983985
},
@@ -2722,7 +2724,7 @@ where
27222724
F: FnMut(&K, &mut V) -> bool,
27232725
{
27242726
f: F,
2725-
inner: ExtractIfInner<'a, K, V, A>,
2727+
inner: RawExtractIf<'a, (K, V), A>,
27262728
}
27272729

27282730
impl<K, V, F, A> Iterator for ExtractIf<'_, K, V, F, A>
@@ -2734,7 +2736,7 @@ where
27342736

27352737
#[cfg_attr(feature = "inline-more", inline)]
27362738
fn next(&mut self) -> Option<Self::Item> {
2737-
self.inner.next(&mut self.f)
2739+
self.inner.next(|&mut (ref k, ref mut v)| (self.f)(k, v))
27382740
}
27392741

27402742
#[inline]
@@ -2745,30 +2747,6 @@ where
27452747

27462748
impl<K, V, F> FusedIterator for ExtractIf<'_, K, V, F> where F: FnMut(&K, &mut V) -> bool {}
27472749

2748-
/// Portions of `ExtractIf` shared with `set::ExtractIf`
2749-
pub(super) struct ExtractIfInner<'a, K, V, A: Allocator + Clone> {
2750-
pub iter: RawIter<(K, V)>,
2751-
pub table: &'a mut RawTable<(K, V), A>,
2752-
}
2753-
2754-
impl<K, V, A: Allocator + Clone> ExtractIfInner<'_, K, V, A> {
2755-
#[cfg_attr(feature = "inline-more", inline)]
2756-
pub(super) fn next<F>(&mut self, f: &mut F) -> Option<(K, V)>
2757-
where
2758-
F: FnMut(&K, &mut V) -> bool,
2759-
{
2760-
unsafe {
2761-
for item in &mut self.iter {
2762-
let &mut (ref key, ref mut value) = item.as_mut();
2763-
if f(key, value) {
2764-
return Some(self.table.remove(item).0);
2765-
}
2766-
}
2767-
}
2768-
None
2769-
}
2770-
}
2771-
27722750
/// A mutable iterator over the values of a `HashMap` in arbitrary order.
27732751
/// The iterator element type is `&'a mut V`.
27742752
///

src/raw/mod.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3710,6 +3710,28 @@ impl Iterator for RawIterHashInner {
37103710
}
37113711
}
37123712

3713+
pub(crate) struct RawExtractIf<'a, T, A: Allocator + Clone> {
3714+
pub iter: RawIter<T>,
3715+
pub table: &'a mut RawTable<T, A>,
3716+
}
3717+
3718+
impl<T, A: Allocator + Clone> RawExtractIf<'_, T, A> {
3719+
#[cfg_attr(feature = "inline-more", inline)]
3720+
pub(crate) fn next<F>(&mut self, mut f: F) -> Option<T>
3721+
where
3722+
F: FnMut(&mut T) -> bool,
3723+
{
3724+
unsafe {
3725+
for item in &mut self.iter {
3726+
if f(item.as_mut()) {
3727+
return Some(self.table.remove(item).0);
3728+
}
3729+
}
3730+
}
3731+
None
3732+
}
3733+
}
3734+
37133735
#[cfg(test)]
37143736
mod test_map {
37153737
use super::*;

0 commit comments

Comments
 (0)