Skip to content

Commit ef84e09

Browse files
committed
Auto merge of #466 - Amanieu:hashtable, r=Amanieu
Add low-level `HashTable` API The primary use case for this type over `HashMap` or `HashSet` is to support types that do not implement the `Hash` and `Eq` traits, but instead require additional data not contained in the key itself to compute a hash and compare two elements for equality. `HashTable` has some similarities with `RawTable`, but has a completely safe API. It is intended as a replacement for the existing raw entry API, with the intent of deprecating the latter and eventually removing it. Examples of when this can be useful include: - An `IndexMap` implementation where indices into a `Vec` are stored as elements in a `HashTable<usize>`. Hashing and comparing the elements requires indexing the associated `Vec` to get the actual value referred to by the index. - Avoiding re-computing a hash when it is already known. - Mutating the key of an element in a way that doesn't affect its hash. To achieve this, `HashTable` methods that search for an element in the table require a hash value and equality function to be explicitly passed in as arguments. The method will then iterate over the elements with the given hash and call the equality function on each of them, until a match is found.
2 parents 63a693a + 9556bf4 commit ef84e09

File tree

7 files changed

+2334
-35
lines changed

7 files changed

+2334
-35
lines changed

src/external_trait_impls/rayon/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ mod helpers;
22
pub(crate) mod map;
33
pub(crate) mod raw;
44
pub(crate) mod set;
5+
pub(crate) mod table;
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
//! Rayon extensions for `HashTable`.
2+
3+
use super::raw::{RawIntoParIter, RawParDrain, RawParIter};
4+
use crate::hash_table::HashTable;
5+
use crate::raw::{Allocator, Global};
6+
use core::fmt;
7+
use core::marker::PhantomData;
8+
use rayon::iter::plumbing::UnindexedConsumer;
9+
use rayon::iter::{IntoParallelIterator, ParallelIterator};
10+
11+
/// Parallel iterator over shared references to entries in a table.
12+
///
13+
/// This iterator is created by the [`par_iter`] method on [`HashTable`]
14+
/// (provided by the [`IntoParallelRefIterator`] trait).
15+
/// See its documentation for more.
16+
///
17+
/// [`par_iter`]: /hashbrown/struct.HashTable.html#method.par_iter
18+
/// [`HashTable`]: /hashbrown/struct.HashTable.html
19+
/// [`IntoParallelRefIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefIterator.html
20+
pub struct ParIter<'a, T> {
21+
// Raw parallel iterator; its type has no lifetime parameter of its own.
inner: RawParIter<T>,
22+
// Zero-sized marker that makes this type behave as if it held a `&'a T`.
marker: PhantomData<&'a T>,
23+
}
24+
25+
// Yields `&'a T` items by driving the raw iterator and converting each raw
// item into a shared reference.
impl<'a, T: Sync> ParallelIterator for ParIter<'a, T> {
26+
type Item = &'a T;
27+
28+
#[cfg_attr(feature = "inline-more", inline)]
29+
fn drive_unindexed<C>(self, consumer: C) -> C::Result
30+
where
31+
C: UnindexedConsumer<Self::Item>,
32+
{
33+
self.inner
34+
// SAFETY (review note): in this file a `ParIter<'a, T>` is built from a
// `&'a HashTable` borrow, so each element is presumably live and not
// mutated for `'a` — confirm against the contract of `RawParIter`.
.map(|x| unsafe { x.as_ref() })
35+
.drive_unindexed(consumer)
36+
}
37+
}
38+
39+
// Written by hand (rather than derived) so that cloning the iterator does not
// require `T: Clone`; only the raw iterator is cloned.
impl<T> Clone for ParIter<'_, T> {
40+
#[cfg_attr(feature = "inline-more", inline)]
41+
fn clone(&self) -> Self {
42+
Self {
43+
inner: self.inner.clone(),
44+
marker: PhantomData,
45+
}
46+
}
47+
}
48+
49+
// Formats the remaining elements as a debug list.
impl<T: fmt::Debug> fmt::Debug for ParIter<'_, T> {
50+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51+
// Obtain an ordinary iterator from the raw parallel iterator and view each
// raw item as `&T`.
// SAFETY (review note): relies on the table borrow represented by `marker`
// keeping the elements valid — confirm against `RawParIter::iter`.
let iter = unsafe { self.inner.iter() }.map(|x| unsafe { x.as_ref() });
52+
f.debug_list().entries(iter).finish()
53+
}
54+
}
55+
56+
/// Parallel iterator over mutable references to entries in a table.
57+
///
58+
/// This iterator is created by the [`par_iter_mut`] method on [`HashTable`]
59+
/// (provided by the [`IntoParallelRefMutIterator`] trait).
60+
/// See its documentation for more.
61+
///
62+
/// [`par_iter_mut`]: /hashbrown/struct.HashTable.html#method.par_iter_mut
63+
/// [`HashTable`]: /hashbrown/struct.HashTable.html
64+
/// [`IntoParallelRefMutIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefMutIterator.html
65+
pub struct ParIterMut<'a, T> {
66+
// Raw parallel iterator; its type has no lifetime parameter of its own.
inner: RawParIter<T>,
67+
// Zero-sized marker that makes this type behave as if it held a `&'a mut T`.
marker: PhantomData<&'a mut T>,
68+
}
69+
70+
// Yields `&'a mut T` items by driving the raw iterator and converting each raw
// item into a mutable reference.
impl<'a, T: Send> ParallelIterator for ParIterMut<'a, T> {
71+
type Item = &'a mut T;
72+
73+
#[cfg_attr(feature = "inline-more", inline)]
74+
fn drive_unindexed<C>(self, consumer: C) -> C::Result
75+
where
76+
C: UnindexedConsumer<Self::Item>,
77+
{
78+
self.inner
79+
// SAFETY (review note): in this file a `ParIterMut<'a, T>` is built from
// a `&'a mut HashTable` borrow; presumably the raw iterator yields each
// element at most once so the `&mut T`s never alias — confirm against
// `RawParIter`.
.map(|x| unsafe { x.as_mut() })
80+
.drive_unindexed(consumer)
81+
}
82+
}
83+
84+
// Formats the elements by delegating to `ParIter`'s `Debug` impl.
impl<T: fmt::Debug> fmt::Debug for ParIterMut<'_, T> {
85+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
86+
// Build a temporary shared-reference iterator over a clone of the raw
// iterator; `self` is only borrowed immutably here.
ParIter {
87+
inner: self.inner.clone(),
88+
marker: PhantomData,
89+
}
90+
.fmt(f)
91+
}
92+
}
93+
94+
/// Parallel iterator over entries of a consumed table.
95+
///
96+
/// This iterator is created by the [`into_par_iter`] method on [`HashTable`]
97+
/// (provided by the [`IntoParallelIterator`] trait).
98+
/// See its documentation for more.
99+
///
100+
/// [`into_par_iter`]: /hashbrown/struct.HashTable.html#method.into_par_iter
101+
/// [`HashTable`]: /hashbrown/struct.HashTable.html
102+
/// [`IntoParallelIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelIterator.html
103+
pub struct IntoParIter<T, A: Allocator = Global> {
104+
// Owning raw parallel iterator that all work is delegated to.
inner: RawIntoParIter<T, A>,
105+
}
106+
107+
// Yields owned `T` values; the work is forwarded unchanged to the raw
// owning iterator.
impl<T: Send, A: Allocator + Send> ParallelIterator for IntoParIter<T, A> {
108+
type Item = T;
109+
110+
#[cfg_attr(feature = "inline-more", inline)]
111+
fn drive_unindexed<C>(self, consumer: C) -> C::Result
112+
where
113+
C: UnindexedConsumer<Self::Item>,
114+
{
115+
self.inner.drive_unindexed(consumer)
116+
}
117+
}
118+
119+
// Formats the elements by delegating to `ParIter`'s `Debug` impl.
impl<T: fmt::Debug, A: Allocator> fmt::Debug for IntoParIter<T, A> {
120+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121+
ParIter {
122+
// SAFETY (review note): the temporary `ParIter` is used only inside this
// call while `self` is borrowed, so it presumably cannot outlive the
// elements — confirm against `RawIntoParIter::par_iter`.
inner: unsafe { self.inner.par_iter() },
123+
marker: PhantomData,
124+
}
125+
.fmt(f)
126+
}
127+
}
128+
129+
/// Parallel draining iterator over entries of a table.
130+
///
131+
/// This iterator is created by the [`par_drain`] method on [`HashTable`].
132+
/// See its documentation for more.
133+
///
134+
/// [`par_drain`]: /hashbrown/struct.HashTable.html#method.par_drain
135+
/// [`HashTable`]: /hashbrown/struct.HashTable.html
136+
pub struct ParDrain<'a, T, A: Allocator = Global> {
137+
// Raw draining iterator tied to lifetime `'a`; all work is delegated to it.
inner: RawParDrain<'a, T, A>,
138+
}
139+
140+
// Yields owned `T` values; the work is forwarded unchanged to the raw
// draining iterator.
impl<T: Send, A: Allocator + Sync> ParallelIterator for ParDrain<'_, T, A> {
141+
type Item = T;
142+
143+
#[cfg_attr(feature = "inline-more", inline)]
144+
fn drive_unindexed<C>(self, consumer: C) -> C::Result
145+
where
146+
C: UnindexedConsumer<Self::Item>,
147+
{
148+
self.inner.drive_unindexed(consumer)
149+
}
150+
}
151+
152+
// Formats the elements by delegating to `ParIter`'s `Debug` impl.
impl<T: fmt::Debug, A: Allocator> fmt::Debug for ParDrain<'_, T, A> {
153+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
154+
ParIter {
155+
// SAFETY (review note): the temporary `ParIter` is used only inside this
// call while `self` is borrowed, so it presumably cannot outlive the
// elements — confirm against `RawParDrain::par_iter`.
inner: unsafe { self.inner.par_iter() },
156+
marker: PhantomData,
157+
}
158+
.fmt(f)
159+
}
160+
}
161+
162+
impl<T: Send, A: Allocator> HashTable<T, A> {
163+
/// Consumes (potentially in parallel) all values in an arbitrary order,
164+
/// while preserving the table's allocated memory for reuse.
165+
#[cfg_attr(feature = "inline-more", inline)]
166+
pub fn par_drain(&mut self) -> ParDrain<'_, T, A> {
167+
// Thin wrapper: the raw table supplies the draining iterator.
ParDrain {
168+
inner: self.raw.par_drain(),
169+
}
170+
}
171+
}
172+
173+
// By-value conversion: consumes the table and yields its elements as owned `T`.
impl<T: Send, A: Allocator + Send> IntoParallelIterator for HashTable<T, A> {
174+
type Item = T;
175+
type Iter = IntoParIter<T, A>;
176+
177+
#[cfg_attr(feature = "inline-more", inline)]
178+
fn into_par_iter(self) -> Self::Iter {
179+
// Thin wrapper: the raw table supplies the owning iterator.
IntoParIter {
180+
inner: self.raw.into_par_iter(),
181+
}
182+
}
183+
}
184+
185+
// Shared-borrow conversion: yields `&'a T` for a `&'a HashTable`.
impl<'a, T: Sync, A: Allocator> IntoParallelIterator for &'a HashTable<T, A> {
186+
type Item = &'a T;
187+
type Iter = ParIter<'a, T>;
188+
189+
#[cfg_attr(feature = "inline-more", inline)]
190+
fn into_par_iter(self) -> Self::Iter {
191+
ParIter {
192+
// SAFETY (review note): the returned `ParIter<'a, T>` carries the `'a`
// borrow of `self` through `marker`, so the raw iterator presumably
// cannot outlive the table — confirm against `RawTable::par_iter`.
inner: unsafe { self.raw.par_iter() },
193+
marker: PhantomData,
194+
}
195+
}
196+
}
197+
198+
// Unique-borrow conversion: yields `&'a mut T` for a `&'a mut HashTable`.
impl<'a, T: Send, A: Allocator> IntoParallelIterator for &'a mut HashTable<T, A> {
199+
type Item = &'a mut T;
200+
type Iter = ParIterMut<'a, T>;
201+
202+
#[cfg_attr(feature = "inline-more", inline)]
203+
fn into_par_iter(self) -> Self::Iter {
204+
ParIterMut {
205+
// SAFETY (review note): the returned `ParIterMut<'a, T>` carries the
// exclusive `'a` borrow of `self` through `marker`, so the raw iterator
// presumably cannot outlive or alias the table — confirm against
// `RawTable::par_iter`.
inner: unsafe { self.raw.par_iter() },
206+
marker: PhantomData,
207+
}
208+
}
209+
}
210+
211+
// Tests for the rayon parallel iterators over `HashTable`.
#[cfg(test)]
212+
mod test_par_table {
213+
use alloc::vec::Vec;
214+
use core::sync::atomic::{AtomicUsize, Ordering};
215+
216+
use rayon::prelude::*;
217+
218+
use crate::{
219+
hash_map::{make_hash, DefaultHashBuilder},
220+
hash_table::HashTable,
221+
};
222+
223+
// Inserts 0..32 and checks that parallel iteration visits every element.
#[test]
224+
fn test_iterate() {
225+
let hasher = DefaultHashBuilder::default();
226+
let mut a = HashTable::new();
227+
for i in 0..32 {
228+
a.insert_unique(make_hash(&hasher, &i), i, |x| make_hash(&hasher, x));
229+
}
230+
// Bitmask of visited values: element `k` sets bit `k`.
let observed = AtomicUsize::new(0);
231+
a.par_iter().for_each(|k| {
232+
observed.fetch_or(1 << *k, Ordering::Relaxed);
233+
});
234+
// Bits 0..=31 all set means each of the 32 values was seen.
assert_eq!(observed.into_inner(), 0xFFFF_FFFF);
235+
}
236+
237+
// Checks that consuming the table in parallel yields exactly its elements.
#[test]
238+
fn test_move_iter() {
239+
let hasher = DefaultHashBuilder::default();
240+
let hs = {
241+
let mut hs = HashTable::new();
242+
243+
hs.insert_unique(make_hash(&hasher, &'a'), 'a', |x| make_hash(&hasher, x));
244+
hs.insert_unique(make_hash(&hasher, &'b'), 'b', |x| make_hash(&hasher, x));
245+
246+
hs
247+
};
248+
249+
let v = hs.into_par_iter().collect::<Vec<char>>();
250+
// Either ordering of the two elements is accepted.
assert!(v == ['a', 'b'] || v == ['b', 'a']);
251+
}
252+
}

src/lib.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ mod map;
8181
mod rustc_entry;
8282
mod scopeguard;
8383
mod set;
84+
mod table;
8485

8586
pub mod hash_map {
8687
//! A hash map implemented with quadratic probing and SIMD lookup.
@@ -113,9 +114,24 @@ pub mod hash_set {
113114
pub use crate::external_trait_impls::rayon::set::*;
114115
}
115116
}
117+
pub mod hash_table {
118+
//! A hash table implemented with quadratic probing and SIMD lookup.
119+
// Re-export the public items of the private `table` module.
pub use crate::table::*;
120+
121+
#[cfg(feature = "rayon")]
122+
/// [rayon]-based parallel iterator types for hash tables.
123+
/// You will rarely need to interact with it directly unless you have need
124+
/// to name one of the iterator types.
125+
///
126+
/// [rayon]: https://docs.rs/rayon/1.0/rayon
127+
pub mod rayon {
128+
// Re-export the parallel iterator types defined for `HashTable`.
pub use crate::external_trait_impls::rayon::table::*;
129+
}
130+
}
116131

117132
pub use crate::map::HashMap;
118133
pub use crate::set::HashSet;
134+
pub use crate::table::HashTable;
119135

120136
#[cfg(feature = "equivalent")]
121137
pub use equivalent::Equivalent;

src/map.rs

Lines changed: 6 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
use crate::raw::{Allocator, Bucket, Global, RawDrain, RawIntoIter, RawIter, RawTable};
1+
use crate::raw::{
2+
Allocator, Bucket, Global, RawDrain, RawExtractIf, RawIntoIter, RawIter, RawTable,
3+
};
24
use crate::{Equivalent, TryReserveError};
35
use core::borrow::Borrow;
46
use core::fmt::{self, Debug};
@@ -979,7 +981,7 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
979981
{
980982
ExtractIf {
981983
f,
982-
inner: ExtractIfInner {
984+
inner: RawExtractIf {
983985
iter: unsafe { self.table.iter() },
984986
table: &mut self.table,
985987
},
@@ -2724,7 +2726,7 @@ where
27242726
F: FnMut(&K, &mut V) -> bool,
27252727
{
27262728
f: F,
2727-
inner: ExtractIfInner<'a, K, V, A>,
2729+
inner: RawExtractIf<'a, (K, V), A>,
27282730
}
27292731

27302732
impl<K, V, F, A> Iterator for ExtractIf<'_, K, V, F, A>
@@ -2736,7 +2738,7 @@ where
27362738

27372739
#[cfg_attr(feature = "inline-more", inline)]
27382740
fn next(&mut self) -> Option<Self::Item> {
2739-
self.inner.next(&mut self.f)
2741+
self.inner.next(|&mut (ref k, ref mut v)| (self.f)(k, v))
27402742
}
27412743

27422744
#[inline]
@@ -2747,30 +2749,6 @@ where
27472749

27482750
impl<K, V, F> FusedIterator for ExtractIf<'_, K, V, F> where F: FnMut(&K, &mut V) -> bool {}
27492751

2750-
/// Portions of `ExtractIf` shared with `set::ExtractIf`
2751-
pub(super) struct ExtractIfInner<'a, K, V, A: Allocator> {
2752-
pub iter: RawIter<(K, V)>,
2753-
pub table: &'a mut RawTable<(K, V), A>,
2754-
}
2755-
2756-
impl<K, V, A: Allocator> ExtractIfInner<'_, K, V, A> {
2757-
#[cfg_attr(feature = "inline-more", inline)]
2758-
pub(super) fn next<F>(&mut self, f: &mut F) -> Option<(K, V)>
2759-
where
2760-
F: FnMut(&K, &mut V) -> bool,
2761-
{
2762-
unsafe {
2763-
for item in &mut self.iter {
2764-
let &mut (ref key, ref mut value) = item.as_mut();
2765-
if f(key, value) {
2766-
return Some(self.table.remove(item).0);
2767-
}
2768-
}
2769-
}
2770-
None
2771-
}
2772-
}
2773-
27742752
/// A mutable iterator over the values of a `HashMap` in arbitrary order.
27752753
/// The iterator element type is `&'a mut V`.
27762754
///

src/raw/mod.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4457,6 +4457,28 @@ impl Iterator for RawIterHashInner {
44574457
}
44584458
}
44594459

4460+
/// State for an "extract if" traversal: a raw iterator over the elements
/// together with a mutable borrow of the table they are removed from.
pub(crate) struct RawExtractIf<'a, T, A: Allocator> {
4461+
pub iter: RawIter<T>,
4462+
pub table: &'a mut RawTable<T, A>,
4463+
}
4464+
4465+
impl<T, A: Allocator> RawExtractIf<'_, T, A> {
4466+
/// Advances the raw iterator until `f` returns `true` for an element, then
/// removes that element from the table and returns it.
///
/// `f` receives a mutable reference to each visited element. Returns `None`
/// once the iterator is exhausted without a match.
#[cfg_attr(feature = "inline-more", inline)]
4467+
pub(crate) fn next<F>(&mut self, mut f: F) -> Option<T>
4468+
where
4469+
F: FnMut(&mut T) -> bool,
4470+
{
4471+
// SAFETY (review note): presumably sound because `iter` was created from
// `table`, which is exclusively borrowed by this struct — confirm against
// the contracts of `RawIter` and `RawTable::remove`.
unsafe {
4472+
for item in &mut self.iter {
4473+
if f(item.as_mut()) {
4474+
// `remove` returns a tuple; field `.0` is the removed value.
return Some(self.table.remove(item).0);
4475+
}
4476+
}
4477+
}
4478+
None
4479+
}
4480+
}
4481+
44604482
#[cfg(test)]
44614483
mod test_map {
44624484
use super::*;

0 commit comments

Comments
 (0)