Skip to content

Commit 3d2d163

Browse files
committed
Auto merge of #470 - JustForFun88:doc_bucket_fn, r=Amanieu
Doc all bucket functions Further improvements to documentation. Now it’s time for all ***bucket*** functions. Additionally, the `RawTableInner::data_end<T>` and `RawTable::data_end` functions are now **safe** because they simply cast an existing pointer to a pointer of another type, which is safe.
2 parents 844b337 + 708b6e7 commit 3d2d163

File tree

3 files changed

+242
-9
lines changed

3 files changed

+242
-9
lines changed

src/map.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,8 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
944944
///
945945
/// Keeps the allocated memory for reuse.
946946
///
947+
/// [`retain()`]: HashMap::retain
948+
///
947949
/// # Examples
948950
///
949951
/// ```

src/raw/mod.rs

Lines changed: 238 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -905,10 +905,35 @@ impl<T, A: Allocator> RawTable<T, A> {
905905
&self.alloc
906906
}
907907

908-
/// Returns pointer to one past last element of data table.
908+
/// Returns pointer to one past last `data` element in the table as viewed from
909+
/// the start point of the allocation.
910+
///
911+
/// The caller must ensure that the `RawTable` outlives the returned [`NonNull<T>`],
912+
/// otherwise using it may result in [`undefined behavior`].
913+
///
914+
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
909915
#[inline]
910-
pub unsafe fn data_end(&self) -> NonNull<T> {
911-
NonNull::new_unchecked(self.table.ctrl.as_ptr().cast())
916+
pub fn data_end(&self) -> NonNull<T> {
917+
// SAFETY: `self.table.ctrl` is `NonNull`, so casting it is safe
918+
//
919+
// `self.table.ctrl.as_ptr().cast()` returns pointer that
920+
// points here (to the end of `T0`)
921+
// ∨
922+
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
923+
// \________ ________/
924+
// \/
925+
// `n = buckets - 1`, i.e. `RawTable::buckets() - 1`
926+
//
927+
// where: T0...T_n - our stored data;
928+
// CT0...CT_n - control bytes or metadata for `data`.
929+
// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
930+
// with loading `Group` bytes from the heap works properly, even if the result
931+
// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
932+
// `RawTableInner::set_ctrl` function.
933+
//
934+
// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
935+
// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
936+
unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().cast()) }
912937
}
913938

914939
/// Returns pointer to start of data table.
@@ -940,8 +965,55 @@ impl<T, A: Allocator> RawTable<T, A> {
940965
}
941966

942967
/// Returns a pointer to an element in the table.
968+
///
969+
/// The caller must ensure that the `RawTable` outlives the returned [`Bucket<T>`],
970+
/// otherwise using it may result in [`undefined behavior`].
971+
///
972+
/// # Safety
973+
///
974+
/// If `mem::size_of::<T>() != 0`, then the caller of this function must observe the
975+
/// following safety rules:
976+
///
977+
/// * The table must already be allocated;
978+
///
979+
/// * The `index` must be less than the number returned by the [`RawTable::buckets`]
980+
/// function, i.e. `(index + 1) <= self.buckets()`.
981+
///
982+
/// It is safe to call this function with index of zero (`index == 0`) on a table that has
983+
/// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
984+
///
985+
/// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
986+
/// be less than the number returned by the [`RawTable::buckets`] function, i.e.
987+
/// `(index + 1) <= self.buckets()`.
988+
///
989+
/// [`RawTable::buckets`]: RawTable::buckets
990+
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
943991
#[inline]
944992
pub unsafe fn bucket(&self, index: usize) -> Bucket<T> {
993+
// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
994+
// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
995+
// the "buckets" number of our `RawTable`, i.e. "n = RawTable::buckets() - 1"):
996+
//
997+
// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
998+
// part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`)
999+
// |
1000+
// | `base = self.data_end()` points here
1001+
// | (to the start of CT0 or to the end of T0)
1002+
// v v
1003+
// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
1004+
// ^ \__________ __________/
1005+
// `table.bucket(3)` returns a pointer that points \/
1006+
// here in the `data` part of the `RawTable` (to additional control bytes
1007+
// the end of T3) `m = Group::WIDTH - 1`
1008+
//
1009+
// where: T0...T_n - our stored data;
1010+
// CT0...CT_n - control bytes or metadata for `data`;
1011+
// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
1012+
// the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask`
1013+
// is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function.
1014+
//
1015+
// P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
1016+
// of buckets is a power of two, and `self.table.bucket_mask = self.buckets() - 1`.
9451017
debug_assert_ne!(self.table.bucket_mask, 0);
9461018
debug_assert!(index < self.buckets());
9471019
Bucket::from_base_index(self.data_end(), index)
@@ -2212,6 +2284,9 @@ impl RawTableInner {
22122284
///
22132285
/// * The [`RawTableInner`] must have properly initialized control bytes.
22142286
///
2287+
/// The type `T` must be the actual type of the elements stored in the table,
2288+
/// otherwise using the returned [`RawIter`] results in [`undefined behavior`].
2289+
///
22152290
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
22162291
#[inline]
22172292
unsafe fn iter<T>(&self) -> RawIter<T> {
@@ -2228,13 +2303,20 @@ impl RawTableInner {
22282303
// `ctrl` points here (to the start
22292304
// of the first control byte `CT0`)
22302305
// ∨
2231-
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
2306+
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
22322307
// \________ ________/
22332308
// \/
2234-
// `n = buckets - 1`, i.e. `RawIndexTableInner::buckets() - 1`
2309+
// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
22352310
//
22362311
// where: T0...T_n - our stored data;
22372312
// CT0...CT_n - control bytes or metadata for `data`.
2313+
// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2314+
// with loading `Group` bytes from the heap works properly, even if the result
2315+
// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2316+
// `RawTableInner::set_ctrl` function.
2317+
//
2318+
// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2319+
// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
22382320
let data = Bucket::from_base_index(self.data_end(), 0);
22392321
RawIter {
22402322
// SAFETY: See explanation above
@@ -2258,6 +2340,9 @@ impl RawTableInner {
22582340
///
22592341
/// # Safety
22602342
///
2343+
/// The type `T` must be the actual type of the elements stored in the table,
2344+
/// otherwise calling this function may result in [`undefined behavior`].
2345+
///
22612346
/// If `T` is a type that should be dropped and **the table is not empty**,
22622347
/// calling this function more than once results in [`undefined behavior`].
22632348
///
@@ -2309,6 +2394,8 @@ impl RawTableInner {
23092394
///
23102395
/// * Calling this function more than once;
23112396
///
2397+
/// * The type `T` must be the actual type of the elements stored in the table.
2398+
///
23122399
/// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
23132400
/// to allocate this table.
23142401
///
@@ -2348,13 +2435,116 @@ impl RawTableInner {
23482435
}
23492436
}
23502437

2438+
/// Returns a pointer to an element in the table (convenience for
2439+
/// `Bucket::from_base_index(self.data_end::<T>(), index)`).
2440+
///
2441+
/// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket<T>`],
2442+
/// otherwise using it may result in [`undefined behavior`].
2443+
///
2444+
/// # Safety
2445+
///
2446+
/// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived from the
2447+
/// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling
2448+
/// this function, the following safety rules must be observed:
2449+
///
2450+
/// * The table must already be allocated;
2451+
///
2452+
/// * The `index` must be less than the number returned by the [`RawTableInner::buckets`]
2453+
/// function, i.e. `(index + 1) <= self.buckets()`.
2454+
///
2455+
/// * The type `T` must be the actual type of the elements stored in the table, otherwise
2456+
/// using the returned [`Bucket`] may result in [`undefined behavior`].
2457+
///
2458+
/// It is safe to call this function with index of zero (`index == 0`) on a table that has
2459+
/// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
2460+
///
2461+
/// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
2462+
/// be less than the number returned by the [`RawTableInner::buckets`] function, i.e.
2463+
/// `(index + 1) <= self.buckets()`.
2464+
///
2465+
/// ```none
2466+
/// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
2467+
/// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2468+
/// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
2469+
///
2470+
/// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
2471+
/// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`])
2472+
/// |
2473+
/// | `base = table.data_end::<T>()` points here
2474+
/// | (to the start of CT0 or to the end of T0)
2475+
/// v v
2476+
/// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2477+
/// ^ \__________ __________/
2478+
/// `table.bucket(3)` returns a pointer that points \/
2479+
/// here in the `data` part of the `RawTableInner` additional control bytes
2480+
/// (to the end of T3) `m = Group::WIDTH - 1`
2481+
///
2482+
/// where: T0...T_n - our stored data;
2483+
/// CT0...CT_n - control bytes or metadata for `data`;
2484+
/// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2485+
/// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2486+
/// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2487+
///
2488+
/// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2489+
/// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2490+
/// ```
2491+
///
2492+
/// [`Bucket::from_base_index`]: Bucket::from_base_index
2493+
/// [`RawTableInner::buckets`]: RawTableInner::buckets
2494+
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23512495
#[inline]
23522496
unsafe fn bucket<T>(&self, index: usize) -> Bucket<T> {
23532497
debug_assert_ne!(self.bucket_mask, 0);
23542498
debug_assert!(index < self.buckets());
23552499
Bucket::from_base_index(self.data_end(), index)
23562500
}
23572501

2502+
/// Returns a raw `*mut u8` pointer to the start of the `data` element in the table
2503+
/// (convenience for `self.data_end::<u8>().as_ptr().sub((index + 1) * size_of)`).
2504+
///
2505+
/// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`,
2506+
/// otherwise using it may result in [`undefined behavior`].
2507+
///
2508+
/// # Safety
2509+
///
2510+
/// If any of the following conditions are violated, the result is [`undefined behavior`]:
2511+
///
2512+
/// * The table must already be allocated;
2513+
///
2514+
/// * The `index` must be less than the number returned by the [`RawTableInner::buckets`]
2515+
/// function, i.e. `(index + 1) <= self.buckets()`;
2516+
///
2517+
/// * The `size_of` must be equal to the size of the elements stored in the table;
2518+
///
2519+
/// ```none
2520+
/// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
2521+
/// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2522+
/// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
2523+
///
2524+
/// `table.bucket_ptr(3, mem::size_of::<T>())` returns a pointer that points here in the
2525+
/// `data` part of the `RawTableInner`, i.e. to the start of T3
2526+
/// |
2527+
/// | `base = table.data_end::<u8>()` points here
2528+
/// | (to the start of CT0 or to the end of T0)
2529+
/// v v
2530+
/// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2531+
/// \__________ __________/
2532+
/// \/
2533+
/// additional control bytes
2534+
/// `m = Group::WIDTH - 1`
2535+
///
2536+
/// where: T0...T_n - our stored data;
2537+
/// CT0...CT_n - control bytes or metadata for `data`;
2538+
/// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2539+
/// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2540+
/// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2541+
///
2542+
/// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2543+
/// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2544+
/// ```
2545+
///
2546+
/// [`RawTableInner::buckets`]: RawTableInner::buckets
2547+
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23582548
#[inline]
23592549
unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 {
23602550
debug_assert_ne!(self.bucket_mask, 0);
@@ -2363,9 +2553,47 @@ impl RawTableInner {
23632553
base.sub((index + 1) * size_of)
23642554
}
23652555

2556+
/// Returns pointer to one past last `data` element in the table as viewed from
2557+
/// the start point of the allocation (convenience for `self.ctrl.cast()`).
2558+
///
2559+
/// This function actually returns a pointer to the end of the `data element` at
2560+
/// index "0" (zero).
2561+
///
2562+
/// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull<T>`],
2563+
/// otherwise using it may result in [`undefined behavior`].
2564+
///
2565+
/// # Note
2566+
///
2567+
/// The type `T` must be the actual type of the elements stored in the table, otherwise
2568+
/// using the returned [`NonNull<T>`] may result in [`undefined behavior`].
2569+
///
2570+
/// ```none
2571+
/// `table.data_end::<T>()` returns pointer that points here
2572+
/// (to the end of `T0`)
2573+
/// ∨
2574+
/// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
2575+
/// \________ ________/
2576+
/// \/
2577+
/// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
2578+
///
2579+
/// where: T0...T_n - our stored data;
2580+
/// CT0...CT_n - control bytes or metadata for `data`.
2581+
/// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2582+
/// with loading `Group` bytes from the heap works properly, even if the result
2583+
/// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2584+
/// `RawTableInner::set_ctrl` function.
2585+
///
2586+
/// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2587+
/// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2588+
/// ```
2589+
///
2590+
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23662591
#[inline]
2367-
unsafe fn data_end<T>(&self) -> NonNull<T> {
2368-
NonNull::new_unchecked(self.ctrl.as_ptr().cast())
2592+
fn data_end<T>(&self) -> NonNull<T> {
2593+
unsafe {
2594+
// SAFETY: `self.ctrl` is `NonNull`, so casting it is safe
2595+
NonNull::new_unchecked(self.ctrl.as_ptr().cast())
2596+
}
23692597
}
23702598

23712599
/// Returns an iterator-like object for a probe sequence on the table.
@@ -2758,7 +2986,7 @@ impl RawTableInner {
27582986
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
27592987
// \________ ________/
27602988
// \/
2761-
// `n = buckets - 1`, i.e. `RawIndexTableInner::buckets() - 1`
2989+
// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
27622990
//
27632991
// where: T0...T_n - our stored data;
27642992
// CT0...CT_n - control bytes or metadata for `data`.
@@ -3000,7 +3228,7 @@ impl RawTableInner {
30003228
///
30013229
/// # Note
30023230
///
3003-
/// This function must be called only after [`drop_elements`](RawTable::drop_elements),
3231+
/// This function must be called only after [`drop_elements`](RawTableInner::drop_elements),
30043232
/// else it can lead to leaking of memory. Also calling this function automatically
30053233
/// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid
30063234
/// (dangling) the `ctrl` field of the table.
@@ -3521,6 +3749,7 @@ impl<T> RawIterRange<T> {
35213749
///
35223750
/// * The `len` must be a power of two.
35233751
///
3752+
/// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
35243753
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
35253754
#[cfg_attr(feature = "inline-more", inline)]
35263755
unsafe fn new(ctrl: *const u8, data: Bucket<T>, len: usize) -> Self {

src/set.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,8 @@ impl<T, S, A: Allocator> HashSet<T, S, A> {
383383
/// or the iteration short-circuits, then the remaining elements will be retained.
384384
/// Use [`retain()`] with a negated predicate if you do not need the returned iterator.
385385
///
386+
/// [`retain()`]: HashSet::retain
387+
///
386388
/// # Examples
387389
///
388390
/// ```

0 commit comments

Comments
 (0)