Skip to content

Commit 9ec266a

Browse files
committed
Doc all bucket functions
1 parent a24996e commit 9ec266a

File tree

1 file changed

+236
-9
lines changed

1 file changed

+236
-9
lines changed

src/raw/mod.rs

Lines changed: 236 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -905,10 +905,33 @@ impl<T, A: Allocator> RawTable<T, A> {
905905
&self.alloc
906906
}
907907

908-
/// Returns pointer to one past last element of data table.
908+
/// Returns a pointer to one past the last `data` element in the table as viewed from
909+
/// the start point of the allocation.
910+
///
911+
/// The caller must ensure that the `RawTable` outlives the returned [`NonNull<T>`],
912+
/// otherwise using it may result in [`undefined behavior`].
909913
#[inline]
910-
pub unsafe fn data_end(&self) -> NonNull<T> {
911-
NonNull::new_unchecked(self.table.ctrl.as_ptr().cast())
914+
pub fn data_end(&self) -> NonNull<T> {
915+
// SAFETY: `self.table.ctrl` is `NonNull`, so casting it is safe
916+
//
917+
// `self.table.ctrl.as_ptr().cast()` returns pointer that
918+
// points here (to the end of `T0`)
919+
// ∨
920+
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
921+
// \________ ________/
922+
// \/
923+
// `n = buckets - 1`, i.e. `RawTable::buckets() - 1`
924+
//
925+
// where: T0...T_n - our stored data;
926+
// CT0...CT_n - control bytes or metadata for `data`.
927+
// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
928+
// with loading `Group` bytes from the heap works properly, even if the result
929+
// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
930+
// `RawTableInner::set_ctrl` function.
931+
//
932+
// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
933+
// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
934+
unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().cast()) }
912935
}
913936

914937
/// Returns pointer to start of data table.
@@ -940,8 +963,55 @@ impl<T, A: Allocator> RawTable<T, A> {
940963
}
941964

942965
/// Returns a pointer to an element in the table.
966+
///
967+
/// The caller must ensure that the `RawTable` outlives the returned [`Bucket<T>`],
968+
/// otherwise using it may result in [`undefined behavior`].
969+
///
970+
/// # Safety
971+
///
972+
/// If `mem::size_of::<T>() != 0`, then the caller of this function must observe the
973+
/// following safety rules:
974+
///
975+
/// * The table must already be allocated;
976+
///
977+
/// * The `index` must be less than the number returned by the [`RawTable::buckets`]
978+
/// function, i.e. `(index + 1) <= self.buckets()`.
979+
///
980+
/// It is safe to call this function with index of zero (`index == 0`) on a table that has
981+
/// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
982+
///
983+
/// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
984+
/// be less than the number returned by the [`RawTable::buckets`] function, i.e.
985+
/// `(index + 1) <= self.buckets()`.
986+
///
987+
/// [`RawTable::buckets`]: RawTable::buckets
988+
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
943989
#[inline]
944990
pub unsafe fn bucket(&self, index: usize) -> Bucket<T> {
991+
// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
992+
// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
993+
// the "buckets" number of our `RawTable`, i.e. "n = RawTable::buckets() - 1"):
994+
//
995+
// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
996+
// part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`)
997+
// |
998+
// | `base = self.data_end()` points here
999+
// | (to the start of CT0 or to the end of T0)
1000+
// v v
1001+
// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
1002+
// ^ \__________ __________/
1003+
// `table.bucket(3)` returns a pointer that points \/
1004+
// here in the `data` part of the `RawTable` (to additional control bytes
1005+
// the end of T3) `m = Group::WIDTH - 1`
1006+
//
1007+
// where: T0...T_n - our stored data;
1008+
// CT0...CT_n - control bytes or metadata for `data`;
1009+
// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
1010+
// the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask`
1011+
// is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function.
1012+
//
1013+
// P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
1014+
// of buckets is a power of two, and `self.table.bucket_mask = self.buckets() - 1`.
9451015
debug_assert_ne!(self.table.bucket_mask, 0);
9461016
debug_assert!(index < self.buckets());
9471017
Bucket::from_base_index(self.data_end(), index)
@@ -2212,6 +2282,9 @@ impl RawTableInner {
22122282
///
22132283
/// * The [`RawTableInner`] must have properly initialized control bytes.
22142284
///
2285+
/// The type `T` must be the actual type of the elements stored in the table,
2286+
/// otherwise using the returned [`RawIter`] results in [`undefined behavior`].
2287+
///
22152288
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
22162289
#[inline]
22172290
unsafe fn iter<T>(&self) -> RawIter<T> {
@@ -2228,13 +2301,20 @@ impl RawTableInner {
22282301
// `ctrl` points here (to the start
22292302
// of the first control byte `CT0`)
22302303
// ∨
2231-
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
2304+
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
22322305
// \________ ________/
22332306
// \/
2234-
// `n = buckets - 1`, i.e. `RawIndexTableInner::buckets() - 1`
2307+
// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
22352308
//
22362309
// where: T0...T_n - our stored data;
22372310
// CT0...CT_n - control bytes or metadata for `data`.
2311+
// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2312+
// with loading `Group` bytes from the heap works properly, even if the result
2313+
// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2314+
// `RawTableInner::set_ctrl` function.
2315+
//
2316+
// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2317+
// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
22382318
let data = Bucket::from_base_index(self.data_end(), 0);
22392319
RawIter {
22402320
// SAFETY: See explanation above
@@ -2258,6 +2338,9 @@ impl RawTableInner {
22582338
///
22592339
/// # Safety
22602340
///
2341+
/// The type `T` must be the actual type of the elements stored in the table,
2342+
/// otherwise calling this function may result in [`undefined behavior`].
2343+
///
22612344
/// If `T` is a type that should be dropped and **the table is not empty**,
22622345
/// calling this function more than once results in [`undefined behavior`].
22632346
///
@@ -2309,6 +2392,8 @@ impl RawTableInner {
23092392
///
23102393
/// * Calling this function more than once;
23112394
///
2395+
/// * The type `T` must be the actual type of the elements stored in the table.
2396+
///
23122397
/// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
23132398
/// to allocate this table.
23142399
///
@@ -2348,13 +2433,116 @@ impl RawTableInner {
23482433
}
23492434
}
23502435

2436+
/// Returns a pointer to an element in the table (convenience for
2437+
/// `Bucket::from_base_index(self.data_end::<T>(), index)`).
2438+
///
2439+
/// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket<T>`],
2440+
/// otherwise using it may result in [`undefined behavior`].
2441+
///
2442+
/// # Safety
2443+
///
2444+
/// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived from the
2445+
/// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling
2446+
/// this function, the following safety rules must be observed:
2447+
///
2448+
/// * The table must already be allocated;
2449+
///
2450+
/// * The `index` must be less than the number returned by the [`RawTableInner::buckets`]
2451+
/// function, i.e. `(index + 1) <= self.buckets()`.
2452+
///
2453+
/// * The type `T` must be the actual type of the elements stored in the table, otherwise
2454+
/// using the returned [`Bucket`] may result in [`undefined behavior`].
2455+
///
2456+
/// It is safe to call this function with index of zero (`index == 0`) on a table that has
2457+
/// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
2458+
///
2459+
/// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
2460+
/// be less than the number returned by the [`RawTableInner::buckets`] function, i.e.
2461+
/// `(index + 1) <= self.buckets()`.
2462+
///
2463+
/// ```none
2464+
/// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
2465+
/// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2466+
/// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
2467+
///
2468+
/// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
2469+
/// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`])
2470+
/// |
2471+
/// | `base = table.data_end::<T>()` points here
2472+
/// | (to the start of CT0 or to the end of T0)
2473+
/// v v
2474+
/// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2475+
/// ^ \__________ __________/
2476+
/// `table.bucket(3)` returns a pointer that points \/
2477+
/// here in the `data` part of the `RawTableInner` additional control bytes
2478+
/// (to the end of T3) `m = Group::WIDTH - 1`
2479+
///
2480+
/// where: T0...T_n - our stored data;
2481+
/// CT0...CT_n - control bytes or metadata for `data`;
2482+
/// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2483+
/// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2484+
/// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2485+
///
2486+
/// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2487+
/// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2488+
/// ```
2489+
///
2490+
/// [`Bucket::from_base_index`]: Bucket::from_base_index
2491+
/// [`RawTableInner::buckets`]: RawTableInner::buckets
2492+
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23512493
#[inline]
23522494
unsafe fn bucket<T>(&self, index: usize) -> Bucket<T> {
23532495
debug_assert_ne!(self.bucket_mask, 0);
23542496
debug_assert!(index < self.buckets());
23552497
Bucket::from_base_index(self.data_end(), index)
23562498
}
23572499

2500+
/// Returns a raw `*mut u8` pointer to the start of the `data` element in the table
2501+
/// (convenience for `self.data_end::<u8>().as_ptr().sub((index + 1) * size_of)`).
2502+
///
2503+
/// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`,
2504+
/// otherwise using it may result in [`undefined behavior`].
2505+
///
2506+
/// # Safety
2507+
///
2508+
/// If any of the following conditions are violated, the result is [`undefined behavior`]:
2509+
///
2510+
/// * The table must already be allocated;
2511+
///
2512+
/// * The `index` must be less than the number returned by the [`RawTableInner::buckets`]
2513+
/// function, i.e. `(index + 1) <= self.buckets()`;
2514+
///
2515+
/// * The `size_of` must be equal to the size of the elements stored in the table;
2516+
///
2517+
/// ```none
2518+
/// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
2519+
/// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2520+
/// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
2521+
///
2522+
/// `table.bucket_ptr(3, mem::size_of::<T>())` returns a pointer that points here in the
2523+
/// `data` part of the `RawTableInner`, i.e. to the start of T3
2524+
/// |
2525+
/// | `base = table.data_end::<u8>()` points here
2526+
/// | (to the start of CT0 or to the end of T0)
2527+
/// v v
2528+
/// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2529+
/// \__________ __________/
2530+
/// \/
2531+
/// additional control bytes
2532+
/// `m = Group::WIDTH - 1`
2533+
///
2534+
/// where: T0...T_n - our stored data;
2535+
/// CT0...CT_n - control bytes or metadata for `data`;
2536+
/// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2537+
/// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2538+
/// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2539+
///
2540+
/// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2541+
/// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2542+
/// ```
2543+
///
2544+
/// [`RawTableInner::buckets`]: RawTableInner::buckets
2545+
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23582546
#[inline]
23592547
unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 {
23602548
debug_assert_ne!(self.bucket_mask, 0);
@@ -2363,9 +2551,47 @@ impl RawTableInner {
23632551
base.sub((index + 1) * size_of)
23642552
}
23652553

2554+
/// Returns a pointer to one past the last `data` element in the table as viewed from
2555+
/// the start point of the allocation (convenience for `self.ctrl.cast()`).
2556+
///
2557+
/// This function actually returns a pointer to the end of the `data element` at
2558+
/// index "0" (zero).
2559+
///
2560+
/// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull<T>`],
2561+
/// otherwise using it may result in [`undefined behavior`].
2562+
///
2563+
/// # Safety
2564+
///
2565+
/// The type `T` must be the actual type of the elements stored in the table, otherwise
2566+
/// using the returned [`NonNull<T>`] may result in [`undefined behavior`].
2567+
///
2568+
/// ```none
2569+
/// `table.data_end::<T>()` returns pointer that points here
2570+
/// (to the end of `T0`)
2571+
/// ∨
2572+
/// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
2573+
/// \________ ________/
2574+
/// \/
2575+
/// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
2576+
///
2577+
/// where: T0...T_n - our stored data;
2578+
/// CT0...CT_n - control bytes or metadata for `data`.
2579+
/// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2580+
/// with loading `Group` bytes from the heap works properly, even if the result
2581+
/// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2582+
/// `RawTableInner::set_ctrl` function.
2583+
///
2584+
/// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2585+
/// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2586+
/// ```
2587+
///
2588+
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23662589
#[inline]
2367-
unsafe fn data_end<T>(&self) -> NonNull<T> {
2368-
NonNull::new_unchecked(self.ctrl.as_ptr().cast())
2590+
fn data_end<T>(&self) -> NonNull<T> {
2591+
unsafe {
2592+
// SAFETY: `self.ctrl` is `NonNull`, so casting it is safe
2593+
NonNull::new_unchecked(self.ctrl.as_ptr().cast())
2594+
}
23692595
}
23702596

23712597
/// Returns an iterator-like object for a probe sequence on the table.
@@ -2758,7 +2984,7 @@ impl RawTableInner {
27582984
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
27592985
// \________ ________/
27602986
// \/
2761-
// `n = buckets - 1`, i.e. `RawIndexTableInner::buckets() - 1`
2987+
// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
27622988
//
27632989
// where: T0...T_n - our stored data;
27642990
// CT0...CT_n - control bytes or metadata for `data`.
@@ -3000,7 +3226,7 @@ impl RawTableInner {
30003226
///
30013227
/// # Note
30023228
///
3003-
/// This function must be called only after [`drop_elements`](RawTable::drop_elements),
3229+
/// This function must be called only after [`drop_elements`](RawTableInner::drop_elements),
30043230
/// else it can lead to leaking of memory. Also calling this function automatically
30053231
/// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid
30063232
/// (dangling) the `ctrl` field of the table.
@@ -3521,6 +3747,7 @@ impl<T> RawIterRange<T> {
35213747
///
35223748
/// * The `len` must be a power of two.
35233749
///
3750+
/// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
35243751
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
35253752
#[cfg_attr(feature = "inline-more", inline)]
35263753
unsafe fn new(ctrl: *const u8, data: Bucket<T>, len: usize) -> Self {

0 commit comments

Comments
 (0)