@@ -905,10 +905,33 @@ impl<T, A: Allocator> RawTable<T, A> {
905
905
& self . alloc
906
906
}
907
907
908
- /// Returns pointer to one past last element of data table.
908
+ /// Returns a pointer to one past the last `data` element in the table as viewed from
909
+ /// the start point of the allocation.
910
+ ///
911
+ /// The caller must ensure that the `RawTable` outlives the returned [`NonNull<T>`],
912
+ /// otherwise using it may result in [`undefined behavior`].
909
913
#[ inline]
910
- pub unsafe fn data_end ( & self ) -> NonNull < T > {
911
- NonNull :: new_unchecked ( self . table . ctrl . as_ptr ( ) . cast ( ) )
914
+ pub fn data_end ( & self ) -> NonNull < T > {
915
+ // SAFETY: `self.table.ctrl` is `NonNull`, so casting it is safe
916
+ //
917
+ // `self.table.ctrl.as_ptr().cast()` returns pointer that
918
+ // points here (to the end of `T0`)
919
+ // ∨
920
+ // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
921
+ // \________ ________/
922
+ // \/
923
+ // `n = buckets - 1`, i.e. `RawTable::buckets() - 1`
924
+ //
925
+ // where: T0...T_n - our stored data;
926
+ // CT0...CT_n - control bytes or metadata for `data`.
927
+ // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
928
+ // with loading `Group` bytes from the heap works properly, even if the result
929
+ // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
930
+ // `RawTableInner::set_ctrl` function.
931
+ //
932
+ // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
933
+ // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
934
+ unsafe { NonNull :: new_unchecked ( self . table . ctrl . as_ptr ( ) . cast ( ) ) }
912
935
}
913
936
914
937
/// Returns pointer to start of data table.
@@ -940,8 +963,55 @@ impl<T, A: Allocator> RawTable<T, A> {
940
963
}
941
964
942
965
/// Returns a pointer to an element in the table.
966
+ ///
967
+ /// The caller must ensure that the `RawTable` outlives the returned [`Bucket<T>`],
968
+ /// otherwise using it may result in [`undefined behavior`].
969
+ ///
970
+ /// # Safety
971
+ ///
972
+ /// If `mem::size_of::<T>() != 0`, then the caller of this function must observe the
973
+ /// following safety rules:
974
+ ///
975
+ /// * The table must already be allocated;
976
+ ///
977
+ /// * The `index` must be less than the number returned by the [`RawTable::buckets`]
978
+ /// function, i.e. `(index + 1) <= self.buckets()`.
979
+ ///
980
+ /// It is safe to call this function with index of zero (`index == 0`) on a table that has
981
+ /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
982
+ ///
983
+ /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
984
+ /// be less than the number returned by the [`RawTable::buckets`] function, i.e.
985
+ /// `(index + 1) <= self.buckets()`.
986
+ ///
987
+ /// [`RawTable::buckets`]: RawTable::buckets
988
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
943
989
#[ inline]
944
990
pub unsafe fn bucket ( & self , index : usize ) -> Bucket < T > {
991
+ // If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
992
+ // (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
993
+ // the "buckets" number of our `RawTable`, i.e. "n = RawTable::buckets() - 1"):
994
+ //
995
+ // `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
996
+ // part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`)
997
+ // |
998
+ // | `base = self.data_end()` points here
999
+ // | (to the start of CT0 or to the end of T0)
1000
+ // v v
1001
+ // [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
1002
+ // ^ \__________ __________/
1003
+ // `table.bucket(3)` returns a pointer that points \/
1004
+ // here in the `data` part of the `RawTable` (to additional control bytes
1005
+ // the end of T3) `m = Group::WIDTH - 1`
1006
+ //
1007
+ // where: T0...T_n - our stored data;
1008
+ // CT0...CT_n - control bytes or metadata for `data`;
1009
+ // CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
1010
+ // the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask`
1011
+ // is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function.
1012
+ //
1013
+ // P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
1014
+ // of buckets is a power of two, and `self.table.bucket_mask = self.buckets() - 1`.
945
1015
debug_assert_ne ! ( self . table. bucket_mask, 0 ) ;
946
1016
debug_assert ! ( index < self . buckets( ) ) ;
947
1017
Bucket :: from_base_index ( self . data_end ( ) , index)
@@ -2212,6 +2282,9 @@ impl RawTableInner {
2212
2282
///
2213
2283
/// * The [`RawTableInner`] must have properly initialized control bytes.
2214
2284
///
2285
+ /// The type `T` must be the actual type of the elements stored in the table,
2286
+ /// otherwise using the returned [`RawIter`] results in [`undefined behavior`].
2287
+ ///
2215
2288
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
2216
2289
#[ inline]
2217
2290
unsafe fn iter < T > ( & self ) -> RawIter < T > {
@@ -2228,13 +2301,20 @@ impl RawTableInner {
2228
2301
// `ctrl` points here (to the start
2229
2302
// of the first control byte `CT0`)
2230
2303
// ∨
2231
- // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
2304
+ // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
2232
2305
// \________ ________/
2233
2306
// \/
2234
- // `n = buckets - 1`, i.e. `RawIndexTableInner ::buckets() - 1`
2307
+ // `n = buckets - 1`, i.e. `RawTableInner ::buckets() - 1`
2235
2308
//
2236
2309
// where: T0...T_n - our stored data;
2237
2310
// CT0...CT_n - control bytes or metadata for `data`.
2311
+ // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2312
+ // with loading `Group` bytes from the heap works properly, even if the result
2313
+ // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2314
+ // `RawTableInner::set_ctrl` function.
2315
+ //
2316
+ // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2317
+ // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2238
2318
let data = Bucket :: from_base_index ( self . data_end ( ) , 0 ) ;
2239
2319
RawIter {
2240
2320
// SAFETY: See explanation above
@@ -2258,6 +2338,9 @@ impl RawTableInner {
2258
2338
///
2259
2339
/// # Safety
2260
2340
///
2341
+ /// The type `T` must be the actual type of the elements stored in the table,
2342
+ /// otherwise calling this function may result in [`undefined behavior`].
2343
+ ///
2261
2344
/// If `T` is a type that should be dropped and **the table is not empty**,
2262
2345
/// calling this function more than once results in [`undefined behavior`].
2263
2346
///
@@ -2309,6 +2392,8 @@ impl RawTableInner {
2309
2392
///
2310
2393
/// * Calling this function more than once;
2311
2394
///
2395
+ /// * The type `T` must be the actual type of the elements stored in the table.
2396
+ ///
2312
2397
/// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
2313
2398
/// to allocate this table.
2314
2399
///
@@ -2348,13 +2433,116 @@ impl RawTableInner {
2348
2433
}
2349
2434
}
2350
2435
2436
+ /// Returns a pointer to an element in the table (convenience for
2437
+ /// `Bucket::from_base_index(self.data_end::<T>(), index)`).
2438
+ ///
2439
+ /// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket<T>`],
2440
+ /// otherwise using it may result in [`undefined behavior`].
2441
+ ///
2442
+ /// # Safety
2443
+ ///
2444
+ /// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived from the
2445
+ /// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling
2446
+ /// this function, the following safety rules must be observed:
2447
+ ///
2448
+ /// * The table must already be allocated;
2449
+ ///
2450
+ /// * The `index` must be less than the number returned by the [`RawTableInner::buckets`]
2451
+ /// function, i.e. `(index + 1) <= self.buckets()`.
2452
+ ///
2453
+ /// * The type `T` must be the actual type of the elements stored in the table, otherwise
2454
+ /// using the returned [`Bucket`] may result in [`undefined behavior`].
2455
+ ///
2456
+ /// It is safe to call this function with index of zero (`index == 0`) on a table that has
2457
+ /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
2458
+ ///
2459
+ /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
2460
+ /// be less than the number returned by the [`RawTableInner::buckets`] function, i.e.
2461
+ /// `(index + 1) <= self.buckets()`.
2462
+ ///
2463
+ /// ```none
2464
+ /// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
2465
+ /// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2466
+ /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
2467
+ ///
2468
+ /// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
2469
+ /// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`])
2470
+ /// |
2471
+ /// | `base = table.data_end::<T>()` points here
2472
+ /// | (to the start of CT0 or to the end of T0)
2473
+ /// v v
2474
+ /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2475
+ /// ^ \__________ __________/
2476
+ /// `table.bucket(3)` returns a pointer that points \/
2477
+ /// here in the `data` part of the `RawTableInner` additional control bytes
2478
+ /// (to the end of T3) `m = Group::WIDTH - 1`
2479
+ ///
2480
+ /// where: T0...T_n - our stored data;
2481
+ /// CT0...CT_n - control bytes or metadata for `data`;
2482
+ /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2483
+ /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2484
+ /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2485
+ ///
2486
+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2487
+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2488
+ /// ```
2489
+ ///
2490
+ /// [`Bucket::from_base_index`]: Bucket::from_base_index
2491
+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
2492
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
2351
2493
#[ inline]
2352
2494
unsafe fn bucket < T > ( & self , index : usize ) -> Bucket < T > {
2353
2495
debug_assert_ne ! ( self . bucket_mask, 0 ) ;
2354
2496
debug_assert ! ( index < self . buckets( ) ) ;
2355
2497
Bucket :: from_base_index ( self . data_end ( ) , index)
2356
2498
}
2357
2499
2500
+ /// Returns a raw `*mut u8` pointer to the start of the `data` element in the table
2501
+ /// (convenience for `self.data_end::<u8>().as_ptr().sub((index + 1) * size_of)`).
2502
+ ///
2503
+ /// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`,
2504
+ /// otherwise using it may result in [`undefined behavior`].
2505
+ ///
2506
+ /// # Safety
2507
+ ///
2508
+ /// If any of the following conditions are violated, the result is [`undefined behavior`]:
2509
+ ///
2510
+ /// * The table must already be allocated;
2511
+ ///
2512
+ /// * The `index` must be less than the number returned by the [`RawTableInner::buckets`]
2513
+ /// function, i.e. `(index + 1) <= self.buckets()`;
2514
+ ///
2515
+ /// * The `size_of` must be equal to the size of the elements stored in the table;
2516
+ ///
2517
+ /// ```none
2518
+ /// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
2519
+ /// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2520
+ /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
2521
+ ///
2522
+ /// `table.bucket_ptr(3, mem::size_of::<T>())` returns a pointer that points here in the
2523
+ /// `data` part of the `RawTableInner`, i.e. to the start of T3
2524
+ /// |
2525
+ /// | `base = table.data_end::<u8>()` points here
2526
+ /// | (to the start of CT0 or to the end of T0)
2527
+ /// v v
2528
+ /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2529
+ /// \__________ __________/
2530
+ /// \/
2531
+ /// additional control bytes
2532
+ /// `m = Group::WIDTH - 1`
2533
+ ///
2534
+ /// where: T0...T_n - our stored data;
2535
+ /// CT0...CT_n - control bytes or metadata for `data`;
2536
+ /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2537
+ /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2538
+ /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2539
+ ///
2540
+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2541
+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2542
+ /// ```
2543
+ ///
2544
+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
2545
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
2358
2546
#[ inline]
2359
2547
unsafe fn bucket_ptr ( & self , index : usize , size_of : usize ) -> * mut u8 {
2360
2548
debug_assert_ne ! ( self . bucket_mask, 0 ) ;
@@ -2363,9 +2551,47 @@ impl RawTableInner {
2363
2551
base. sub ( ( index + 1 ) * size_of)
2364
2552
}
2365
2553
2554
+ /// Returns a pointer to one past the last `data` element in the table as viewed from
2555
+ /// the start point of the allocation (convenience for `self.ctrl.cast()`).
2556
+ ///
2557
+ /// This function actually returns a pointer to the end of the `data element` at
2558
+ /// index "0" (zero).
2559
+ ///
2560
+ /// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull<T>`],
2561
+ /// otherwise using it may result in [`undefined behavior`].
2562
+ ///
2563
+ /// # Safety
2564
+ ///
2565
+ /// The type `T` must be the actual type of the elements stored in the table, otherwise
2566
+ /// using the returned [`NonNull<T>`] may result in [`undefined behavior`].
2567
+ ///
2568
+ /// ```none
2569
+ /// `table.data_end::<T>()` returns pointer that points here
2570
+ /// (to the end of `T0`)
2571
+ /// ∨
2572
+ /// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
2573
+ /// \________ ________/
2574
+ /// \/
2575
+ /// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
2576
+ ///
2577
+ /// where: T0...T_n - our stored data;
2578
+ /// CT0...CT_n - control bytes or metadata for `data`.
2579
+ /// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2580
+ /// with loading `Group` bytes from the heap works properly, even if the result
2581
+ /// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2582
+ /// `RawTableInner::set_ctrl` function.
2583
+ ///
2584
+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2585
+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2586
+ /// ```
2587
+ ///
2588
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
2366
2589
#[ inline]
2367
- unsafe fn data_end < T > ( & self ) -> NonNull < T > {
2368
- NonNull :: new_unchecked ( self . ctrl . as_ptr ( ) . cast ( ) )
2590
+ fn data_end < T > ( & self ) -> NonNull < T > {
2591
+ unsafe {
2592
+ // SAFETY: `self.ctrl` is `NonNull`, so casting it is safe
2593
+ NonNull :: new_unchecked ( self . ctrl . as_ptr ( ) . cast ( ) )
2594
+ }
2369
2595
}
2370
2596
2371
2597
/// Returns an iterator-like object for a probe sequence on the table.
@@ -2758,7 +2984,7 @@ impl RawTableInner {
2758
2984
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
2759
2985
// \________ ________/
2760
2986
// \/
2761
- // `n = buckets - 1`, i.e. `RawIndexTableInner ::buckets() - 1`
2987
+ // `n = buckets - 1`, i.e. `RawTableInner ::buckets() - 1`
2762
2988
//
2763
2989
// where: T0...T_n - our stored data;
2764
2990
// CT0...CT_n - control bytes or metadata for `data`.
@@ -3000,7 +3226,7 @@ impl RawTableInner {
3000
3226
///
3001
3227
/// # Note
3002
3228
///
3003
- /// This function must be called only after [`drop_elements`](RawTable ::drop_elements),
3229
+ /// This function must be called only after [`drop_elements`](RawTableInner ::drop_elements),
3004
3230
/// else it can lead to leaking of memory. Also calling this function automatically
3005
3231
/// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid
3006
3232
/// (dangling) the `ctrl` field of the table.
@@ -3521,6 +3747,7 @@ impl<T> RawIterRange<T> {
3521
3747
///
3522
3748
/// * The `len` must be a power of two.
3523
3749
///
3750
+ /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
3524
3751
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
3525
3752
#[ cfg_attr( feature = "inline-more" , inline) ]
3526
3753
unsafe fn new ( ctrl : * const u8 , data : Bucket < T > , len : usize ) -> Self {
0 commit comments