@@ -1601,51 +1601,50 @@ impl<A: Allocator + Clone> RawTableInner<A> {
        }
    }

+    /// Fixes up an insertion slot due to false positives for groups smaller than the group width.
+    #[inline]
+    unsafe fn fix_insert_slot(&self, index: usize) -> usize {
+        // In tables smaller than the group width, trailing control
+        // bytes outside the range of the table are filled with
+        // EMPTY entries. These will unfortunately trigger a
+        // match, but once masked may point to a full bucket that
+        // is already occupied. We detect this situation here and
+        // perform a second scan starting at the beginning of the
+        // table. This second scan is guaranteed to find an empty
+        // slot (due to the load factor) before hitting the trailing
+        // control bytes (containing EMPTY).
+        if unlikely(self.is_bucket_full(index)) {
+            debug_assert!(self.bucket_mask < Group::WIDTH);
+            // SAFETY:
+            //
+            // * We are in range and `ptr = self.ctrl(0)` are valid for reads
+            //   and properly aligned, because the table is already allocated
+            //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
+            //
+            // * For tables larger than the group width, we will never end up in the given
+            //   branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
+            //   full bucket index. For tables smaller than the group width, calling the
+            //   `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
+            //   safe, as the trailing control bytes outside the range of the table are filled
+            //   with EMPTY bytes, so this second scan either finds an empty slot (due to the
+            //   load factor) or hits the trailing control bytes (containing EMPTY). See
+            //   `intrinsics::cttz_nonzero` for more information.
+            Group::load_aligned(self.ctrl(0))
+                .match_empty_or_deleted()
+                .lowest_set_bit_nonzero()
+        } else {
+            index
+        }
+    }
+
    /// Finds the position to insert something in a group.
+    /// This may have false positives and must be fixed up with `fix_insert_slot` before it's used.
    #[inline]
    fn find_insert_slot_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option<usize> {
        let bit = group.match_empty_or_deleted().lowest_set_bit();

        if likely(bit.is_some()) {
-            let mut index = (probe_seq.pos + bit.unwrap()) & self.bucket_mask;
-
-            // In tables smaller than the group width, trailing control
-            // bytes outside the range of the table are filled with
-            // EMPTY entries. These will unfortunately trigger a
-            // match, but once masked may point to a full bucket that
-            // is already occupied. We detect this situation here and
-            // perform a second scan starting at the beginning of the
-            // table. This second scan is guaranteed to find an empty
-            // slot (due to the load factor) before hitting the trailing
-            // control bytes (containing EMPTY).
-            //
-            // SAFETY: The `index` is guaranteed to be in range `0..self.bucket_mask`
-            // due to masking with `self.bucket_mask`
-            unsafe {
-                if unlikely(self.is_bucket_full(index)) {
-                    debug_assert!(self.bucket_mask < Group::WIDTH);
-                    debug_assert_ne!(probe_seq.pos, 0);
-                    // SAFETY:
-                    //
-                    // * We are in range and `ptr = self.ctrl(0)` are valid for reads
-                    //   and properly aligned, because the table is already allocated
-                    //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
-                    //
-                    // * For tables larger than the group width, we will never end up in the given
-                    //   branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
-                    //   full bucket index. For tables smaller than the group width, calling the
-                    //   `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
-                    //   safe, as the trailing control bytes outside the range of the table are filled
-                    //   with EMPTY bytes, so this second scan either finds an empty slot (due to the
-                    //   load factor) or hits the trailing control bytes (containing EMPTY). See
-                    //   `intrinsics::cttz_nonzero` for more information.
-                    index = Group::load_aligned(self.ctrl(0))
-                        .match_empty_or_deleted()
-                        .lowest_set_bit_nonzero();
-                }
-            }
-
-            Some(index)
+            Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask)
        } else {
            None
        }
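For context on the hunk above: the false positive that `fix_insert_slot` corrects comes from masking a match bit with `bucket_mask`. The following standalone sketch (not part of the change; the concrete numbers are made up for illustration) walks through the arithmetic for a 4-bucket table probed with a 16-byte group.

// Hypothetical example: plain integer arithmetic only, no hashbrown internals.
fn main() {
    // A 4-bucket table (bucket_mask = 3) probed with a 16-byte group.
    let bucket_mask: usize = 3;
    // Probing starts at slot 2, and slots 2 and 3 are full, so the lowest
    // EMPTY/DELETED match in the loaded group is bit 2, i.e. control byte
    // 2 + 2 = 4, which lies in the trailing EMPTY region outside the table.
    let probe_pos: usize = 2;
    let bit: usize = 2;
    // `find_insert_slot_in_group` masks the raw position back into the table:
    let index = (probe_pos + bit) & bucket_mask;
    assert_eq!(index, 0);
    // If bucket 0 is already full, this index is the false positive;
    // `fix_insert_slot` detects it with `is_bucket_full(index)` and rescans
    // the group at the start of the table, where the load factor guarantees
    // a genuinely empty slot.
}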
@@ -1690,10 +1689,8 @@ impl<A: Allocator + Clone> RawTableInner<A> {
                // We must have found a insert slot by now, since the current group contains at
                // least one. For tables smaller than the group width, there will still be an
                // empty element in the current (and only) group due to the load factor.
-                debug_assert!(insert_slot.is_some());
-                match insert_slot {
-                    Some(insert_slot) => return (insert_slot, false),
-                    None => unsafe { hint::unreachable_unchecked() },
+                unsafe {
+                    return (self.fix_insert_slot(insert_slot.unwrap_unchecked()), false);
                }
            }

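The replacement in this hunk keeps the same contract as the removed code: the caller guarantees `insert_slot` is `Some`, and reaching `None` would be undefined behavior. A small standalone sketch of that equivalence (the `value` binding is invented for illustration; `Option::unwrap_unchecked` has been stable since Rust 1.58):

fn main() {
    let value: Option<usize> = Some(7);

    // Old pattern: a match whose `None` arm is declared unreachable,
    // letting the optimizer drop the branch entirely.
    let a = match value {
        Some(v) => v,
        None => unsafe { core::hint::unreachable_unchecked() },
    };

    // New pattern: the same promise, stated directly.
    let b = unsafe { value.unwrap_unchecked() };

    assert_eq!(a, b);
}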
@@ -1756,7 +1753,7 @@ impl<A: Allocator + Clone> RawTableInner<A> {
                let index = self.find_insert_slot_in_group(&group, &probe_seq);

                if likely(index.is_some()) {
-                    return index.unwrap();
+                    return self.fix_insert_slot(index.unwrap_unchecked());
                }
            }
            probe_seq.move_next(self.bucket_mask);
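For completeness, the "trailing control bytes" that both hunks' comments rely on: a table smaller than the group width still allocates `buckets + Group::WIDTH` control bytes so that a whole group can be loaded from any in-range slot, and the bytes between `buckets` and `Group::WIDTH` stay EMPTY forever. A simplified, self-contained model of that layout (the constants and the mirroring detail are assumptions for illustration, not hashbrown's real types):

const EMPTY: u8 = 0b1111_1111;
const FULL: u8 = 0x00; // any byte with the high bit clear marks a full bucket
const WIDTH: usize = 16; // e.g. the SSE2 group width
const BUCKETS: usize = 4;

fn main() {
    // BUCKETS + WIDTH control bytes: the real slots plus padding that lets a
    // full group be read starting from any slot without going out of bounds.
    let mut ctrl = [EMPTY; BUCKETS + WIDTH];

    // Mark buckets 0, 2 and 3 as full; bucket 1 stays empty, as the load
    // factor guarantees at least one empty slot.
    for &i in &[0usize, 2, 3] {
        ctrl[i] = FULL;
        ctrl[WIDTH + i] = FULL; // mirrored copy used by wrapping group loads
    }

    // The region between the real buckets and the mirrored bytes never
    // changes: these are the EMPTY bytes that trigger the false positives
    // `fix_insert_slot` corrects.
    assert!(ctrl[BUCKETS..WIDTH].iter().all(|&b| b == EMPTY));
}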