Commit d278523

Fixup insertion slot separately
1 parent 4947248 commit d278523

1 file changed: +41 -44

src/raw/mod.rs

Lines changed: 41 additions & 44 deletions
@@ -1601,51 +1601,50 @@ impl<A: Allocator + Clone> RawTableInner<A> {
         }
     }
 
+    /// Fixes up an insertion slot due to false positives for groups smaller than the group width.
+    #[inline]
+    unsafe fn fix_insert_slot(&self, index: usize) -> usize {
+        // In tables smaller than the group width, trailing control
+        // bytes outside the range of the table are filled with
+        // EMPTY entries. These will unfortunately trigger a
+        // match, but once masked may point to a full bucket that
+        // is already occupied. We detect this situation here and
+        // perform a second scan starting at the beginning of the
+        // table. This second scan is guaranteed to find an empty
+        // slot (due to the load factor) before hitting the trailing
+        // control bytes (containing EMPTY).
+        if unlikely(self.is_bucket_full(index)) {
+            debug_assert!(self.bucket_mask < Group::WIDTH);
+            // SAFETY:
+            //
+            // * We are in range and `ptr = self.ctrl(0)` are valid for reads
+            //   and properly aligned, because the table is already allocated
+            //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
+            //
+            // * For tables larger than the group width, we will never end up in the given
+            //   branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
+            //   full bucket index. For tables smaller than the group width, calling the
+            //   `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
+            //   safe, as the trailing control bytes outside the range of the table are filled
+            //   with EMPTY bytes, so this second scan either finds an empty slot (due to the
+            //   load factor) or hits the trailing control bytes (containing EMPTY). See
+            //   `intrinsics::cttz_nonzero` for more information.
+            Group::load_aligned(self.ctrl(0))
+                .match_empty_or_deleted()
+                .lowest_set_bit_nonzero()
+        } else {
+            index
+        }
+    }
+
     /// Finds the position to insert something in a group.
+    /// This may have false positives and must be fixed up with `fix_insert_slot` before it's used.
     #[inline]
     fn find_insert_slot_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option<usize> {
         let bit = group.match_empty_or_deleted().lowest_set_bit();
 
         if likely(bit.is_some()) {
-            let mut index = (probe_seq.pos + bit.unwrap()) & self.bucket_mask;
-
-            // In tables smaller than the group width, trailing control
-            // bytes outside the range of the table are filled with
-            // EMPTY entries. These will unfortunately trigger a
-            // match, but once masked may point to a full bucket that
-            // is already occupied. We detect this situation here and
-            // perform a second scan starting at the beginning of the
-            // table. This second scan is guaranteed to find an empty
-            // slot (due to the load factor) before hitting the trailing
-            // control bytes (containing EMPTY).
-            //
-            // SAFETY: The `index` is guaranteed to be in range `0..self.bucket_mask`
-            // due to masking with `self.bucket_mask`
-            unsafe {
-                if unlikely(self.is_bucket_full(index)) {
-                    debug_assert!(self.bucket_mask < Group::WIDTH);
-                    debug_assert_ne!(probe_seq.pos, 0);
-                    // SAFETY:
-                    //
-                    // * We are in range and `ptr = self.ctrl(0)` are valid for reads
-                    //   and properly aligned, because the table is already allocated
-                    //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
-                    //
-                    // * For tables larger than the group width, we will never end up in the given
-                    //   branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
-                    //   full bucket index. For tables smaller than the group width, calling the
-                    //   `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
-                    //   safe, as the trailing control bytes outside the range of the table are filled
-                    //   with EMPTY bytes, so this second scan either finds an empty slot (due to the
-                    //   load factor) or hits the trailing control bytes (containing EMPTY). See
-                    //   `intrinsics::cttz_nonzero` for more information.
-                    index = Group::load_aligned(self.ctrl(0))
-                        .match_empty_or_deleted()
-                        .lowest_set_bit_nonzero();
-                }
-            }
-
-            Some(index)
+            Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask)
         } else {
             None
         }
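
For context on the false positive the new `fix_insert_slot` helper handles: in a table smaller than the group width, a group load also reads the trailing EMPTY control bytes, and masking a match bit that came from that padding can land on a bucket that is already occupied. Below is a standalone sketch of that arithmetic; the group width, control-byte values, and scan loops are illustrative assumptions, not hashbrown's real internals.

// Illustrative only: a 4-bucket table (bucket_mask = 3) scanned with a
// 16-byte group. Buckets 0, 1 and 3 are full, bucket 2 is empty, and the
// 16 trailing control bytes are EMPTY padding (0xFF is a stand-in value).
const GROUP_WIDTH: usize = 16;
const EMPTY: u8 = 0xFF;

fn main() {
    let bucket_mask = 3usize;
    let mut ctrl = vec![EMPTY; bucket_mask + 1 + GROUP_WIDTH];
    ctrl[0] = 0x11; // FULL
    ctrl[1] = 0x22; // FULL
    ctrl[3] = 0x33; // FULL

    // A probe starting at pos = 3 scans ctrl[3..3 + GROUP_WIDTH]. The lowest
    // "empty" bit it finds is offset 1, but that byte is trailing padding.
    let probe_pos = 3usize;
    let bit = (0..GROUP_WIDTH)
        .find(|&i| ctrl[probe_pos + i] == EMPTY)
        .unwrap(); // bit == 1, a hit in the padding region

    // Masking the padding hit wraps it onto bucket 0, which is already full:
    // exactly the situation `fix_insert_slot` detects.
    let index = (probe_pos + bit) & bucket_mask;
    assert_eq!(index, 0);
    assert_ne!(ctrl[index], EMPTY, "false positive: masked slot is occupied");

    // The fixup rescans the group at the start of the table, which the load
    // factor guarantees still contains a genuinely empty slot.
    let fixed = (0..GROUP_WIDTH).find(|&i| ctrl[i] == EMPTY).unwrap();
    assert_eq!(fixed, 2);
    println!("false positive at {index}, fixed up to empty bucket {fixed}");
}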
@@ -1690,10 +1689,8 @@ impl<A: Allocator + Clone> RawTableInner<A> {
             // We must have found a insert slot by now, since the current group contains at
             // least one. For tables smaller than the group width, there will still be an
             // empty element in the current (and only) group due to the load factor.
-            debug_assert!(insert_slot.is_some());
-            match insert_slot {
-                Some(insert_slot) => return (insert_slot, false),
-                None => unsafe { hint::unreachable_unchecked() },
+            unsafe {
+                return (self.fix_insert_slot(insert_slot.unwrap_unchecked()), false);
             }
         }
 
@@ -1756,7 +1753,7 @@ impl<A: Allocator + Clone> RawTableInner<A> {
                 let index = self.find_insert_slot_in_group(&group, &probe_seq);
 
                 if likely(index.is_some()) {
-                    return index.unwrap();
+                    return self.fix_insert_slot(index.unwrap_unchecked());
                 }
             }
             probe_seq.move_next(self.bucket_mask);
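
After this commit, both call sites follow the same two-step pattern: take the possibly-false-positive index returned by `find_insert_slot_in_group` and pass it through `fix_insert_slot` before using it. A rough standalone sketch of that split, using simplified stand-ins rather than hashbrown's real `RawTableInner` API:

const GROUP_WIDTH: usize = 16;
const EMPTY: u8 = 0xFF; // stand-in for hashbrown's EMPTY control byte

struct MiniTable {
    ctrl: Vec<u8>,      // control bytes plus GROUP_WIDTH trailing EMPTY padding
    bucket_mask: usize, // number of buckets minus one
}

impl MiniTable {
    // Cheap per-group scan; for tables smaller than the group width the
    // returned index may point at a bucket that is actually occupied.
    fn find_insert_slot_in_group(&self, probe_pos: usize) -> Option<usize> {
        (0..GROUP_WIDTH)
            .find(|&i| self.ctrl[probe_pos + i] == EMPTY)
            .map(|bit| (probe_pos + bit) & self.bucket_mask)
    }

    // Caller-side fixup: if the masked slot is full, rescan from the start
    // of the table, where the load factor guarantees an empty slot exists.
    fn fix_insert_slot(&self, index: usize) -> usize {
        if self.ctrl[index] != EMPTY {
            (0..GROUP_WIDTH).find(|&i| self.ctrl[i] == EMPTY).unwrap()
        } else {
            index
        }
    }
}

fn main() {
    let mut ctrl = vec![EMPTY; 4 + GROUP_WIDTH];
    ctrl[0] = 0x11;
    ctrl[1] = 0x22;
    ctrl[3] = 0x33;
    let table = MiniTable { ctrl, bucket_mask: 3 };

    // Mirrors the new call sites: find the slot, then fix it up.
    let raw = table.find_insert_slot_in_group(3).unwrap();
    let slot = table.fix_insert_slot(raw);
    assert_eq!((raw, slot), (0, 2));
    println!("raw slot {raw} fixed up to {slot}");
}

Splitting the fixup out this way removes the unlikely full-bucket check from `find_insert_slot_in_group` itself and confines the small-table special case to the two places where the returned index is actually consumed.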
