From 9ab08a63c424ff86dd86f6532f6ef00f6639751f Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Sun, 16 Jul 2023 20:05:48 +0200 Subject: [PATCH] More docs on builder utilities --- .../zerotrie/src/builder/branch_meta.rs | 4 +- experimental/zerotrie/src/builder/bytestr.rs | 4 + .../zerotrie/src/builder/konst/store.rs | 77 ++++++++++++++----- .../zerotrie/src/builder/nonconst/store.rs | 29 ++++++- 4 files changed, 92 insertions(+), 22 deletions(-) diff --git a/experimental/zerotrie/src/builder/branch_meta.rs b/experimental/zerotrie/src/builder/branch_meta.rs index 03dc3087d70..59fb881b6ab 100644 --- a/experimental/zerotrie/src/builder/branch_meta.rs +++ b/experimental/zerotrie/src/builder/branch_meta.rs @@ -5,7 +5,8 @@ /// Intermediate metadata for a branch node under construction. #[derive(Debug, Clone, Copy)] pub(crate) struct BranchMeta { - /// The lead byte for this branch. + /// The lead byte for this branch. Formerly it was required to be an ASCII byte, but now + /// it can be any byte. pub ascii: u8, /// The size in bytes of the trie data reachable from this branch. pub local_length: usize, @@ -16,6 +17,7 @@ pub(crate) struct BranchMeta { } impl BranchMeta { + /// Creates a new empty [`BranchMeta`]. pub const fn const_default() -> Self { BranchMeta { ascii: 0, diff --git a/experimental/zerotrie/src/builder/bytestr.rs b/experimental/zerotrie/src/builder/bytestr.rs index 11e614573fe..9910efd7ffd 100644 --- a/experimental/zerotrie/src/builder/bytestr.rs +++ b/experimental/zerotrie/src/builder/bytestr.rs @@ -7,6 +7,7 @@ use core::borrow::Borrow; #[cfg(feature = "serde")] use alloc::boxed::Box; +/// A struct transparent over `[u8]` with convenient helper functions. #[repr(transparent)] #[derive(PartialEq, Eq, PartialOrd, Ord)] pub(crate) struct ByteStr([u8]); @@ -71,10 +72,12 @@ impl ByteStr { self.0.get(index).copied() } + /// Returns the byte at the given index, panicking if out of bounds. 
pub(crate) const fn byte_at_or_panic(&self, index: usize) -> u8 { self.0[index] } + /// Const function to evaluate `self < other`. pub(crate) const fn is_less_then(&self, other: &Self) -> bool { let mut i = 0; while i < self.len() && i < other.len() { @@ -89,6 +92,7 @@ impl ByteStr { self.len() < other.len() } + /// Const function to evaluate `self[..prefix_len] == other[..prefix_len]` pub(crate) const fn prefix_eq(&self, other: &ByteStr, prefix_len: usize) -> bool { assert!(prefix_len <= self.len()); assert!(prefix_len <= other.len()); diff --git a/experimental/zerotrie/src/builder/konst/store.rs b/experimental/zerotrie/src/builder/konst/store.rs index 5b6cde31ec8..d2e6ad43be4 100644 --- a/experimental/zerotrie/src/builder/konst/store.rs +++ b/experimental/zerotrie/src/builder/konst/store.rs @@ -6,15 +6,21 @@ use super::super::branch_meta::BranchMeta; -/// A const-friendly slice type. +/// A const-friendly slice type. It is backed by a full slice but is primarily intended +/// to represent subslices of the full slice. We need this only because we can't take +/// subslices in const Rust. #[derive(Debug, Copy, Clone)] pub(crate) struct ConstSlice<'a, T> { + /// The full slice. full_slice: &'a [T], + /// The start index of the slice represented by this [`ConstSlice`]. start: usize, + /// The non-inclusive end index of the slice represented by this [`ConstSlice`]. limit: usize, } impl<'a, T> ConstSlice<'a, T> { + /// Creates a [`ConstSlice`] representing an entire slice. pub const fn from_slice(other: &'a [T]) -> Self { ConstSlice { full_slice: other, @@ -23,6 +29,7 @@ impl<'a, T> ConstSlice<'a, T> { } } + /// Creates a [`ConstSlice`] with the given start and limit. pub const fn from_manual_slice(full_slice: &'a [T], start: usize, limit: usize) -> Self { ConstSlice { full_slice, @@ -31,14 +38,17 @@ impl<'a, T> ConstSlice<'a, T> { } } + /// Returns the length of the [`ConstSlice`]. 
pub const fn len(&self) -> usize { self.limit - self.start } + /// Gets the element at `index`, panicking if not present. pub const fn get_or_panic(&self, index: usize) -> &T { &self.full_slice[index + self.start] } + /// Gets the first element or `None` if empty. #[cfg(test)] pub const fn first(&self) -> Option<&T> { if self.len() == 0 { @@ -48,6 +58,7 @@ impl<'a, T> ConstSlice<'a, T> { } } + /// Gets the last element or `None` if empty. pub const fn last(&self) -> Option<&T> { if self.len() == 0 { None @@ -56,6 +67,7 @@ impl<'a, T> ConstSlice<'a, T> { } } + /// Gets a subslice of this slice. #[cfg(test)] pub const fn get_subslice_or_panic( &self, @@ -71,6 +83,7 @@ impl<'a, T> ConstSlice<'a, T> { } } + /// Non-const function that returns this [`ConstSlice`] as a regular slice. #[cfg(any(test, feature = "alloc"))] pub fn as_slice(&self) -> &'a [T] { &self.full_slice[self.start..self.limit] @@ -98,6 +111,9 @@ impl Default for ConstArrayBuilder { } impl ConstArrayBuilder { + /// Creates a new, empty builder of the given size. `cursor` indicates where in the + /// array new elements will be inserted first. Since we use a lot of prepend operations, + /// it is common to set `cursor` to `N`. pub const fn new_empty(full_array: [T; N], cursor: usize) -> Self { assert!(cursor <= N); Self { @@ -107,6 +123,7 @@ impl ConstArrayBuilder { } } + /// Creates a new builder with some initial content in `[start, limit)`. pub const fn from_manual_slice(full_array: [T; N], start: usize, limit: usize) -> Self { assert!(start <= limit); assert!(limit <= N); @@ -117,39 +134,42 @@ impl ConstArrayBuilder { } } + /// Returns the number of initialized elements in the builder. pub const fn len(&self) -> usize { self.limit - self.start } + /// Whether there are no initialized elements in the builder. #[allow(dead_code)] pub const fn is_empty(&self) -> bool { self.len() == 0 } + /// Returns the initialized elements as a [`ConstSlice`]. 
pub const fn as_const_slice(&self) -> ConstSlice { ConstSlice::from_manual_slice(&self.full_array, self.start, self.limit) } + /// Non-const function that returns a slice of the initialized elements. #[cfg(feature = "alloc")] pub fn as_slice(&self) -> &[T] { &self.full_array[self.start..self.limit] } } -impl ConstArrayBuilder { - pub const fn const_bitor_assign(mut self, index: usize, other: u8) -> Self { - self.full_array[self.start + index] |= other; - self - } - // Can't be generic because T has a destructor - pub const fn const_take_or_panic(self) -> [u8; N] { +// Certain functions that involve dropping `T` require that it be `Copy` +impl ConstArrayBuilder { + /// Takes a fully initialized builder as an array. Panics if the builder is not + /// fully initialized. + pub const fn const_take_or_panic(self) -> [T; N] { if self.start != 0 || self.limit != N { panic!("AsciiTrieBuilder buffer too large"); } self.full_array } - // Can't be generic because T has a destructor - pub const fn const_push_front_or_panic(mut self, value: u8) -> Self { + + /// Prepends an element to the front of the builder, panicking if there is no room. + pub const fn const_push_front_or_panic(mut self, value: T) -> Self { if self.start == 0 { panic!("AsciiTrieBuilder buffer too small"); } @@ -157,8 +177,9 @@ impl ConstArrayBuilder { self.full_array[self.start] = value; self } - // Can't be generic because T has a destructor - pub const fn const_extend_front_or_panic(mut self, other: ConstSlice) -> Self { + + /// Prepends multiple elements to the front of the builder, panicking if there is no room. 
+ pub const fn const_extend_front_or_panic(mut self, other: ConstSlice) -> Self { if self.start < other.len() { panic!("AsciiTrieBuilder buffer too small"); } @@ -172,15 +193,16 @@ impl ConstArrayBuilder { } } -impl ConstArrayBuilder { - pub const fn push_front_or_panic(mut self, value: T) -> Self { - if self.start == 0 { - panic!("AsciiTrieBuilder buffer too small"); - } - self.start -= 1; - self.full_array[self.start] = value; +impl ConstArrayBuilder { + /// Specialized function that performs `self[index] |= other` + pub const fn const_bitor_assign(mut self, index: usize, other: u8) -> Self { + self.full_array[self.start + index] |= other; self } +} + +impl ConstArrayBuilder { + /// Swaps the elements at positions `i` and `j`. #[cfg(feature = "alloc")] pub fn swap_or_panic(mut self, i: usize, j: usize) -> Self { self.full_array.swap(self.start + i, self.start + j); @@ -188,6 +210,11 @@ impl ConstArrayBuilder { } } +/// Evaluates a block over each element of a const slice. Takes three arguments: +/// +/// 1. Expression that resolves to the [`ConstSlice`]. +/// 2. Token that will be assigned the value of the element. +/// 3. Block to evaluate for each element. macro_rules! const_for_each { ($safe_const_slice:expr, $item:tt, $inner:expr) => {{ let mut i = 0; @@ -201,6 +228,7 @@ macro_rules! const_for_each { pub(crate) use const_for_each; +/// A data structure that holds up to N [`BranchMeta`] items. pub(crate) struct ConstLengthsStack { data: [Option; N], idx: usize, @@ -213,6 +241,7 @@ impl core::fmt::Debug for ConstLengthsStack { } impl ConstLengthsStack { + /// Creates a new empty [`ConstLengthsStack`]. pub const fn new() -> Self { Self { data: [None; N], @@ -220,10 +249,12 @@ impl ConstLengthsStack { } } + /// Returns whether the stack is empty. pub const fn is_empty(&self) -> bool { self.idx == 0 } + /// Adds a [`BranchMeta`] to the stack, panicking if there is no room. 
#[must_use] pub const fn push_or_panic(mut self, meta: BranchMeta) -> Self { if self.idx >= N { @@ -238,6 +269,8 @@ impl ConstLengthsStack { self } + /// Returns a copy of the [`BranchMeta`] on the top of the stack, panicking if + /// the stack is empty. pub const fn peek_or_panic(&self) -> BranchMeta { if self.idx == 0 { panic!("AsciiTrie Builder: Attempted to peek from an empty stack"); @@ -245,6 +278,7 @@ impl ConstLengthsStack { self.get_or_panic(0) } + /// Returns a copy of the [`BranchMeta`] at the specified index. const fn get_or_panic(&self, index: usize) -> BranchMeta { if self.idx <= index { panic!("AsciiTrie Builder: Attempted to get too deep in a stack"); @@ -255,6 +289,7 @@ impl ConstLengthsStack { } } + /// Removes many [`BranchMeta`]s from the stack, returning them in a [`ConstArrayBuilder`]. pub const fn pop_many_or_panic( mut self, len: usize, @@ -267,7 +302,7 @@ impl ConstLengthsStack { break; } let i = self.idx - ix - 1; - result = result.push_front_or_panic(match self.data[i] { + result = result.const_push_front_or_panic(match self.data[i] { Some(x) => x, None => panic!("Not enough items in the ConstLengthsStack"), }); @@ -277,12 +312,14 @@ impl ConstLengthsStack { (self, result) } + /// Non-const function that returns the initialized elements as a slice. fn as_slice(&self) -> &[Option] { &self.data[0..self.idx] } } impl ConstArrayBuilder { + /// Converts this builder-array of [`BranchMeta`] to one of the `ascii` fields. 
pub const fn map_to_ascii_bytes(&self) -> ConstArrayBuilder { let mut result = ConstArrayBuilder::new_empty([0; N], N); let self_as_slice = self.as_const_slice(); diff --git a/experimental/zerotrie/src/builder/nonconst/store.rs b/experimental/zerotrie/src/builder/nonconst/store.rs index cb6e8fd482b..3a1acb9c086 100644 --- a/experimental/zerotrie/src/builder/nonconst/store.rs +++ b/experimental/zerotrie/src/builder/nonconst/store.rs @@ -9,16 +9,33 @@ use super::super::konst::ConstArrayBuilder; use alloc::collections::VecDeque; use alloc::vec::Vec; +/// A trait applied to a data structure for building a ZeroTrie. pub trait TrieBuilderStore { + /// Create a new empty store. fn atbs_new_empty() -> Self; + + /// Return the length in bytes of the store. fn atbs_len(&self) -> usize; + + /// Push a byte to the front of the store. fn atbs_push_front(&mut self, byte: u8); + + /// Push multiple bytes to the front of the store. fn atbs_extend_front(&mut self, other: &[u8]); + + /// Read the store into a `Vec`. fn atbs_to_bytes(&self) -> Vec; + + /// Perform the operation `self[index] |= other` fn atbs_bitor_assign(&mut self, index: usize, other: u8); + + /// Swap the adjacent ranges `self[start..mid]` and `self[mid..limit]`. fn atbs_swap_ranges(&mut self, start: usize, mid: usize, limit: usize); + + /// Remove and return the first element in the store, or None if empty. fn atbs_pop_front(&mut self) -> Option; + /// Prepend `n` zeros to the front of the store. fn atbs_prepend_n_zeros(&mut self, n: usize) { let mut i = 0; while i < n { @@ -65,6 +82,8 @@ impl TrieBuilderStore for VecDeque { self.len() ); } + // The following algorithm is an in-place swap of two adjacent ranges of potentially + // different lengths. Would make a good coding interview question. loop { if start == mid || mid == limit { return; @@ -92,6 +111,7 @@ impl TrieBuilderStore for VecDeque { } } +/// A data structure that holds any number of [`BranchMeta`] items. 
pub(crate) struct NonConstLengthsStack { data: Vec, } @@ -103,22 +123,28 @@ impl core::fmt::Debug for NonConstLengthsStack { } impl NonConstLengthsStack { + /// Creates a new empty [`NonConstLengthsStack`]. pub const fn new() -> Self { Self { data: Vec::new() } } + /// Returns whether the stack is empty. pub fn is_empty(&self) -> bool { self.data.is_empty() } + /// Adds a [`BranchMeta`] to the stack. pub fn push(&mut self, meta: BranchMeta) { self.data.push(meta); } + /// Returns a copy of the [`BranchMeta`] on the top of the stack, panicking if + /// the stack is empty. pub fn peek_or_panic(&self) -> BranchMeta { *self.data.last().unwrap() } + /// Removes many [`BranchMeta`]s from the stack, returning them in a [`ConstArrayBuilder`]. pub fn pop_many_or_panic(&mut self, len: usize) -> ConstArrayBuilder<256, BranchMeta> { debug_assert!(len <= 256); let mut result = ConstArrayBuilder::new_empty([BranchMeta::const_default(); 256], 256); @@ -129,7 +155,7 @@ impl NonConstLengthsStack { } let i = self.data.len() - ix - 1; // Won't panic because len <= 256 - result = result.push_front_or_panic(match self.data.get(i) { + result = result.const_push_front_or_panic(match self.data.get(i) { Some(x) => *x, None => panic!("Not enough items in the ConstLengthsStack"), }); @@ -139,6 +165,7 @@ impl NonConstLengthsStack { result } + /// Non-const function that returns the initialized elements as a slice. fn as_slice(&self) -> &[BranchMeta] { &self.data }