Skip to content

Commit

Permalink
Add get_strict fn to ZeroTrie
Browse files Browse the repository at this point in the history
  • Loading branch information
sffc committed Sep 24, 2024
1 parent 7fa33d8 commit 2fab467
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 4 deletions.
27 changes: 27 additions & 0 deletions utils/zerotrie/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,15 @@ pub(crate) enum CaseSensitivity {
IgnoreCase,
}

/// Selects how a lookup treats a query whose ASCII case differs from the stored string.
///
/// Only used in ignore-case tries.
#[derive(Clone, Copy)]
pub(crate) enum LookupStrictness {
    /// Accept a stored string that differs from the query in case, so long as
    /// their `to_ascii_lowercase` forms match.
    Normal,
    /// Accept a stored string only if it matches the query exactly.
    Strict,
}

impl CaseSensitivity {
#[cfg(feature = "serde")]
const fn to_u8_flag(self) -> u8 {
Expand All @@ -89,6 +98,7 @@ pub(crate) struct ZeroTrieBuilderOptions {
pub ascii_mode: AsciiMode,
pub capacity_mode: CapacityMode,
pub case_sensitivity: CaseSensitivity,
pub lookup_strictness: LookupStrictness,
}

impl ZeroTrieBuilderOptions {
Expand All @@ -113,6 +123,7 @@ impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieSimpleAscii<S> {
ascii_mode: AsciiMode::AsciiOnly,
capacity_mode: CapacityMode::Normal,
case_sensitivity: CaseSensitivity::Sensitive,
lookup_strictness: LookupStrictness::Normal,
};
}

Expand All @@ -129,6 +140,7 @@ impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroAsciiIgnoreCaseTrie<S> {
ascii_mode: AsciiMode::AsciiOnly,
capacity_mode: CapacityMode::Normal,
case_sensitivity: CaseSensitivity::IgnoreCase,
lookup_strictness: LookupStrictness::Normal,
};
}

Expand All @@ -137,13 +149,27 @@ impl<S: ?Sized> crate::ZeroAsciiIgnoreCaseTrie<S> {
pub(crate) const FLAGS: u8 = Self::OPTIONS.to_u8_flags();
}

/// Crate-internal marker type backing `get_strict`.
///
/// It carries no data; its only role is to supply the `OPTIONS` below when it is
/// used as the type parameter of a parameterized lookup.
pub(crate) struct ZeroAsciiIgnoreCaseStrictTrie;

/// Ignore-case trie options, but with lookups that must match the stored case exactly.
impl ZeroTrieWithOptions for ZeroAsciiIgnoreCaseStrictTrie {
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        phf_mode: PhfMode::BinaryOnly,
        ascii_mode: AsciiMode::AsciiOnly,
        capacity_mode: CapacityMode::Normal,
        case_sensitivity: CaseSensitivity::IgnoreCase,
        // The one setting that makes this a "strict" configuration.
        lookup_strictness: LookupStrictness::Strict,
    };
}

/// Options for the perfect-hash trie: branch nodes may be either binary search or PHF.
impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTriePerfectHash<S> {
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        phf_mode: PhfMode::UsePhf,
        ascii_mode: AsciiMode::BinarySpans,
        capacity_mode: CapacityMode::Normal,
        case_sensitivity: CaseSensitivity::Sensitive,
        // This trie is case-sensitive, so the ignore-case strictness setting stays at `Normal`.
        lookup_strictness: LookupStrictness::Normal,
    };
}

Expand All @@ -159,6 +185,7 @@ impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieExtendedCapacity<S> {
ascii_mode: AsciiMode::BinarySpans,
capacity_mode: CapacityMode::Extended,
case_sensitivity: CaseSensitivity::Sensitive,
lookup_strictness: LookupStrictness::Normal,
};
}

Expand Down
23 changes: 19 additions & 4 deletions utils/zerotrie/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ pub(crate) fn get_parameterized<T: ZeroTrieWithOptions + ?Sized>(
};
if let Some((c, temp)) = ascii.split_first() {
if matches!(byte_type, NodeType::Ascii) {
let is_match = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase)
let is_match = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) && matches!(T::OPTIONS.lookup_strictness, LookupStrictness::Normal)
{
b.to_ascii_lowercase() == c.to_ascii_lowercase()
} else {
Expand Down Expand Up @@ -369,10 +369,25 @@ pub(crate) fn get_parameterized<T: ZeroTrieWithOptions + ?Sized>(
(search, trie) = trie.debug_split_at(x);
let bsearch_result =
if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) {
search.binary_search_by_key(&c.to_ascii_lowercase(), |x| {
x.to_ascii_lowercase()
})
if matches!(T::OPTIONS.lookup_strictness, LookupStrictness::Normal) {
// Ordering: (A=a), (B=b), (C=c), ..., (Z=z)
search.binary_search_by_key(&c.to_ascii_lowercase(), |x| {
x.to_ascii_lowercase()
})
} else {
// Ordering: A, a, B, b, C, c, ..., Z, z
let c_lowercase = c.to_ascii_lowercase();
search.binary_search_by(move |p| {
let p_lowercase = p.to_ascii_lowercase();
if c_lowercase == p_lowercase {
p.cmp(c)
} else {
p_lowercase.cmp(&c_lowercase)
}
})
}
} else {
// Ordering: A, B, C, ..., Z, a, b, c, ..., z
search.binary_search(c)
};
i = bsearch_result.ok()?;
Expand Down
33 changes: 33 additions & 0 deletions utils/zerotrie/src/zerotrie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,39 @@ impl_zerotrie_subtype!(
Vec::into_boxed_slice
);

impl<Store> ZeroAsciiIgnoreCaseTrie<Store>
where
    Store: AsRef<[u8]> + ?Sized,
{
    /// Looks up `key` in the trie, returning its value only if the stored string
    /// matches `key` exactly, including ASCII case.
    ///
    /// This is the case-exact counterpart of `get`: a `key` that differs from the
    /// stored string only in case yields `None` here.
    ///
    /// # Examples
    ///
    /// ```
    /// use litemap::LiteMap;
    /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
    ///
    /// let mut map = LiteMap::new_vec();
    /// map.insert(&b"foo"[..], 1);
    /// map.insert(b"Bar", 2);
    /// map.insert(b"Bingo", 3);
    ///
    /// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?;
    ///
    /// // `get` ignores ASCII case:
    /// assert_eq!(trie.get(b"foo"), Some(1));
    /// assert_eq!(trie.get(b"bar"), Some(2));
    /// assert_eq!(trie.get(b"BaR"), Some(2));
    ///
    /// // `get_strict` requires the exact stored case:
    /// assert_eq!(trie.get_strict(b"bar"), None);
    /// assert_eq!(trie.get_strict(b"BaR"), None);
    /// assert_eq!(trie.get_strict(b"Bar"), Some(2));
    ///
    /// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
    /// ```
    pub fn get_strict<K>(&self, key: K) -> Option<usize>
    where
        K: AsRef<[u8]>,
    {
        let trie_bytes = self.store.as_ref();
        let query = key.as_ref();
        // The strict marker type selects the case-exact comparison inside the shared reader.
        reader::get_parameterized::<crate::options::ZeroAsciiIgnoreCaseStrictTrie>(
            trie_bytes, query,
        )
    }
}

macro_rules! impl_dispatch {
($self:ident, $inner_fn:ident()) => {
match $self.0 {
Expand Down

0 comments on commit 2fab467

Please sign in to comment.