Skip to content

Commit

Permalink
Merge pull request #1 from CeresDB/remove-collect
Browse files Browse the repository at this point in the history
remove unnecessary collect
  • Loading branch information
Rachelint authored Jun 6, 2023
2 parents 5dd9045 + 2a49129 commit ac8ef01
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 45 deletions.
34 changes: 19 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,28 @@ or

```toml
[dependencies]
xorfilter-rs = { git = "https://github.com/bnclabs/xorfilter" }
xorfilter-rs = { git = "https://github.com/prataprc/xorfilter" }
```

```rust
use xorfilter::Xor8;

let mut keys: Vec<u64> = vec![];
for _ in 0..num_keys {
keys.push(rng.gen());
}

let mut filter = Xor8::new(); // new filter.
filter.populate_keys(&keys); // populate keys.
filter.build(); // build bitmap.

for key in 0..lookup {
// there can be false positives, but no false negatives.
filter.contains_key(key);
use xorfilter::{xor8::Xor8Builder, BuildHasherDefault};

fn main() {
let num_keys = 1000;
let lookup = 10;
let mut b = Xor8Builder::<BuildHasherDefault>::default();
for i in 0..num_keys {
if i % 2 == 0 {
b.insert(&i);
}
}

// build bitmap.
let filter = b.build().unwrap();
for key in 0..lookup {
// there can be false positives, but no false negatives.
println!("{key}: {}", filter.contains(&key));
}
}
```

Expand Down
88 changes: 58 additions & 30 deletions src/xor8/builder.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
use std::hash::BuildHasher;
use std::hash::Hash;
use std::hash::Hasher;
use std::sync::Arc;

use crate::xor8::filter::fingerprint;
use crate::xor8::filter::splitmix64;
use crate::xor8::filter::XorSet;
use crate::xor8::Xor8;
use crate::BuildHasherDefault;
use std::{
hash::{BuildHasher, Hash, Hasher},
sync::Arc,
};

use crate::{
xor8::{
filter::{fingerprint, splitmix64, XorSet},
Xor8,
},
BuildHasherDefault,
};

#[derive(Clone, Copy, Default)]
struct KeyIndex {
Expand Down Expand Up @@ -56,15 +58,17 @@ type U64HashSet = ::std::collections::HashSet<u64, U64IdentifyBuildHasher>;
/// ```
#[derive(Clone, Debug)]
pub struct Xor8Builder<H = BuildHasherDefault>
where H: BuildHasher + Clone
where
H: BuildHasher + Clone,
{
digests: U64HashSet,
pub num_digests: usize,
pub hash_builder: H,
}

impl<H> Default for Xor8Builder<H>
where H: BuildHasher + Clone + Default
where
H: BuildHasher + Clone + Default,
{
fn default() -> Self {
Self {
Expand All @@ -76,11 +80,14 @@ where H: BuildHasher + Clone + Default
}

impl<H> Xor8Builder<H>
where H: BuildHasher + Clone
where
H: BuildHasher + Clone,
{
/// New Xor8 builder initialized with [BuildHasherDefault].
pub fn new() -> Self
where H: Default {
where
H: Default,
{
Self::default()
}

Expand All @@ -107,8 +114,8 @@ where H: BuildHasher + Clone

/// Insert 64-bit digest of a single key.
///
/// Digest for the key shall be generated using the default-hasher or via hasher
/// supplied via [Xor8Builder::with_hasher] method.
/// Digest for the key shall be generated using the default-hasher or via
/// hasher supplied via [Xor8Builder::with_hasher] method.
pub fn insert<K: ?Sized + Hash>(&mut self, key: &K) {
let digest = self.hash(key);

Expand All @@ -118,8 +125,8 @@ where H: BuildHasher + Clone

/// Populate with 64-bit digests for a collection of keys of type `K`.
///
/// Digest for key shall be generated using the default-hasher or via hasher supplied
/// via [Xor8Builder::with_hasher] method.
/// Digest for key shall be generated using the default-hasher or via hasher
/// supplied via [Xor8Builder::with_hasher] method.
pub fn populate<'i, K: Hash + 'i, I: IntoIterator<Item = &'i K>>(&mut self, keys: I) {
let mut n = 0;

Expand All @@ -145,28 +152,49 @@ where H: BuildHasher + Clone
self.num_digests += n;
}

/// Build bitmap for keys that where previously inserted using [Xor8Builder::insert],
/// [Xor8Builder::populate] and [Xor8Builder::populate_digests] method.
/// Build bitmap for keys that where previously inserted using
/// [Xor8Builder::insert], [Xor8Builder::populate] and
/// [Xor8Builder::populate_digests] method.
pub fn build(&mut self) -> Result<Xor8<H>, crate::Error> {
let digests = self.digests.iter().copied().collect::<Vec<u64>>();
self.build_from_digests(&digests)
let digest_len = self.digests.len();
Self::build_inner(
self.hash_builder.clone(),
self.digests.iter().copied(),
digest_len,
)
}

/// Build a bitmap for pre-computed 64-bit digests for keys.
///
/// If keys where previously inserted using [Xor8Builder::insert] or
/// [Xor8Builder::populate] or [Xor8Builder::populate_digests] methods, they shall be
/// ignored.
/// [Xor8Builder::populate] or [Xor8Builder::populate_digests] methods, they
/// shall be ignored.
///
/// It is upto the caller to ensure that digests are unique, that there no duplicates.
/// It is upto the caller to ensure that digests are unique, that there no
/// duplicates.
pub fn build_from_digests(
&mut self,
digests: &[u64],
) -> Result<Xor8<H>, crate::Error> {
let mut ff = Xor8::<H>::new(self.hash_builder.clone());
Self::build_inner(
self.hash_builder.clone(),
digests.iter().copied(),
digests.len(),
)
}

ff.num_keys = Some(digests.len());
let (size, mut rngcounter) = (digests.len(), 1_u64);
fn build_inner<It>(
hash_builder: H,
digest_iter: It,
digest_len: usize,
) -> Result<Xor8<H>, crate::Error>
where
It: Iterator<Item = u64> + Clone,
{
let mut ff = Xor8::<H>::new(hash_builder);

ff.num_keys = Some(digest_len);
let (size, mut rngcounter) = (digest_len, 1_u64);
let capacity = {
let capacity = 32 + ((1.23 * (size as f64)).ceil() as u32);
capacity / 3 * 3 // round it down to a multiple of 3
Expand All @@ -185,8 +213,8 @@ where H: BuildHasher + Clone
let mut sets2: Vec<XorSet> = vec![XorSet::default(); block_length];

loop {
for key in digests.iter() {
let hs = ff.get_h0h1h2(*key);
for key in digest_iter.clone() {
let hs = ff.get_h0h1h2(key);
sets0[hs.h0 as usize].xor_mask ^= hs.h;
sets0[hs.h0 as usize].count += 1;
sets1[hs.h1 as usize].xor_mask ^= hs.h;
Expand Down

0 comments on commit ac8ef01

Please sign in to comment.