From 1c6f81023a64a2f9dde35e43d5d1041a0f9cf712 Mon Sep 17 00:00:00 2001
From: Gavin Mendel-Gleason
Date: Tue, 30 Apr 2024 13:53:24 +0200
Subject: [PATCH] circulant parameter count

---
 parallel-hnsw/benches/bench.rs  | 11 -----------
 parallel-hnsw/src/lib.rs        | 17 ++++++++++++-----
 parallel-hnsw/src/parameters.rs |  8 ++++----
 3 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/parallel-hnsw/benches/bench.rs b/parallel-hnsw/benches/bench.rs
index 6d83036..0ecb36d 100644
--- a/parallel-hnsw/benches/bench.rs
+++ b/parallel-hnsw/benches/bench.rs
@@ -50,14 +50,3 @@ fn create_test_data(length: usize) -> SillyComparator {
         data: Arc::new(vec),
     }
 }
-
-#[bench]
-fn bla(b: &mut Bencher) {
-    const LENGTH: usize = 10000;
-    let comparator = create_test_data(LENGTH);
-    let vs: Vec = (0..LENGTH).map(VectorId).collect();
-
-    b.iter(|| {
-        let _result: Hnsw<_> = Hnsw::generate(comparator.clone(), vs.clone(), 24, 48, 2);
-    });
-}
diff --git a/parallel-hnsw/src/lib.rs b/parallel-hnsw/src/lib.rs
index 1dd149b..11f58e3 100644
--- a/parallel-hnsw/src/lib.rs
+++ b/parallel-hnsw/src/lib.rs
@@ -32,6 +32,11 @@ use std::fmt::Debug;
 use crate::search::assert_layer_invariants;
 use crate::{priority_queue::PriorityQueue, search::match_within_epsilon};
 
+const PRIMES: [usize; 40] = [
+    2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
+    101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173,
+];
+
 pub enum WrappedBorrowable<'a, T: ?Sized, Borrowable: Deref + 'a> {
     Left(Borrowable),
     Right(&'a T),
@@ -158,12 +163,11 @@ impl Layer {
     }
 
     pub fn routing_nodes(&self, nodeid: NodeId, sp: SearchParameters) -> Vec {
-        let primes = [1, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
         // Calculate using the circulants
         let size = self.node_count();
-        primes
+        PRIMES
             .iter()
-            .take(sp.grid_network_dimension)
+            .take(sp.circulant_parameter_count)
             .map(|prime| NodeId((nodeid.0 + prime) % size))
             .filter(|i| *i != nodeid)
             .collect()
@@ -2251,7 +2255,9 @@ mod tests {
     fn test_recall() {
         let size = 10_000;
         let dimension = 1536;
-        let bp = BuildParameters::default();
+        let mut bp = BuildParameters::default();
+        bp.initial_partition_search.circulant_parameter_count = 0;
+        bp.optimization.search.circulant_parameter_count = 0;
         let mut hnsw: Hnsw =
             bigvec::make_random_hnsw_with_build_parameters(size, dimension, bp);
         do_test_recall(&hnsw, 0.9);
@@ -2487,7 +2493,8 @@ mod tests {
         let cc = Comparator32 { data: vecs.into() };
         let vids: Vec = (0..size).map(VectorId).collect();
         let mut bp = BuildParameters::default();
-        bp.optimization.search.grid_network_dimension = 0;
+        bp.initial_partition_search.circulant_parameter_count = 0;
+        bp.optimization.search.circulant_parameter_count = 0;
         let mut hnsw: Hnsw = Hnsw::generate(cc, vids, bp, &mut ());
         hnsw.improve_index(bp, None, &mut ());
         panic!()
diff --git a/parallel-hnsw/src/parameters.rs b/parallel-hnsw/src/parameters.rs
index e593895..403c55a 100644
--- a/parallel-hnsw/src/parameters.rs
+++ b/parallel-hnsw/src/parameters.rs
@@ -5,7 +5,7 @@ pub struct SearchParameters {
     pub number_of_candidates: usize,
     pub upper_layer_candidate_count: usize,
     pub probe_depth: usize,
-    pub grid_network_dimension: usize,
+    pub circulant_parameter_count: usize,
 }
 
 impl Default for SearchParameters {
@@ -14,7 +14,7 @@ impl Default for SearchParameters {
             number_of_candidates: 300,
             upper_layer_candidate_count: 300,
             probe_depth: 2,
-            grid_network_dimension: 12,
+            circulant_parameter_count: 12,
         }
     }
 }
@@ -34,7 +34,7 @@ impl Default for OptimizationParameters {
             promotion_threshold: 0.01,
             neighborhood_threshold: 0.01,
             recall_proportion: 0.1,
-            promotion_proportion: 1.0,
+            promotion_proportion: 0.0,
             search: SearchParameters::default(),
         }
     }
@@ -60,7 +60,7 @@ impl Default for BuildParameters {
                 number_of_candidates: 6,
                 upper_layer_candidate_count: 6,
                 probe_depth: 2,
-                grid_network_dimension: 3,
+                circulant_parameter_count: 12,
             },
         }
     }