Skip to content

Commit

Permalink
chore: directory structure (#104)
Browse files Browse the repository at this point in the history
Signed-off-by: usamoi <usamoi@outlook.com>
  • Loading branch information
usamoi authored Nov 20, 2024
1 parent c28c3a8 commit d30a203
Show file tree
Hide file tree
Showing 27 changed files with 169 additions and 185 deletions.
14 changes: 8 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
#![allow(clippy::too_many_arguments)]
#![allow(clippy::type_complexity)]

mod algorithm;
mod datatype;
mod gucs;
mod index;
mod postgres;
mod projection;
mod types;
mod upgrade;
mod utils;
mod vchordrq;

pgrx::pg_module_magic!();
pgrx::extension_sql_file!("./sql/bootstrap.sql", bootstrap);
Expand All @@ -24,8 +22,12 @@ unsafe extern "C" fn _PG_init() {
}
detect::init();
unsafe {
index::init();
gucs::init();
vchordrq::init();

#[cfg(any(feature = "pg13", feature = "pg14"))]
pgrx::pg_sys::EmitWarningsOnPlaceholders(c"vchord".as_ptr());
#[cfg(any(feature = "pg15", feature = "pg16", feature = "pg17"))]
pgrx::pg_sys::MarkGUCPrefixReserved(c"vchord".as_ptr());
}
}

Expand Down
76 changes: 76 additions & 0 deletions src/projection.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use nalgebra::DMatrix;
use std::sync::OnceLock;

/// Builds an `n` x `n` matrix whose entries are drawn from the standard
/// normal distribution.
///
/// The generator is a `ChaCha12Rng` created from a fixed seed on every call,
/// so calling this function twice with the same `n` yields the same matrix.
fn random_matrix(n: usize) -> DMatrix<f32> {
    use rand::{Rng, SeedableRng};
    use rand_chacha::ChaCha12Rng;
    use rand_distr::StandardNormal;
    // Fixed seed: the output must be reproducible across calls.
    let seed = [7_u8; 32];
    let mut generator = ChaCha12Rng::from_seed(seed);
    let draw = |_row: usize, _col: usize| -> f32 { generator.sample(StandardNormal) };
    DMatrix::from_fn(n, n, draw)
}

// Verifies that `random_matrix(n)` is invertible for every `n` in `0..=60000`.
//
// Marked `#[ignore]`, so it only runs when explicitly requested. The sizes are
// split across one thread per available core: thread `offset` handles every
// `n` with `n % workers == offset`.
#[ignore]
#[test]
fn check_all_matrixs_are_full_rank() {
    let workers = std::thread::available_parallelism().unwrap().get();
    std::thread::scope(|scope| {
        let handles: Vec<_> = (0..workers)
            .map(|offset| {
                scope.spawn(move || {
                    for n in (offset..=60000).step_by(workers) {
                        assert!(random_matrix(n).is_invertible());
                    }
                })
            })
            .collect();
        // Propagate any assertion failure raised inside a worker thread.
        for handle in handles {
            handle.join().unwrap();
        }
    });
}

// Pins the exact output of `orthogonal_matrix` for sizes 2 and 3, so any
// change to the seed, the sampling, or the decomposition is detected.
#[test]
fn check_matrices() {
    let expected_2: Vec<Vec<f32>> = vec![
        vec![-0.5424608, -0.8400813],
        vec![0.8400813, -0.54246056],
    ];
    let expected_3: Vec<Vec<f32>> = vec![
        vec![-0.5309615, -0.69094884, -0.49058124],
        vec![0.8222731, -0.56002235, -0.10120347],
        vec![0.20481002, 0.45712686, -0.86549866],
    ];
    assert_eq!(orthogonal_matrix(2), expected_2);
    assert_eq!(orthogonal_matrix(3), expected_3);
}

/// Returns the `Q` factor of the QR decomposition of `random_matrix(n)`,
/// as a list of rows.
///
/// Because `random_matrix(n)` is the same on every call, the result is too.
fn orthogonal_matrix(n: usize) -> Vec<Vec<f32>> {
    use nalgebra::QR;
    // The decomposition is well defined as long as the input is full rank;
    // `check_all_matrixs_are_full_rank` tests that for every supported `n`.
    let q = QR::new(random_matrix(n)).q();
    q.row_iter()
        .map(|row| row.iter().copied().collect::<Vec<f32>>())
        .collect()
}

/// Lazily built projection matrices, one slot per vector dimension `n` in `0..=60000`.
///
/// Slot `n` is filled at most once, with `orthogonal_matrix(n)`, the first
/// time `prewarm(n)` or `project` needs it.
static MATRIXS: [OnceLock<Vec<Vec<f32>>>; 1 + 60000] = [const { OnceLock::new() }; 1 + 60000];

/// Ensures the projection matrix for dimension `n` has been computed and cached.
///
/// Dimensions above 60000 have no cache slot and are silently ignored.
pub fn prewarm(n: usize) {
    // `MATRIXS` has exactly `1 + 60000` slots, so a checked lookup succeeds
    // precisely when `n <= 60000`.
    let Some(slot) = MATRIXS.get(n) else {
        return;
    };
    slot.get_or_init(|| orthogonal_matrix(n));
}

/// Multiplies `vector` by the cached orthogonal matrix for its dimension.
///
/// Element `i` of the result is the dot product of `vector` with row `i` of
/// `orthogonal_matrix(vector.len())`. The matrix is computed on first use and
/// cached in `MATRIXS` afterwards.
///
/// # Panics
///
/// Panics if `vector.len()` exceeds the largest dimension that `MATRIXS` has
/// a slot for (60000).
pub fn project(vector: &[f32]) -> Vec<f32> {
    use base::scalar::ScalarLike;
    let n = vector.len();
    // Checked lookup: an oversized vector gets a descriptive panic instead of
    // an opaque "index out of bounds" on the cache array.
    let slot = MATRIXS.get(n).unwrap_or_else(|| {
        panic!(
            "cannot project a vector of {n} dimensions: at most {} dimensions are supported",
            MATRIXS.len() - 1
        )
    });
    let matrix = slot.get_or_init(|| orthogonal_matrix(n));
    matrix
        .iter()
        .map(|row| f32::reduce_sum_of_xy(vector, row))
        .collect()
}
30 changes: 15 additions & 15 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@ use validator::{Validate, ValidationError, ValidationErrors};

#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[serde(deny_unknown_fields)]
pub struct RabbitholeInternalBuildOptions {
#[serde(default = "RabbitholeInternalBuildOptions::default_lists")]
#[validate(length(min = 1, max = 8), custom(function = RabbitholeInternalBuildOptions::validate_lists))]
pub struct VchordrqInternalBuildOptions {
#[serde(default = "VchordrqInternalBuildOptions::default_lists")]
#[validate(length(min = 1, max = 8), custom(function = VchordrqInternalBuildOptions::validate_lists))]
pub lists: Vec<u32>,
#[serde(default = "RabbitholeInternalBuildOptions::default_spherical_centroids")]
#[serde(default = "VchordrqInternalBuildOptions::default_spherical_centroids")]
pub spherical_centroids: bool,
#[serde(default = "RabbitholeInternalBuildOptions::default_build_threads")]
#[serde(default = "VchordrqInternalBuildOptions::default_build_threads")]
#[validate(range(min = 1, max = 255))]
pub build_threads: u16,
}

impl RabbitholeInternalBuildOptions {
impl VchordrqInternalBuildOptions {
fn default_lists() -> Vec<u32> {
vec![1000]
}
Expand All @@ -35,7 +35,7 @@ impl RabbitholeInternalBuildOptions {
}
}

impl Default for RabbitholeInternalBuildOptions {
impl Default for VchordrqInternalBuildOptions {
fn default() -> Self {
Self {
lists: Self::default_lists(),
Expand All @@ -47,27 +47,27 @@ impl Default for RabbitholeInternalBuildOptions {

#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[serde(deny_unknown_fields)]
pub struct RabbitholeExternalBuildOptions {
pub struct VchordrqExternalBuildOptions {
pub table: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "snake_case")]
pub enum RabbitholeBuildOptions {
Internal(RabbitholeInternalBuildOptions),
External(RabbitholeExternalBuildOptions),
pub enum VchordrqBuildOptions {
Internal(VchordrqInternalBuildOptions),
External(VchordrqExternalBuildOptions),
}

impl Default for RabbitholeBuildOptions {
impl Default for VchordrqBuildOptions {
fn default() -> Self {
Self::Internal(Default::default())
}
}

impl Validate for RabbitholeBuildOptions {
impl Validate for VchordrqBuildOptions {
fn validate(&self) -> Result<(), ValidationErrors> {
use RabbitholeBuildOptions::*;
use VchordrqBuildOptions::*;
match self {
Internal(internal_build) => internal_build.validate(),
External(external_build) => external_build.validate(),
Expand All @@ -80,7 +80,7 @@ impl Validate for RabbitholeBuildOptions {
pub struct VchordrqIndexingOptions {
#[serde(default = "VchordrqIndexingOptions::default_residual_quantization")]
pub residual_quantization: bool,
pub build: RabbitholeBuildOptions,
pub build: VchordrqBuildOptions,
}

impl VchordrqIndexingOptions {
Expand Down
21 changes: 0 additions & 21 deletions src/utils/cells.rs

This file was deleted.

1 change: 0 additions & 1 deletion src/utils/mod.rs

This file was deleted.

32 changes: 16 additions & 16 deletions src/algorithm/build.rs → src/vchordrq/algorithm/build.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use crate::algorithm::k_means;
use crate::algorithm::rabitq;
use crate::algorithm::tuples::*;
use crate::algorithm::vectors;
use crate::index::am_options::Opfamily;
use crate::postgres::BufferWriteGuard;
use crate::postgres::Relation;
use crate::types::RabbitholeBuildOptions;
use crate::types::RabbitholeExternalBuildOptions;
use crate::types::RabbitholeInternalBuildOptions;
use crate::types::VchordrqBuildOptions;
use crate::types::VchordrqExternalBuildOptions;
use crate::types::VchordrqIndexingOptions;
use crate::types::VchordrqInternalBuildOptions;
use crate::vchordrq::algorithm::k_means;
use crate::vchordrq::algorithm::rabitq;
use crate::vchordrq::algorithm::tuples::*;
use crate::vchordrq::algorithm::vectors;
use crate::vchordrq::index::am_options::Opfamily;
use base::distance::DistanceKind;
use base::index::VectorOptions;
use base::scalar::ScalarLike;
Expand Down Expand Up @@ -40,12 +40,12 @@ pub fn build<T: HeapRelation, R: Reporter>(
let is_residual =
vchordrq_options.residual_quantization && vector_options.d == DistanceKind::L2;
let structures = match vchordrq_options.build {
RabbitholeBuildOptions::External(external_build) => Structure::extern_build(
VchordrqBuildOptions::External(external_build) => Structure::extern_build(
vector_options.clone(),
heap_relation.opfamily(),
external_build.clone(),
),
RabbitholeBuildOptions::Internal(internal_build) => {
VchordrqBuildOptions::Internal(internal_build) => {
let mut tuples_total = 0_u64;
let samples = {
let mut rand = rand::thread_rng();
Expand Down Expand Up @@ -149,16 +149,16 @@ impl Structure {
}
fn internal_build(
vector_options: VectorOptions,
internal_build: RabbitholeInternalBuildOptions,
internal_build: VchordrqInternalBuildOptions,
mut samples: Vec<Vec<f32>>,
) -> Vec<Self> {
use std::iter::once;
for sample in samples.iter_mut() {
*sample = rabitq::project(sample);
*sample = crate::projection::project(sample);
}
let mut result = Vec::<Self>::new();
for w in internal_build.lists.iter().rev().copied().chain(once(1)) {
let means = crate::algorithm::parallelism::RayonParallelism::scoped(
let means = crate::vchordrq::algorithm::parallelism::RayonParallelism::scoped(
internal_build.build_threads as _,
Arc::new(|| {
pgrx::check_for_interrupts!();
Expand Down Expand Up @@ -199,10 +199,10 @@ impl Structure {
fn extern_build(
vector_options: VectorOptions,
_opfamily: Opfamily,
external_build: RabbitholeExternalBuildOptions,
external_build: VchordrqExternalBuildOptions,
) -> Vec<Self> {
use std::collections::BTreeMap;
let RabbitholeExternalBuildOptions { table } = external_build;
let VchordrqExternalBuildOptions { table } = external_build;
let query = format!("SELECT id, parent, vector FROM {table};");
let mut parents = BTreeMap::new();
let mut vectors = BTreeMap::new();
Expand All @@ -226,7 +226,7 @@ impl Structure {
if vector_options.dims != vector.as_borrowed().dims() {
pgrx::error!("extern build: incorrect dimension, id = {id}");
}
vectors.insert(id, rabitq::project(vector.slice()));
vectors.insert(id, crate::projection::project(vector.slice()));
}
});
let mut children = parents
Expand Down
10 changes: 5 additions & 5 deletions src/algorithm/insert.rs → src/vchordrq/algorithm/insert.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::algorithm::rabitq;
use crate::algorithm::rabitq::fscan_process_lowerbound;
use crate::algorithm::tuples::*;
use crate::algorithm::vectors;
use crate::postgres::Relation;
use crate::vchordrq::algorithm::rabitq;
use crate::vchordrq::algorithm::rabitq::fscan_process_lowerbound;
use crate::vchordrq::algorithm::tuples::*;
use crate::vchordrq::algorithm::vectors;
use base::always_equal::AlwaysEqual;
use base::distance::Distance;
use base::distance::DistanceKind;
Expand All @@ -21,7 +21,7 @@ pub fn insert(relation: Relation, payload: Pointer, vector: Vec<f32>, distance_k
.expect("data corruption");
let dims = meta_tuple.dims;
assert_eq!(dims as usize, vector.len(), "invalid vector dimensions");
let vector = rabitq::project(&vector);
let vector = crate::projection::project(&vector);
let is_residual = meta_tuple.is_residual;
let default_lut = if !is_residual {
Some(rabitq::fscan_preprocess(&vector))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::algorithm::parallelism::{ParallelIterator, Parallelism};
use crate::vchordrq::algorithm::parallelism::{ParallelIterator, Parallelism};
use base::scalar::*;
use half::f16;
use rand::rngs::StdRng;
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::algorithm::tuples::*;
use crate::algorithm::vectors;
use crate::postgres::Relation;
use crate::vchordrq::algorithm::tuples::*;
use crate::vchordrq::algorithm::vectors;
use std::fmt::Write;

pub fn prewarm(relation: Relation, height: i32) -> String {
Expand Down
Loading

0 comments on commit d30a203

Please sign in to comment.