Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions python/python/tests/compat/test_vector_indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pathlib import Path

import lance
import numpy as np
import pyarrow as pa
import pyarrow.compute as pc

Expand Down Expand Up @@ -210,3 +211,62 @@ def check_write(self):
ds.insert(data)
ds.optimize.optimize_indices()
ds.optimize.compact_files()


@compat_test(min_version="0.39.0")
class IvfRqVectorIndex(UpgradeDowngradeTest):
    """Compatibility test for a dataset carrying an IVF_RQ vector index."""

    def __init__(self, path: Path):
        # Dataset location shared by the create / read / write phases.
        self.path = path

    def create(self):
        """Write a fresh dataset and build an IVF_RQ index on its ``vec`` column."""
        # Start from a clean slate; a missing directory is not an error.
        shutil.rmtree(self.path, ignore_errors=True)
        dim = 32
        num_rows = 512

        ids = pa.array(range(num_rows))
        # One flat run of random values cast to float32, then grouped into
        # fixed-size lists of ``dim`` elements (one list per row).
        flat_values = pc.random(dim * num_rows).cast(pa.float32())
        vectors = pa.FixedSizeListArray.from_arrays(flat_values, dim)
        table = pa.table({"id": ids, "vec": vectors})

        written = lance.write_dataset(table, self.path)
        written.create_index("vec", "IVF_RQ", num_partitions=4, num_bits=1)

    def check_read(self):
        """Run a nearest-neighbour query on ``vec`` and expect exactly ``k`` rows.

        The query may be answered by the index or by a brute-force fallback.
        """
        dataset = lance.dataset(self.path)
        query_vector = np.random.random(32).astype(np.float32)
        nearest = {"q": query_vector, "k": 4, "column": "vec"}
        hits = dataset.to_table(nearest=nearest)
        assert hits.num_rows == 4

    def check_write(self):
        """Append one row, then run index optimization and file compaction."""
        dataset = lance.dataset(self.path)
        new_id = pa.array([1000])
        new_vector = pa.FixedSizeListArray.from_arrays(
            pc.random(32).cast(pa.float32()), 32
        )
        extra_rows = pa.table({"id": new_id, "vec": new_vector})

        dataset.insert(extra_rows)
        dataset.optimize.optimize_indices()
        dataset.optimize.compact_files()
53 changes: 48 additions & 5 deletions rust/lance-index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,13 @@ pub const INDEX_FILE_NAME: &str = "index.idx";
pub const INDEX_AUXILIARY_FILE_NAME: &str = "auxiliary.idx";
pub const INDEX_METADATA_SCHEMA_KEY: &str = "lance:index";

// Currently all vector indexes are version 1
/// Default version for vector index metadata.
///
/// Most vector indices should use this version unless they need to bump for a
/// format change.
pub const VECTOR_INDEX_VERSION: u32 = 1;
/// Version for IVF_RQ indices.
///
/// This is higher than [`VECTOR_INDEX_VERSION`], so IVF_RQ carries a dedicated
/// version number that is distinct from the default used by other vector
/// index types.
pub const IVF_RQ_INDEX_VERSION: u32 = 2;

/// The factor of threshold to trigger split / join for vector index.
///
Expand Down Expand Up @@ -268,16 +273,20 @@ impl IndexType {
Self::BloomFilter => 0,
Self::RTree => 0,

// for now all vector indices are built by the same builder,
// so they share the same version.
// IMPORTANT: if any vector index subtype needs a format bump that is
// not backward compatible, its new version must be set to
// (current max vector index version + 1), even if only one subtype
// changed. Compatibility filtering currently cannot distinguish vector
// subtypes from details-only metadata, so vector versions effectively
// share one global monotonic compatibility level.
Self::Vector
| Self::IvfFlat
| Self::IvfSq
| Self::IvfPq
| Self::IvfHnswSq
| Self::IvfHnswPq
| Self::IvfHnswFlat
| Self::IvfRq => 1,
| Self::IvfHnswFlat => VECTOR_INDEX_VERSION as i32,
Self::IvfRq => IVF_RQ_INDEX_VERSION as i32,
}
}

Expand All @@ -299,6 +308,24 @@ impl IndexType {
_ => 8192,
}
}

/// Returns the highest supported vector index version in this Lance build.
///
/// Evaluates `version()` for each vector index type listed below and keeps
/// the largest value. `VECTOR_INDEX_VERSION` is only the fallback for an
/// empty list.
pub fn max_vector_version() -> u32 {
    let vector_types = [
        Self::Vector,
        Self::IvfFlat,
        Self::IvfSq,
        Self::IvfPq,
        Self::IvfHnswSq,
        Self::IvfHnswPq,
        Self::IvfHnswFlat,
        Self::IvfRq,
    ];

    let mut highest: Option<u32> = None;
    for index_type in vector_types {
        let candidate = index_type.version() as u32;
        highest = Some(match highest {
            Some(current) if current > candidate => current,
            _ => candidate,
        });
    }

    highest.unwrap_or(VECTOR_INDEX_VERSION)
}
}

pub trait IndexParams: Send + Sync {
Expand Down Expand Up @@ -329,3 +356,19 @@ pub fn infer_system_index_type(
None
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// IVF_RQ must report `IVF_RQ_INDEX_VERSION`, and that version must be
    /// strictly greater than the one reported for IVF_PQ.
    #[test]
    fn test_ivf_rq_has_dedicated_index_version() {
        let rq_version = IndexType::IvfRq.version();
        let pq_version = IndexType::IvfPq.version();

        assert!(rq_version > pq_version);
        assert_eq!(rq_version as u32, IVF_RQ_INDEX_VERSION);
    }

    /// The maximum across vector index types must equal the IVF_RQ version.
    #[test]
    fn test_max_vector_version_tracks_highest_supported() {
        let highest = IndexType::max_vector_version();

        assert_eq!(highest, IVF_RQ_INDEX_VERSION);
    }
}
12 changes: 8 additions & 4 deletions rust/lance/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ use lance_index::vector::flat::index::{FlatBinQuantizer, FlatIndex, FlatQuantize
use lance_index::vector::hnsw::HNSW;
use lance_index::vector::pq::ProductQuantizer;
use lance_index::vector::sq::ScalarQuantizer;
use lance_index::{
DatasetIndexExt, INDEX_METADATA_SCHEMA_KEY, IndexDescription, VECTOR_INDEX_VERSION,
};
use lance_index::{DatasetIndexExt, INDEX_METADATA_SCHEMA_KEY, IndexDescription};
use lance_index::{INDEX_FILE_NAME, Index, IndexType, pb, vector::VectorIndex};
use lance_index::{
IndexCriteria, is_system_index,
Expand Down Expand Up @@ -347,6 +345,12 @@ pub(crate) async fn remap_index(
}
}
it if it.is_vector() => {
let index_version = u32::try_from(matched.index_version).map_err(|_| {
Error::index(format!(
"Invalid vector index version {} on index {}",
matched.index_version, matched.name
))
})?;
remap_vector_index(
Arc::new(dataset.clone()),
&field_path,
Expand All @@ -361,7 +365,7 @@ pub(crate) async fn remap_index(
&lance_table::format::pb::VectorIndexDetails::default(),
)
.unwrap(),
index_version: VECTOR_INDEX_VERSION,
index_version,
}
}
_ => {
Expand Down
6 changes: 4 additions & 2 deletions rust/lance/src/index/append.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use std::sync::Arc;

use futures::FutureExt;
use lance_core::{Error, Result};
use lance_index::VECTOR_INDEX_VERSION;
use lance_index::metrics::NoOpMetricsCollector;
use lance_index::optimize::OptimizeOptions;
use lance_index::progress::NoopIndexBuildProgress;
Expand Down Expand Up @@ -218,7 +217,10 @@ pub async fn merge_indices_with_unindexed_frags<'a>(
indices_merged,
CreatedIndex {
index_details: vector_index_details(),
index_version: VECTOR_INDEX_VERSION,
// retain_supported_indices guarantees all old_indices have
// index_version <= our max supported version, so we can safely
// write the current library's version for this index type.
index_version: it.version() as u32,
},
))
}
Expand Down
7 changes: 4 additions & 3 deletions rust/lance/src/index/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::{
use futures::future::BoxFuture;
use lance_core::datatypes::format_field_path;
use lance_index::progress::{IndexBuildProgress, NoopIndexBuildProgress};
use lance_index::{IndexParams, IndexType, VECTOR_INDEX_VERSION, scalar::CreatedIndex};
use lance_index::{IndexParams, IndexType, scalar::CreatedIndex};
use lance_index::{
metrics::NoOpMetricsCollector,
scalar::{LANCE_SCALAR_INDEX, ScalarIndexParams, inverted::tokenizer::InvertedIndexParams},
Expand Down Expand Up @@ -311,6 +311,7 @@ impl<'a> CreateIndexBuilder<'a> {
.ok_or_else(|| {
Error::index("Vector index type must take a VectorIndexParams".to_string())
})?;
let index_version = vec_params.index_type().version() as u32;

if train {
// Check if this is distributed indexing (fragment-level)
Expand Down Expand Up @@ -354,7 +355,7 @@ impl<'a> CreateIndexBuilder<'a> {
}
CreatedIndex {
index_details: vector_index_details(),
index_version: VECTOR_INDEX_VERSION,
index_version,
}
}
// Can't use if let Some(...) here because it's not stable yet.
Expand Down Expand Up @@ -388,7 +389,7 @@ impl<'a> CreateIndexBuilder<'a> {
}
CreatedIndex {
index_details: vector_index_details(),
index_version: VECTOR_INDEX_VERSION,
index_version: self.index_type.version() as u32,
}
}
(IndexType::FragmentReuse, _) => {
Expand Down
9 changes: 7 additions & 2 deletions rust/lance/src/index/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use lance_index::scalar::{
ScalarIndex, ScalarIndexParams, bitmap::BITMAP_LOOKUP_NAME, inverted::INVERT_LIST_FILE,
lance_format::LanceIndexStore,
};
use lance_index::{DatasetIndexExt, IndexCriteria, IndexType, VECTOR_INDEX_VERSION};
use lance_index::{DatasetIndexExt, IndexCriteria, IndexType};
use lance_table::format::{Fragment, IndexMetadata};
use log::info;
use tracing::instrument;
Expand Down Expand Up @@ -243,7 +243,12 @@ impl IndexDetails {
/// Returns the index version
pub fn index_version(&self) -> Result<u32> {
if self.is_vector() {
Ok(VECTOR_INDEX_VERSION)
// VectorIndexDetails currently does not include the concrete vector
// subtype (IVF_PQ / IVF_RQ / ...), so compatibility filtering cannot
// do per-subtype version checks here. Use the highest supported
// vector index version as a safe upper bound; older binaries still
// ignore newer indices based on their own lower bound.
Ok(IndexType::max_vector_version())
} else {
self.get_plugin().map(|p| p.version())
}
Expand Down
3 changes: 1 addition & 2 deletions rust/lance/src/index/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ use lance_index::vector::{
};
use lance_index::{
DatasetIndexExt, INDEX_AUXILIARY_FILE_NAME, INDEX_METADATA_SCHEMA_KEY, IndexType,
VECTOR_INDEX_VERSION,
};
use lance_io::traits::Reader;
use lance_linalg::distance::*;
Expand Down Expand Up @@ -1553,7 +1552,7 @@ pub async fn initialize_vector_index(
dataset_version: target_dataset.manifest.version,
fragment_bitmap,
index_details: Some(Arc::new(vector_index_details())),
index_version: VECTOR_INDEX_VERSION as i32,
index_version: source_index.index_version,
created_at: Some(chrono::Utc::now()),
base_id: None,
};
Expand Down