Skip to content

Introduce getQuantizedVectorValues method in LeafReader to access QuantizedByteVectorValues #14792

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/CodecReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
import org.apache.lucene.util.quantization.QuantizedVectorsReader;

/** LeafReader implemented by codec APIs. */
public abstract class CodecReader extends LeafReader {
Expand Down Expand Up @@ -258,6 +261,26 @@ public final ByteVectorValues getByteVectorValues(String field) throws IOExcepti
return getVectorReader().getByteVectorValues(field);
}

/**
 * Looks up the quantized vector values of {@code field} through this reader's vectors reader.
 *
 * <p>Returns {@code null} when the field is unknown, has a vector dimension of zero, is not
 * encoded as {@link VectorEncoding#FLOAT32}, or when the reader resolved for the field is not a
 * {@link QuantizedVectorsReader}.
 */
@Override
public final QuantizedByteVectorValues getQuantizedVectorValues(String field) throws IOException {
  ensureOpen();

  final FieldInfo info = getFieldInfos().fieldInfo(field);
  final boolean indexesFloatVectors =
      info != null
          && info.getVectorDimension() != 0
          && info.getVectorEncoding() == VectorEncoding.FLOAT32;
  if (indexesFloatVectors == false) {
    // Field does not exist or does not index float vectors
    return null;
  }

  KnnVectorsReader reader = getVectorReader();
  if (reader instanceof PerFieldKnnVectorsFormat.FieldsReader) {
    // A per-field reader is asked for the reader it holds for this particular field.
    final PerFieldKnnVectorsFormat.FieldsReader perField =
        (PerFieldKnnVectorsFormat.FieldsReader) reader;
    reader = perField.getFieldReader(field);
  }

  if (reader instanceof QuantizedVectorsReader == false) {
    // Also covers a null reader: nothing quantized can be served for this field.
    return null;
  }
  final QuantizedVectorsReader quantizedReader = (QuantizedVectorsReader) reader;
  return quantizedReader.getQuantizedVectorValues(field);
}

@Override
public final void searchNearestVectors(
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;

abstract class DocValuesLeafReader extends LeafReader {
@Override
Expand Down Expand Up @@ -57,6 +58,11 @@ public final ByteVectorValues getByteVectorValues(String field) throws IOExcepti
throw new UnsupportedOperationException();
}

/**
 * Not supported by this reader: every call fails, whatever {@code field} is.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public QuantizedByteVectorValues getQuantizedVectorValues(String field) throws IOException {
throw new UnsupportedOperationException();
}

@Override
public void searchNearestVectors(
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;

/**
* The {@link ExitableDirectoryReader} wraps a real index {@link DirectoryReader} and allows for a
Expand Down Expand Up @@ -331,6 +332,15 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException {
return new ExitableByteVectorValues(vectorValues);
}

/**
 * Fetches the quantized vector values of {@code field} from the wrapped reader and, when present,
 * wraps them in an {@code ExitableQuantizedByteVectorValues}. Returns {@code null} if the wrapped
 * reader has none for the field.
 */
@Override
public QuantizedByteVectorValues getQuantizedVectorValues(String field) throws IOException {
  final QuantizedByteVectorValues delegate = in.getQuantizedVectorValues(field);
  return delegate == null ? null : new ExitableQuantizedByteVectorValues(delegate);
}

@Override
public void searchNearestVectors(
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs)
Expand Down Expand Up @@ -513,6 +523,49 @@ public ByteVectorValues copy() {
throw new UnsupportedOperationException();
}
}

/**
 * Wrapper around a {@link QuantizedByteVectorValues} instance. All overridden accessors forward
 * to the wrapped instance; only {@link #iterator()} differs, passing the wrapped iterator
 * through {@code createExitableIterator} together with the enclosing reader's {@code
 * queryTimeout}.
 *
 * <p>NOTE(review): methods of the superclass that are not overridden here are not forwarded to
 * the wrapped instance - confirm that the inherited behavior is the intended one.
 */
private class ExitableQuantizedByteVectorValues extends QuantizedByteVectorValues {
  /** The wrapped values every call is forwarded to. */
  private final QuantizedByteVectorValues delegate;

  public ExitableQuantizedByteVectorValues(QuantizedByteVectorValues vectorValues) {
    delegate = vectorValues;
  }

  /** Returns the wrapped iterator routed through {@code createExitableIterator}. */
  @Override
  public DocIndexIterator iterator() {
    final DocIndexIterator inner = delegate.iterator();
    return createExitableIterator(inner, queryTimeout);
  }

  /** Forwards to the wrapped values. */
  @Override
  public int dimension() {
    return delegate.dimension();
  }

  /** Forwards to the wrapped values. */
  @Override
  public int size() {
    return delegate.size();
  }

  /** Forwards to the wrapped values. */
  @Override
  public int ordToDoc(int ord) {
    return delegate.ordToDoc(ord);
  }

  /** Forwards to the wrapped values. */
  @Override
  public byte[] vectorValue(int ord) throws IOException {
    return delegate.vectorValue(ord);
  }

  /** Forwards to the wrapped values. */
  @Override
  public float getScoreCorrectionConstant(int ord) throws IOException {
    return delegate.getScoreCorrectionConstant(ord);
  }

  /** Forwards to the wrapped values; the returned scorer is not wrapped. */
  @Override
  public VectorScorer scorer(byte[] target) throws IOException {
    return delegate.scorer(target);
  }
}
}

private static KnnVectorValues.DocIndexIterator createExitableIterator(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOBooleanSupplier;
import org.apache.lucene.util.Unwrappable;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;

/**
* A <code>FilterLeafReader</code> contains another LeafReader, which it uses as its basic source of
Expand Down Expand Up @@ -363,6 +364,11 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException {
return in.getByteVectorValues(field);
}

/** Forwards the request unchanged to the wrapped reader {@code in} and returns its result. */
@Override
public QuantizedByteVectorValues getQuantizedVectorValues(String field) throws IOException {
return in.getQuantizedVectorValues(field);
}

@Override
public void searchNearestVectors(
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
Expand Down
11 changes: 11 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/LeafReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.search.TopKnnCollector;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;

/**
* {@code LeafReader} is an abstract class, providing an interface for accessing an index. Search of
Expand Down Expand Up @@ -225,6 +226,16 @@ public final PostingsEnum postings(Term term) throws IOException {
*/
public abstract ByteVectorValues getByteVectorValues(String field) throws IOException;

/**
* Returns {@link QuantizedByteVectorValues} for this field, or null if no {@link
* QuantizedByteVectorValues} were indexed. The returned instance should only be used by a single
* thread.
*
* @param field the name of the field whose quantized vector values are requested
* @return the quantized vector values for {@code field}, or {@code null} if there are none
* @throws IOException if there is a low-level I/O error
* @lucene.experimental
*/
public abstract QuantizedByteVectorValues getQuantizedVectorValues(String field)
throws IOException;

/**
* Return the k nearest neighbor documents as determined by comparison of their vector values for
* this field, to the given vector, by the field's similarity function. The score of each document
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;

/**
* A {@link LeafReader} which reads multiple, parallel indexes. Each index added must have the same
Expand Down Expand Up @@ -463,6 +464,13 @@ public ByteVectorValues getByteVectorValues(String fieldName) throws IOException
return reader == null ? null : reader.getByteVectorValues(fieldName);
}

/**
 * Resolves the reader registered for {@code fieldName} in {@code fieldToReader} and asks it for
 * the field's quantized vector values. Returns {@code null} when no reader is registered for the
 * field.
 */
@Override
public QuantizedByteVectorValues getQuantizedVectorValues(String fieldName) throws IOException {
  ensureOpen();
  final LeafReader owner = fieldToReader.get(fieldName);
  if (owner == null) {
    return null;
  }
  return owner.getQuantizedVectorValues(fieldName);
}

@Override
public void searchNearestVectors(
String fieldName, float[] target, KnnCollector knnCollector, Bits acceptDocs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
Expand All @@ -40,9 +42,11 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SuppressForbidden;
import org.apache.lucene.util.TestVectorUtil;
import org.apache.lucene.util.hnsw.HnswGraphBuilder;

/**
* Test that uses a default/Lucene implementation of {@link QueryTimeout} to exit out of long-running
Expand Down Expand Up @@ -564,6 +568,87 @@ public void testByteVectorValues() throws IOException {
directory.close();
}

/**
 * Indexes random float vectors with a scalar-quantized HNSW vectors format, deletes a few
 * documents, and then reads the index through an {@link ExitableDirectoryReader}. With an
 * immediate timeout both scanning the quantized values and a nearest-neighbor search must throw
 * {@link ExitingReaderException}; with an infinite timeout both must complete.
 */
public void testQuantizedByteVectorValues() throws Exception {
  Codec quantizedCodec =
      TestUtil.alwaysKnnVectorsFormat(
          new Lucene99HnswScalarQuantizedVectorsFormat(10, HnswGraphBuilder.DEFAULT_BEAM_WIDTH));
  Directory dir = newDirectory();
  IndexWriterConfig config = new IndexWriterConfig().setCodec(quantizedCodec);
  IndexWriter indexWriter = new IndexWriter(dir, config);

  final int docCount = atLeast(20);
  final int deleteCount = atMost(5);
  final int dims = atLeast(3);

  for (int docId = 0; docId < docCount; docId++) {
    float[] vector = new float[dims];
    for (int d = 0; d < dims; d++) {
      vector[d] = random().nextFloat();
    }
    FieldType vectorType =
        KnnFloatVectorField.createFieldType(dims, VectorSimilarityFunction.COSINE);

    Document doc = new Document();
    doc.add(new KnnFloatVectorField("vector", vector, vectorType));
    doc.add(new StringField("id", Integer.toString(docId), Field.Store.YES));
    indexWriter.addDocument(doc);
  }

  indexWriter.forceMerge(1);
  indexWriter.commit();

  // Deletions are applied after the merge and commit, just before the writer is closed.
  for (int docId = 0; docId < deleteCount; docId++) {
    indexWriter.deleteDocuments(new Term("id", Integer.toString(docId)));
  }

  indexWriter.close();

  // Randomly exercise either the timed-out path or the never-timing-out path.
  final QueryTimeout timeout =
      random().nextBoolean() ? immediateQueryTimeout() : infiniteQueryTimeout();

  DirectoryReader plainReader = DirectoryReader.open(dir);
  DirectoryReader exitableReader = new ExitableDirectoryReader(plainReader, timeout);
  IndexReader testReader = new TestReader(getOnlyLeafReader(exitableReader));

  LeafReaderContext leafContext = testReader.leaves().get(0);
  LeafReader leafReader = leafContext.reader();

  if (timeout.shouldExit() == false) {
    KnnVectorValues quantized = leafReader.getQuantizedVectorValues("vector");
    scanAndRetrieve(leafReader, quantized);

    leafReader.searchNearestVectors(
        "vector",
        TestVectorUtil.randomVector(dims),
        5,
        leafReader.getLiveDocs(),
        Integer.MAX_VALUE);
  } else {
    expectThrows(
        ExitingReaderException.class,
        () -> {
          KnnVectorValues quantized = leafReader.getQuantizedVectorValues("vector");
          scanAndRetrieve(leafReader, quantized);
        });

    expectThrows(
        ExitingReaderException.class,
        () ->
            leafReader.searchNearestVectors(
                "vector",
                TestVectorUtil.randomVector(dims),
                5,
                leafReader.getLiveDocs(),
                Integer.MAX_VALUE));
  }

  testReader.close();
  dir.close();
}

private static void scanAndRetrieve(LeafReader leaf, KnnVectorValues values) throws IOException {
KnnVectorValues.DocIndexIterator iter = values.iterator();
for (iter.nextDoc();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;

public class TestSegmentToThreadMapping extends LuceneTestCase {

Expand Down Expand Up @@ -116,6 +117,11 @@ public ByteVectorValues getByteVectorValues(String field) {
return null;
}

/** Stub implementation: always returns {@code null}, regardless of {@code field}. */
@Override
public QuantizedByteVectorValues getQuantizedVectorValues(String field) {
return null;
}

/** Stub implementation: the body is empty, so nothing is added to {@code knnCollector}. */
@Override
public void searchNearestVectors(
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;

/**
* Wraps a Terms with a {@link org.apache.lucene.index.LeafReader}, typically from term vectors.
Expand Down Expand Up @@ -179,6 +180,11 @@ public ByteVectorValues getByteVectorValues(String fieldName) {
return null;
}

/** Always returns {@code null}: this reader exposes no quantized vector values for any field. */
@Override
public QuantizedByteVectorValues getQuantizedVectorValues(String field) throws IOException {
return null;
}

/** No-op: the body is empty, so nothing is added to {@code knnCollector}. */
@Override
public void searchNearestVectors(
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import org.apache.lucene.util.RecyclingByteBlockAllocator;
import org.apache.lucene.util.RecyclingIntBlockAllocator;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;

/**
* High-performance single-document main memory Apache Lucene fulltext search index.
Expand Down Expand Up @@ -1727,6 +1728,11 @@ public ByteVectorValues getByteVectorValues(String fieldName) {
return new MemoryByteVectorValues(info);
}

/**
 * Returns {@code null} for every field.
 *
 * <p>This follows the {@code LeafReader#getQuantizedVectorValues} contract, under which {@code
 * null} signals that no quantized vector values were indexed. Returning {@code null} instead of
 * throwing lets callers that probe arbitrary readers treat this one like any other reader
 * without quantized vectors.
 *
 * @param field the name of the field whose quantized vector values are requested
 * @return always {@code null}
 */
@Override
public QuantizedByteVectorValues getQuantizedVectorValues(String field) throws IOException {
  return null;
}

/** No-op: the body is empty, so nothing is added to {@code knnCollector}. */
@Override
public void searchNearestVectors(
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;

/**
* This is a hack to make index sorting fast, with a {@link LeafReader} that always returns merge
Expand Down Expand Up @@ -239,6 +240,11 @@ public ByteVectorValues getByteVectorValues(String fieldName) throws IOException
return in.getByteVectorValues(fieldName);
}

/**
 * Returns the quantized vector values of {@code field} from the wrapped reader {@code in}.
 *
 * <p>Delegating, rather than throwing {@link UnsupportedOperationException}, keeps this accessor
 * in line with {@code getByteVectorValues} in this class, which also forwards to {@code in}.
 *
 * @param field the name of the field whose quantized vector values are requested
 * @return whatever the wrapped reader returns for {@code field}, which may be {@code null}
 * @throws IOException if the wrapped reader fails to read the values
 */
@Override
public QuantizedByteVectorValues getQuantizedVectorValues(String field) throws IOException {
  return in.getQuantizedVectorValues(field);
}

@Override
public void searchNearestVectors(
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
Expand Down
Loading