Skip to content

Commit cf17110

Browse files
[8.19] ES|QL dense vector field type support (elastic#126456) (elastic#128495)
* ES|QL dense vector field type support (elastic#126456) (cherry picked from commit b759161) # Conflicts: # server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java # x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java # x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java # x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java # x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java # x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java * [CI] Auto commit changes from spotless * Fix switch expression --------- Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
1 parent 6482b97 commit cf17110

File tree

28 files changed

+709
-51
lines changed

28 files changed

+709
-51
lines changed

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@
1111

1212
import org.apache.lucene.index.BinaryDocValues;
1313
import org.apache.lucene.index.DocValues;
14+
import org.apache.lucene.index.FloatVectorValues;
1415
import org.apache.lucene.index.LeafReaderContext;
1516
import org.apache.lucene.index.NumericDocValues;
1617
import org.apache.lucene.index.SortedDocValues;
1718
import org.apache.lucene.index.SortedNumericDocValues;
1819
import org.apache.lucene.index.SortedSetDocValues;
1920
import org.apache.lucene.util.BytesRef;
2021
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
22+
import org.elasticsearch.index.IndexVersion;
2123
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
2224
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
2325
import org.elasticsearch.index.mapper.BlockLoader.Builder;
@@ -26,6 +28,7 @@
2628
import org.elasticsearch.index.mapper.BlockLoader.DoubleBuilder;
2729
import org.elasticsearch.index.mapper.BlockLoader.IntBuilder;
2830
import org.elasticsearch.index.mapper.BlockLoader.LongBuilder;
31+
import org.elasticsearch.index.mapper.vectors.VectorEncoderDecoder;
2932
import org.elasticsearch.search.fetch.StoredFieldsSpec;
3033

3134
import java.io.IOException;
@@ -504,6 +507,85 @@ public String toString() {
504507
}
505508
}
506509

510+
public static class DenseVectorBlockLoader extends DocValuesBlockLoader {
511+
private final String fieldName;
512+
private final int dimensions;
513+
514+
public DenseVectorBlockLoader(String fieldName, int dimensions) {
515+
this.fieldName = fieldName;
516+
this.dimensions = dimensions;
517+
}
518+
519+
@Override
520+
public Builder builder(BlockFactory factory, int expectedCount) {
521+
return factory.denseVectors(expectedCount, dimensions);
522+
}
523+
524+
@Override
525+
public AllReader reader(LeafReaderContext context) throws IOException {
526+
FloatVectorValues floatVectorValues = context.reader().getFloatVectorValues(fieldName);
527+
if (floatVectorValues != null) {
528+
return new DenseVectorValuesBlockReader(floatVectorValues, dimensions);
529+
}
530+
return new ConstantNullsReader();
531+
}
532+
}
533+
534+
private static class DenseVectorValuesBlockReader extends BlockDocValuesReader {
535+
private final FloatVectorValues floatVectorValues;
536+
private final int dimensions;
537+
538+
DenseVectorValuesBlockReader(FloatVectorValues floatVectorValues, int dimensions) {
539+
this.floatVectorValues = floatVectorValues;
540+
this.dimensions = dimensions;
541+
}
542+
543+
@Override
544+
public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException {
545+
// Doubles from doc values ensures that the values are in order
546+
try (BlockLoader.FloatBuilder builder = factory.denseVectors(docs.count(), dimensions)) {
547+
for (int i = 0; i < docs.count(); i++) {
548+
int doc = docs.get(i);
549+
if (doc < floatVectorValues.docID()) {
550+
throw new IllegalStateException("docs within same block must be in order");
551+
}
552+
read(doc, builder);
553+
}
554+
return builder.build();
555+
}
556+
}
557+
558+
@Override
559+
public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException {
560+
read(docId, (BlockLoader.FloatBuilder) builder);
561+
}
562+
563+
private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException {
564+
if (floatVectorValues.advance(doc) == doc) {
565+
builder.beginPositionEntry();
566+
float[] floats = floatVectorValues.vectorValue();
567+
assert floats.length == dimensions
568+
: "unexpected dimensions for vector value; expected " + dimensions + " but got " + floats.length;
569+
for (float aFloat : floats) {
570+
builder.appendFloat(aFloat);
571+
}
572+
builder.endPositionEntry();
573+
} else {
574+
builder.appendNull();
575+
}
576+
}
577+
578+
@Override
579+
public int docId() {
580+
return floatVectorValues.docID();
581+
}
582+
583+
@Override
584+
public String toString() {
585+
return "BlockDocValuesReader.FloatVectorValuesBlockReader";
586+
}
587+
}
588+
507589
public static class BytesRefsFromOrdsBlockLoader extends DocValuesBlockLoader {
508590
private final String fieldName;
509591

@@ -752,6 +834,94 @@ public String toString() {
752834
}
753835
}
754836

837+
public static class DenseVectorFromBinaryBlockLoader extends DocValuesBlockLoader {
838+
private final String fieldName;
839+
private final int dims;
840+
private final IndexVersion indexVersion;
841+
842+
public DenseVectorFromBinaryBlockLoader(String fieldName, int dims, IndexVersion indexVersion) {
843+
this.fieldName = fieldName;
844+
this.dims = dims;
845+
this.indexVersion = indexVersion;
846+
}
847+
848+
@Override
849+
public Builder builder(BlockFactory factory, int expectedCount) {
850+
return factory.denseVectors(expectedCount, dims);
851+
}
852+
853+
@Override
854+
public AllReader reader(LeafReaderContext context) throws IOException {
855+
BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName);
856+
if (docValues == null) {
857+
return new ConstantNullsReader();
858+
}
859+
return new DenseVectorFromBinary(docValues, dims, indexVersion);
860+
}
861+
}
862+
863+
private static class DenseVectorFromBinary extends BlockDocValuesReader {
864+
private final BinaryDocValues docValues;
865+
private final IndexVersion indexVersion;
866+
private final int dimensions;
867+
private final float[] scratch;
868+
869+
private int docID = -1;
870+
871+
DenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) {
872+
this.docValues = docValues;
873+
this.scratch = new float[dims];
874+
this.indexVersion = indexVersion;
875+
this.dimensions = dims;
876+
}
877+
878+
@Override
879+
public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException {
880+
try (BlockLoader.FloatBuilder builder = factory.denseVectors(docs.count(), dimensions)) {
881+
for (int i = 0; i < docs.count(); i++) {
882+
int doc = docs.get(i);
883+
if (doc < docID) {
884+
throw new IllegalStateException("docs within same block must be in order");
885+
}
886+
read(doc, builder);
887+
}
888+
return builder.build();
889+
}
890+
}
891+
892+
@Override
893+
public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException {
894+
read(docId, (BlockLoader.FloatBuilder) builder);
895+
}
896+
897+
private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException {
898+
this.docID = doc;
899+
if (false == docValues.advanceExact(doc)) {
900+
builder.appendNull();
901+
return;
902+
}
903+
BytesRef bytesRef = docValues.binaryValue();
904+
assert bytesRef.length > 0;
905+
VectorEncoderDecoder.decodeDenseVector(indexVersion, bytesRef, scratch);
906+
907+
builder.beginPositionEntry();
908+
for (float value : scratch) {
909+
builder.appendFloat(value);
910+
}
911+
builder.endPositionEntry();
912+
}
913+
914+
@Override
915+
public int docId() {
916+
return docID;
917+
}
918+
919+
@Override
920+
public String toString() {
921+
return "DenseVectorFromBinary.Bytes";
922+
}
923+
}
924+
755925
public static class BooleansBlockLoader extends DocValuesBlockLoader {
756926
private final String fieldName;
757927

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,11 @@ interface BlockFactory {
373373
*/
374374
DoubleBuilder doubles(int expectedCount);
375375

376+
/**
 * Build a builder to load dense vectors without any loading constraints.
 *
 * @param expectedVectorsCount number of vectors the caller expects to append
 *                             (NOTE(review): presumably one per document read; confirm against callers)
 * @param dimensions           number of dimensions of the vectors
 *                             (NOTE(review): presumably every appended vector has this many floats; confirm)
 * @return a {@link FloatBuilder} that receives the vector components
 */
379+
FloatBuilder denseVectors(int expectedVectorsCount, int dimensions);
380+
376381
/**
377382
* Build a builder to load ints as loaded from doc values.
378383
* Doc values load ints in sorted order.

server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,49 @@ public String toString() {
302302
}
303303
}
304304

305+
/**
306+
* Load {@code float}s from {@code _source}.
307+
*/
308+
public static class DenseVectorBlockLoader extends SourceBlockLoader {
309+
private final int dimensions;
310+
311+
public DenseVectorBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup, int dimensions) {
312+
super(fetcher, lookup);
313+
this.dimensions = dimensions;
314+
}
315+
316+
@Override
317+
public Builder builder(BlockFactory factory, int expectedCount) {
318+
return factory.denseVectors(expectedCount, dimensions);
319+
}
320+
321+
@Override
322+
public RowStrideReader rowStrideReader(LeafReaderContext context, DocIdSetIterator iter) {
323+
return new DenseVectors(fetcher, iter);
324+
}
325+
326+
@Override
327+
protected String name() {
328+
return "DenseVectors";
329+
}
330+
}
331+
332+
private static class DenseVectors extends BlockSourceReader {
333+
DenseVectors(ValueFetcher fetcher, DocIdSetIterator iter) {
334+
super(fetcher, iter);
335+
}
336+
337+
@Override
338+
protected void append(BlockLoader.Builder builder, Object v) {
339+
((BlockLoader.FloatBuilder) builder).appendFloat(((Number) v).floatValue());
340+
}
341+
342+
@Override
343+
public String toString() {
344+
return "BlockSourceReader.DenseVectors";
345+
}
346+
}
347+
305348
/**
306349
* Load {@code int}s from {@code _source}.
307350
*/

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,20 +50,26 @@
5050
import org.elasticsearch.index.fielddata.FieldDataContext;
5151
import org.elasticsearch.index.fielddata.IndexFieldData;
5252
import org.elasticsearch.index.mapper.ArraySourceValueFetcher;
53+
import org.elasticsearch.index.mapper.BlockDocValuesReader;
54+
import org.elasticsearch.index.mapper.BlockLoader;
55+
import org.elasticsearch.index.mapper.BlockSourceReader;
5356
import org.elasticsearch.index.mapper.DocumentParserContext;
5457
import org.elasticsearch.index.mapper.FieldMapper;
5558
import org.elasticsearch.index.mapper.MappedFieldType;
5659
import org.elasticsearch.index.mapper.Mapper;
5760
import org.elasticsearch.index.mapper.MapperBuilderContext;
5861
import org.elasticsearch.index.mapper.MapperParsingException;
5962
import org.elasticsearch.index.mapper.MappingParser;
63+
import org.elasticsearch.index.mapper.NumberFieldMapper;
6064
import org.elasticsearch.index.mapper.SimpleMappedFieldType;
6165
import org.elasticsearch.index.mapper.SourceLoader;
66+
import org.elasticsearch.index.mapper.SourceValueFetcher;
6267
import org.elasticsearch.index.mapper.TextSearchInfo;
6368
import org.elasticsearch.index.mapper.ValueFetcher;
6469
import org.elasticsearch.index.query.SearchExecutionContext;
6570
import org.elasticsearch.search.DocValueFormat;
6671
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
72+
import org.elasticsearch.search.lookup.Source;
6773
import org.elasticsearch.search.vectors.DenseVectorQuery;
6874
import org.elasticsearch.search.vectors.ESDiversifyingChildrenByteKnnVectorQuery;
6975
import org.elasticsearch.search.vectors.ESDiversifyingChildrenFloatKnnVectorQuery;
@@ -84,10 +90,12 @@
8490
import java.time.ZoneId;
8591
import java.util.Arrays;
8692
import java.util.HexFormat;
93+
import java.util.List;
8794
import java.util.Locale;
8895
import java.util.Map;
8996
import java.util.Objects;
9097
import java.util.Optional;
98+
import java.util.Set;
9199
import java.util.function.Function;
92100
import java.util.function.Supplier;
93101
import java.util.stream.Stream;
@@ -324,7 +332,8 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) {
324332
indexed.getValue(),
325333
similarity.getValue(),
326334
indexOptions.getValue(),
327-
meta.getValue()
335+
meta.getValue(),
336+
context.isSourceSynthetic()
328337
),
329338
builderParams(this, context),
330339
indexOptions.getValue(),
@@ -2053,6 +2062,7 @@ public static final class DenseVectorFieldType extends SimpleMappedFieldType {
20532062
private final VectorSimilarity similarity;
20542063
private final IndexVersion indexVersionCreated;
20552064
private final IndexOptions indexOptions;
2065+
private final boolean isSyntheticSource;
20562066

20572067
public DenseVectorFieldType(
20582068
String name,
@@ -2062,7 +2072,8 @@ public DenseVectorFieldType(
20622072
boolean indexed,
20632073
VectorSimilarity similarity,
20642074
IndexOptions indexOptions,
2065-
Map<String, String> meta
2075+
Map<String, String> meta,
2076+
boolean isSyntheticSource
20662077
) {
20672078
super(name, indexed, false, indexed == false, TextSearchInfo.NONE, meta);
20682079
this.elementType = elementType;
@@ -2071,6 +2082,7 @@ public DenseVectorFieldType(
20712082
this.similarity = similarity;
20722083
this.indexVersionCreated = indexVersionCreated;
20732084
this.indexOptions = indexOptions;
2085+
this.isSyntheticSource = isSyntheticSource;
20742086
}
20752087

20762088
@Override
@@ -2329,6 +2341,44 @@ ElementType getElementType() {
23292341
public IndexOptions getIndexOptions() {
23302342
return indexOptions;
23312343
}
2344+
2345+
@Override
2346+
public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) {
2347+
if (elementType != ElementType.FLOAT) {
2348+
// Just float dense vector support for now
2349+
return null;
2350+
}
2351+
2352+
if (indexed) {
2353+
return new BlockDocValuesReader.DenseVectorBlockLoader(name(), dims);
2354+
}
2355+
2356+
if (hasDocValues() && (blContext.fieldExtractPreference() == FieldExtractPreference.DOC_VALUES || isSyntheticSource)) {
2357+
return new BlockDocValuesReader.DenseVectorFromBinaryBlockLoader(name(), dims, indexVersionCreated);
2358+
}
2359+
2360+
BlockSourceReader.LeafIteratorLookup lookup = BlockSourceReader.lookupMatchingAll();
2361+
return new BlockSourceReader.DenseVectorBlockLoader(sourceValueFetcher(blContext.sourcePaths(name())), lookup, dims);
2362+
}
2363+
2364+
private SourceValueFetcher sourceValueFetcher(Set<String> sourcePaths) {
2365+
return new SourceValueFetcher(sourcePaths, null) {
2366+
@Override
2367+
protected Object parseSourceValue(Object value) {
2368+
if (value.equals("")) {
2369+
return null;
2370+
}
2371+
return NumberFieldMapper.NumberType.FLOAT.parse(value, false);
2372+
}
2373+
2374+
@Override
2375+
public List<Object> fetchValues(Source source, int doc, List<Object> ignoredValues) {
2376+
List<Object> result = super.fetchValues(source, doc, ignoredValues);
2377+
assert result.size() == dims : "Unexpected number of dimensions; got " + result.size() + " but expected " + dims;
2378+
return result;
2379+
}
2380+
};
2381+
}
23322382
}
23332383

23342384
private final IndexOptions indexOptions;
@@ -2383,7 +2433,8 @@ public void parse(DocumentParserContext context) throws IOException {
23832433
fieldType().indexed,
23842434
fieldType().similarity,
23852435
fieldType().indexOptions,
2386-
fieldType().meta()
2436+
fieldType().meta(),
2437+
fieldType().isSyntheticSource
23872438
);
23882439
Mapper update = new DenseVectorFieldMapper(
23892440
leafName(),

0 commit comments

Comments
 (0)