Skip to content

Introduce builder for $vectorSearch aggregation stage #1200

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions driver-core/src/main/com/mongodb/client/model/Aggregates.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@
package com.mongodb.client.model;

import com.mongodb.MongoNamespace;
import com.mongodb.annotations.Beta;
import com.mongodb.client.model.densify.DensifyOptions;
import com.mongodb.client.model.densify.DensifyRange;
import com.mongodb.client.model.fill.FillOptions;
import com.mongodb.client.model.fill.FillOutputField;
import com.mongodb.client.model.geojson.Point;
import com.mongodb.client.model.search.FieldSearchPath;
import com.mongodb.client.model.search.SearchCollector;
import com.mongodb.client.model.search.SearchOperator;
import com.mongodb.client.model.search.SearchOptions;
import com.mongodb.client.model.search.VectorSearchOptions;
import com.mongodb.lang.Nullable;
import org.bson.BsonArray;
import org.bson.BsonBoolean;
Expand All @@ -34,6 +37,7 @@
import org.bson.BsonString;
import org.bson.BsonType;
import org.bson.BsonValue;
import org.bson.Document;
import org.bson.codecs.configuration.CodecRegistry;
import org.bson.conversions.Bson;

Expand All @@ -47,6 +51,7 @@
import static com.mongodb.client.model.GeoNearOptions.geoNearOptions;
import static com.mongodb.client.model.densify.DensifyOptions.densifyOptions;
import static com.mongodb.client.model.search.SearchOptions.searchOptions;
import static com.mongodb.client.model.search.VectorSearchOptions.vectorSearchOptions;
import static com.mongodb.internal.Iterables.concat;
import static com.mongodb.internal.client.model.Util.sizeAtLeast;
import static java.util.Arrays.asList;
Expand Down Expand Up @@ -933,6 +938,90 @@ public static Bson searchMeta(final SearchCollector collector, final SearchOptio
return new SearchStage("$searchMeta", notNull("collector", collector), notNull("options", options));
}

/**
 * Builds a {@code $vectorSearch} pipeline stage (a MongoDB Atlas feature) without any optional fields.
 * This overload validates its reference arguments and then delegates to
 * {@link #vectorSearch(FieldSearchPath, Iterable, String, long, long, VectorSearchOptions)},
 * passing {@link VectorSearchOptions#vectorSearchOptions()} as the options.
 * The relevance score assigned to each found document may be extracted with the
 * {@code $meta: "vectorSearchScore"} expression, e.g., via {@link Projections#metaVectorSearchScore(String)}.
 *
 * @param path The field to be searched.
 * @param queryVector The query vector. The number of dimensions must match that of the {@code index}.
 * @param index The name of the index to use.
 * @param numCandidates The number of candidates.
 * @param limit The limit on the number of documents produced by the pipeline stage.
 * @return The {@code $vectorSearch} pipeline stage.
 *
 * @mongodb.atlas.manual atlas-vector-search/vector-search-stage/ $vectorSearch
 * @mongodb.atlas.manual atlas-search/scoring/ Scoring
 * @mongodb.server.release 6.0.10
 * @since 4.11
 */
@Beta(Beta.Reason.SERVER)
public static Bson vectorSearch(
        final FieldSearchPath path,
        final Iterable<Double> queryVector,
        final String index,
        final long numCandidates,
        final long limit) {
    // Validate in declaration order before delegating.
    notNull("path", path);
    notNull("queryVector", queryVector);
    notNull("index", index);
    return vectorSearch(path, queryVector, index, numCandidates, limit, vectorSearchOptions());
}

/**
 * Builds a {@code $vectorSearch} pipeline stage (a MongoDB Atlas feature) with optional fields.
 * The returned {@link Bson} renders lazily: the stage document is assembled each time
 * {@code toBsonDocument} is called, with the required fields written first and the fields of
 * {@code options} merged in afterwards.
 * The relevance score assigned to each found document may be extracted with the
 * {@code $meta: "vectorSearchScore"} expression, e.g., via {@link Projections#metaVectorSearchScore(String)}.
 *
 * @param path The field to be searched.
 * @param queryVector The query vector. The number of dimensions must match that of the {@code index}.
 * @param index The name of the index to use.
 * @param numCandidates The number of candidates.
 * @param limit The limit on the number of documents produced by the pipeline stage.
 * @param options Optional {@code $vectorSearch} pipeline stage fields.
 * @return The {@code $vectorSearch} pipeline stage.
 *
 * @mongodb.atlas.manual atlas-vector-search/vector-search-stage/ $vectorSearch
 * @mongodb.atlas.manual atlas-search/scoring/ Scoring
 * @mongodb.server.release 6.0.10
 * @since 4.11
 */
@Beta(Beta.Reason.SERVER)
public static Bson vectorSearch(
        final FieldSearchPath path,
        final Iterable<Double> queryVector,
        final String index,
        final long numCandidates,
        final long limit,
        final VectorSearchOptions options) {
    notNull("path", path);
    notNull("queryVector", queryVector);
    notNull("index", index);
    notNull("options", options);
    return new Bson() {
        @Override
        public <TDocument> BsonDocument toBsonDocument(final Class<TDocument> documentClass, final CodecRegistry codecRegistry) {
            // Required fields first, in a fixed order.
            Document stageFields = new Document();
            stageFields.put("path", path.toValue());
            stageFields.put("queryVector", queryVector);
            stageFields.put("index", index);
            stageFields.put("numCandidates", numCandidates);
            stageFields.put("limit", limit);
            // Optional fields are merged in last.
            BsonDocument optionalFields = options.toBsonDocument(documentClass, codecRegistry);
            stageFields.putAll(optionalFields);
            Document stage = new Document("$vectorSearch", stageFields);
            return stage.toBsonDocument(documentClass, codecRegistry);
        }

        @Override
        public String toString() {
            StringBuilder description = new StringBuilder("Stage{name=$vectorSearch");
            description.append(", path=").append(path);
            description.append(", queryVector=").append(queryVector);
            description.append(", index=").append(index);
            description.append(", numCandidates=").append(numCandidates);
            description.append(", limit=").append(limit);
            description.append(", options=").append(options);
            description.append('}');
            return description.toString();
        }
    };
}

/**
* Creates an $unset pipeline stage that removes/excludes fields from documents
*
Expand Down
20 changes: 20 additions & 0 deletions driver-core/src/main/com/mongodb/client/model/Filters.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.mongodb.client.model;

import com.mongodb.annotations.Beta;
import com.mongodb.client.model.geojson.Geometry;
import com.mongodb.client.model.geojson.Point;
import com.mongodb.client.model.search.SearchCollector;
Expand Down Expand Up @@ -84,11 +85,30 @@ public static <TItem> Bson eq(@Nullable final TItem value) {
* @param <TItem> the value type
* @return the filter
* @mongodb.driver.manual reference/operator/query/eq $eq
* @see #eqFull(String, Object)
*/
public static <TItem> Bson eq(final String fieldName, @Nullable final TItem value) {
    // Equality is expressed through SimpleEncodingFilter; see eqFull for the explicit "$eq" variant.
    final Bson equalityFilter = new SimpleEncodingFilter<>(fieldName, value);
    return equalityFilter;
}

/**
* Creates a filter that matches all documents where the value of the field name equals the specified value.
* Unlike {@link #eq(String, Object)}, this method creates a full form of {@code $eq},
* i.e., the filter is built with an explicit {@code "$eq"} operator.
* <p>
* It is meant to be used when building a filter for
* {@link com.mongodb.client.model.search.VectorSearchOptions#filter(Bson)}, where {@link #eq(String, Object)}
* cannot be used for now.</p>
* <p>
* This method exists temporarily until Atlas starts supporting the short form of {@code $eq}.
* It will likely be removed in the next driver release.</p>
*
* @param fieldName the field name
* @param value the value, which may be null
* @param <TItem> the value type
* @return the filter
* @mongodb.driver.manual reference/operator/query/eq $eq
* @since 4.11
*/
@Beta(Beta.Reason.SERVER)
public static <TItem> Bson eqFull(final String fieldName, @Nullable final TItem value) {
// Uses OperatorFilter with an explicit "$eq" rather than the SimpleEncodingFilter used by eq(String, Object).
return new OperatorFilter<>("$eq", fieldName, value);
Copy link
Member Author

@stIncMale stIncMale Sep 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

$vectorSearch does not accept the short form of $eq, which we are currently using. We have the following options:

  1. Change the implementation of Filters.eq, which is what I did initially. Turned out, this is not an option, because there are specification tests that expect equality to be expressed in its short variant.
  2. Introduce a new Filters.eqFull (I am not sure how to name it), and document that users are not to use Filters.eq with VectorSearchOptions.filter. This is really ugly.
  3. Introduce a whole new API for filters. Even if we ever do this, we will definitely not do this as part of the current task.
  4. Patch filters on the fly. Sounds like a bad idea.
  5. Figure out if the current server behavior is intentional, or is a bug.

P.S. More thoughts on the current approach, which turned out to be unviable anyway. On one hand, nobody should care whether we use a full form or not. On the other hand, for some reason we documented that we are not using the full form. This, combined with the regex behavior, allows users to abuse Filters.eq to dynamically use either equality, or regex matching, even though the method is clearly intended only for equality (Filters.regex is intended for regex matching). This makes the change breaking, but I am still willing to do it.

Copy link
Member Author

@stIncMale stIncMale Sep 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not supporting the short $eq form was intentional. Atlas will support it in the future, but not now.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we annotate this as Beta, and remove it once Atlas supports the short form?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alexander Lukyanchikov said that all Atlas clusters will get the updated behavior, unless they are "pinned on an older version for some reason" (I am not even sure if this can be done by a user). So this should work.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A ticket to remove Filters.eqFull https://jira.mongodb.org/browse/JAVA-5174.

}

/**
* Creates a filter that matches all documents where the value of the field name does not equal the specified value.
*
Expand Down
20 changes: 20 additions & 0 deletions driver-core/src/main/com/mongodb/client/model/Projections.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@

package com.mongodb.client.model;

import com.mongodb.annotations.Beta;
import com.mongodb.client.model.search.FieldSearchPath;
import com.mongodb.client.model.search.SearchCollector;
import com.mongodb.client.model.search.SearchCount;
import com.mongodb.client.model.search.SearchOperator;
import com.mongodb.client.model.search.SearchOptions;
import com.mongodb.client.model.search.VectorSearchOptions;
import org.bson.BsonArray;
import org.bson.BsonDocument;
import org.bson.BsonInt32;
Expand Down Expand Up @@ -163,6 +166,7 @@ public static Bson elemMatch(final String fieldName, final Bson filter) {
* @since 4.1
* @see #metaTextScore(String)
* @see #metaSearchScore(String)
* @see #metaVectorSearchScore(String)
* @see #metaSearchHighlights(String)
*/
public static Bson meta(final String fieldName, final String metaFieldName) {
Expand Down Expand Up @@ -196,6 +200,22 @@ public static Bson metaSearchScore(final String fieldName) {
return meta(fieldName, "searchScore");
}

/**
 * Creates a projection of the {@code vectorSearchScore} metadata to the given field name,
 * for use with {@link Aggregates#vectorSearch(FieldSearchPath, Iterable, String, long, long, VectorSearchOptions)}.
 * This is a shorthand for {@link #meta(String, String)} called with {@code "vectorSearchScore"} as the second argument.
 *
 * @param fieldName the field name
 * @return the projection
 * @mongodb.atlas.manual atlas-search/scoring/ Scoring
 * @mongodb.server.release 6.0.10
 * @since 4.11
 */
@Beta(Beta.Reason.SERVER)
public static Bson metaVectorSearchScore(final String fieldName) {
    final String metaFieldName = "vectorSearchScore";
    return meta(fieldName, metaFieldName);
}

/**
* Creates a projection to the given field name of the searchHighlights,
* for use with {@link Aggregates#search(SearchOperator, SearchOptions)} / {@link Aggregates#search(SearchCollector, SearchOptions)}.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright 2008-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.client.model.search;

import com.mongodb.annotations.Immutable;
import com.mongodb.internal.client.model.AbstractConstructibleBson;
import org.bson.BsonDocument;
import org.bson.Document;
import org.bson.conversions.Bson;

import static com.mongodb.assertions.Assertions.notNull;

/**
 * Package-private implementation of {@link VectorSearchOptions} backed by {@link AbstractConstructibleBson}.
 * Every builder method validates its arguments and returns the result of {@code newAppended},
 * which yields instances created through {@link #newSelf(Bson, Document)}.
 */
final class VectorSearchConstructibleBson extends AbstractConstructibleBson<VectorSearchConstructibleBson> implements VectorSearchOptions {
    /**
     * An {@linkplain Immutable immutable} {@link BsonDocument#isEmpty() empty} instance.
     */
    static final VectorSearchConstructibleBson EMPTY_IMMUTABLE = new VectorSearchConstructibleBson(AbstractConstructibleBson.EMPTY_IMMUTABLE);

    /**
     * Wraps {@code base} without any appended fields.
     *
     * @param base The {@link Bson} to build upon.
     */
    VectorSearchConstructibleBson(final Bson base) {
        super(base);
    }

    /**
     * Used by {@link #newSelf(Bson, Document)} only.
     *
     * @param base The {@link Bson} to build upon.
     * @param appended The fields appended on top of {@code base}.
     */
    private VectorSearchConstructibleBson(final Bson base, final Document appended) {
        super(base, appended);
    }

    @Override
    protected VectorSearchConstructibleBson newSelf(final Bson base, final Document appended) {
        return new VectorSearchConstructibleBson(base, appended);
    }

    /**
     * Appends the {@code "filter"} field.
     *
     * @param filter The filter; must not be {@code null}.
     * @return A new {@link VectorSearchOptions} with the filter specified.
     */
    @Override
    public VectorSearchOptions filter(final Bson filter) {
        // The label passed to notNull must name the checked argument so that a failure
        // message points at "filter" (it previously said "name").
        return newAppended("filter", notNull("filter", filter));
    }

    /**
     * Appends an arbitrary field.
     *
     * @param name The option name; must not be {@code null}.
     * @param value The option value; must not be {@code null}.
     * @return A new {@link VectorSearchOptions} with the option specified.
     */
    @Override
    public VectorSearchOptions option(final String name, final Object value) {
        return newAppended(notNull("name", name), notNull("value", value));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Copyright 2008-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.client.model.search;

import com.mongodb.annotations.Beta;
import com.mongodb.annotations.Sealed;
import com.mongodb.client.model.Aggregates;
import com.mongodb.client.model.Filters;
import org.bson.conversions.Bson;

/**
 * The optional fields of the {@code $vectorSearch} aggregation pipeline stage.
 * Start from {@link #vectorSearchOptions()} and chain the builder methods; each of them
 * produces a new {@link VectorSearchOptions}.
 *
 * @see Aggregates#vectorSearch(FieldSearchPath, Iterable, String, long, long, VectorSearchOptions)
 * @mongodb.atlas.manual atlas-vector-search/vector-search-stage/ $vectorSearch
 * @mongodb.server.release 6.0.10
 * @since 4.11
 */
@Sealed
@Beta(Beta.Reason.SERVER)
public interface VectorSearchOptions extends Bson {
    /**
     * Returns {@link VectorSearchOptions} that represents server defaults, i.e., with no optional field specified.
     *
     * @return {@link VectorSearchOptions} that represents server defaults.
     */
    static VectorSearchOptions vectorSearchOptions() {
        return VectorSearchConstructibleBson.EMPTY_IMMUTABLE;
    }

    /**
     * Creates a new {@link VectorSearchOptions} with the filter specified.
     *
     * @param filter A filter that is applied before applying the
     * {@link Aggregates#vectorSearch(FieldSearchPath, Iterable, String, long, long, VectorSearchOptions) queryVector}.
     * The filter may be created via {@link Filters}, but not every filter is necessarily supported;
     * see the MongoDB documentation for the list of supported filters.
     * <p>
     * Note that for now one has to use {@link Filters#eqFull(String, Object)} instead of {@link Filters#eq(String, Object)}.</p>
     * @return A new {@link VectorSearchOptions}.
     */
    VectorSearchOptions filter(Bson filter);

    /**
     * Creates a new {@link VectorSearchOptions} with the specified option, for situations when no other builder method
     * satisfies your needs.
     * This method cannot be used to validate the syntax.
     * <p>
     * <i>Example</i><br>
     * The two {@link VectorSearchOptions} objects created below are functionally equivalent,
     * though they may not be {@linkplain Object#equals(Object) equal}.
     * <pre>{@code
     *  VectorSearchOptions options1 = VectorSearchOptions.vectorSearchOptions()
     *          .filter(Filters.lt("fieldName", 1));
     *  VectorSearchOptions options2 = VectorSearchOptions.vectorSearchOptions()
     *          .option("filter", Filters.lt("fieldName", 1));
     * }</pre>
     *
     * @param name The option name.
     * @param value The option value.
     * @return A new {@link VectorSearchOptions}.
     */
    VectorSearchOptions option(String name, Object value);
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
*
* @see com.mongodb.client.model.Aggregates#search(SearchOperator, SearchOptions)
* @see com.mongodb.client.model.Aggregates#search(SearchCollector, SearchOptions)
* @see com.mongodb.client.model.Aggregates#vectorSearch(FieldSearchPath, java.lang.Iterable, java.lang.String, long, long, VectorSearchOptions)
* @mongodb.atlas.manual atlas-search/ Atlas Search
* @mongodb.atlas.manual atlas-search/query-syntax/ Atlas Search aggregation pipeline stages
* @since 4.7
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class FiltersFunctionalSpecification extends OperationFunctionalSpecification {
// Functional check of Filters.eq; the fixtures `a` and `b` and the `find` helper are defined outside this excerpt.
def 'eq'() {
expect:
// Two-argument form: equality on a named field.
find(eq('x', 1)) == [a]
// Equality on '_id' via the two-argument form.
find(eq('_id', 2)) == [b]
// Single-argument form; expected to select the same document as the explicit '_id' case above
// (presumably it targets '_id' - confirm against Filters.eq(TItem)).
find(eq(2)) == [b]
}

Expand Down
Loading