From e861cc1aac82895d7edb1712da1ad2cbe60a11cf Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Mon, 31 Oct 2022 15:08:57 -0700 Subject: [PATCH] Add integration tests for neural query (#47) Adds a series of integration tests for neural query type. Adds shared functionality to base class as well as a utility class. Increase test cluster heap to 1 GB. Signed-off-by: John Mazanec (cherry picked from commit e30285be2529180586135dcc68b924fa9670a50b) --- build.gradle | 4 + .../common/BaseNeuralSearchIT.java | 263 ++++++++++++- .../neuralsearch/plugin/TestUtils.java | 30 ++ .../plugin/query/NeuralQueryIT.java | 361 ++++++++++++++++++ src/test/resources/model/all-MiniLM-L6-v2.zip | 3 - .../resources/processor/IndexMappings.json | 8 +- .../processor/UploadModelRequestBody.json | 8 +- 7 files changed, 656 insertions(+), 21 deletions(-) create mode 100644 src/test/java/org/opensearch/neuralsearch/plugin/query/NeuralQueryIT.java delete mode 100644 src/test/resources/model/all-MiniLM-L6-v2.zip diff --git a/build.gradle b/build.gradle index 37a7ef10d..0784d23be 100644 --- a/build.gradle +++ b/build.gradle @@ -232,6 +232,10 @@ testClusters.integTest { debugPort += 1 } } + + // Increase heap size from default of 512mb to 1gb. 
When heap size is 512mb, our integ tests sporadically fail due + // to ml-commons memory circuit breaker exception + jvmArgs("-Xms1g", "-Xmx1g") } // Remote Integration Tests diff --git a/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java b/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java index 1d1023d02..914acb345 100644 --- a/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java +++ b/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java @@ -6,15 +6,22 @@ package org.opensearch.neuralsearch.common; import static org.apache.http.entity.ContentType.APPLICATION_JSON; +import static org.opensearch.neuralsearch.common.VectorUtil.vectorAsListToArray; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.function.Predicate; +import java.util.stream.Collectors; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.SneakyThrows; import org.apache.commons.lang3.StringUtils; import org.apache.http.Header; @@ -28,14 +35,20 @@ import org.opensearch.client.Response; import org.opensearch.client.RestClient; import org.opensearch.client.WarningsHandler; +import org.opensearch.common.Strings; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.common.xcontent.XContentType; -import org.opensearch.test.rest.OpenSearchRestTestCase; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.knn.index.SpaceType; +import org.opensearch.neuralsearch.OpenSearchSecureRestTestCase; +import org.opensearch.rest.RestStatus; import com.google.common.collect.ImmutableList; -public abstract class BaseNeuralSearchIT extends 
OpenSearchRestTestCase { +public abstract class BaseNeuralSearchIT extends OpenSearchSecureRestTestCase { private static final Locale LOCALE = Locale.ROOT; @@ -45,7 +58,7 @@ public abstract class BaseNeuralSearchIT extends OpenSearchRestTestCase { protected final ClassLoader classLoader = this.getClass().getClassLoader(); - public String uploadModel(String requestBody) throws Exception { + protected String uploadModel(String requestBody) throws Exception { Response uploadResponse = makeRequest( client(), "POST", @@ -74,7 +87,7 @@ public String uploadModel(String requestBody) throws Exception { return modelId; } - public void loadModel(String modelId) throws IOException, InterruptedException { + protected void loadModel(String modelId) throws IOException, InterruptedException { Response uploadResponse = makeRequest( client(), "POST", @@ -100,6 +113,56 @@ public void loadModel(String modelId) throws IOException, InterruptedException { } } + /** + * Upload default model and load into the cluster + * + * @return modelID + */ + @SneakyThrows + protected String prepareModel() { + String requestBody = Files.readString(Path.of(classLoader.getResource("processor/UploadModelRequestBody.json").toURI())); + String modelId = uploadModel(requestBody); + loadModel(modelId); + return modelId; + } + + /** + * Execute model inference on the provided query text + * + * @param modelId id of model to run inference + * @param queryText text to be transformed into an embedding + * @return text embedding + */ + @SuppressWarnings("unchecked") + @SneakyThrows + protected float[] runInference(String modelId, String queryText) { + Response inferenceResponse = makeRequest( + client(), + "POST", + String.format(LOCALE, "/_plugins/_ml/_predict/text_embedding/%s", modelId), + null, + toHttpEntity(String.format(LOCALE, "{\"text_docs\": [\"%s\"],\"target_response\": [\"sentence_embedding\"]}", queryText)), + ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "Kibana")) + ); + + Map inferenceResJson
= XContentHelper.convertToMap( + XContentFactory.xContent(XContentType.JSON), + EntityUtils.toString(inferenceResponse.getEntity()), + false + ); + + Object inference_results = inferenceResJson.get("inference_results"); + assertTrue(inference_results instanceof List); + List inferenceResultsAsMap = (List) inference_results; + assertEquals(1, inferenceResultsAsMap.size()); + Map result = (Map) inferenceResultsAsMap.get(0); + List output = (List) result.get("output"); + assertEquals(1, output.size()); + Map map = (Map) output.get(0); + List data = ((List) map.get("data")).stream().map(Double::floatValue).collect(Collectors.toList()); + return vectorAsListToArray(data); + } + protected void createIndexWithConfiguration(String indexName, String indexConfiguration, String pipelineName) throws Exception { if (StringUtils.isNotBlank(pipelineName)) { indexConfiguration = String.format(LOCALE, indexConfiguration, pipelineName); @@ -121,7 +184,7 @@ protected void createIndexWithConfiguration(String indexName, String indexConfig assertEquals(indexName, node.get("index").toString()); } - public void createPipelineProcessor(String modelId, String pipelineName) throws Exception { + protected void createPipelineProcessor(String modelId, String pipelineName) throws Exception { Response pipelineCreateResponse = makeRequest( client(), "PUT", @@ -144,7 +207,155 @@ public void createPipelineProcessor(String modelId, String pipelineName) throws assertEquals("true", node.get("acknowledged").toString()); } - public Map getTaskQueryResponse(String taskId) throws IOException { + /** + * Get the number of documents in a particular index + * + * @param indexName name of index + * @return number of documents indexed to that index + */ + @SneakyThrows + protected int getDocCount(String indexName) { + Request request = new Request("GET", "/" + indexName + "/_count"); + Response response = client().performRequest(request); + assertEquals(request.getEndpoint() + ": failed", RestStatus.OK, 
RestStatus.fromCode(response.getStatusLine().getStatusCode())); + String responseBody = EntityUtils.toString(response.getEntity()); + Map responseMap = createParser(XContentType.JSON.xContent(), responseBody).map(); + return (Integer) responseMap.get("count"); + } + + /** + * Execute a search request initialized from a neural query builder + * + * @param index Index to search against + * @param queryBuilder queryBuilder to produce source of query + * @param resultSize number of results to return in the search + * @return Search results represented as a map + */ + protected Map search(String index, QueryBuilder queryBuilder, int resultSize) { + return search(index, queryBuilder, null, resultSize); + } + + /** + * Execute a search request initialized from a neural query builder that can add a rescore query to the request + * + * @param index Index to search against + * @param queryBuilder queryBuilder to produce source of query + * @param rescorer used for rescorer query builder + * @param resultSize number of results to return in the search + * @return Search results represented as a map + */ + @SneakyThrows + protected Map search(String index, QueryBuilder queryBuilder, QueryBuilder rescorer, int resultSize) { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("query"); + queryBuilder.toXContent(builder, ToXContent.EMPTY_PARAMS); + + if (rescorer != null) { + builder.startObject("rescore").startObject("query").field("query_weight", 0.0f).field("rescore_query"); + rescorer.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject().endObject(); + } + + builder.endObject(); + + Request request = new Request("POST", "/" + index + "/_search"); + request.addParameter("size", Integer.toString(resultSize)); + request.setJsonEntity(Strings.toString(builder)); + + Response response = client().performRequest(request); + assertEquals(request.getEndpoint() + ": failed", RestStatus.OK, 
RestStatus.fromCode(response.getStatusLine().getStatusCode())); + + String responseBody = EntityUtils.toString(response.getEntity()); + + return XContentHelper.convertToMap(XContentFactory.xContent(XContentType.JSON), responseBody, false); + } + + /** + * Add a set of knn vectors + * + * @param index Name of the index + * @param docId ID of document to be added + * @param vectorFieldNames List of vector fields to be added + * @param vectors List of vectors corresponding to those fields + */ + protected void addKnnDoc(String index, String docId, List vectorFieldNames, List vectors) { + addKnnDoc(index, docId, vectorFieldNames, vectors, Collections.emptyList(), Collections.emptyList()); + } + + /** + * Add a set of knn vectors and text to an index + * + * @param index Name of the index + * @param docId ID of document to be added + * @param vectorFieldNames List of vector fields to be added + * @param vectors List of vectors corresponding to those fields + * @param textFieldNames List of text fields to be added + * @param texts List of text corresponding to those fields + */ + @SneakyThrows + protected void addKnnDoc( + String index, + String docId, + List vectorFieldNames, + List vectors, + List textFieldNames, + List texts + ) { + Request request = new Request("POST", "/" + index + "/_doc/" + docId + "?refresh=true"); + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + for (int i = 0; i < vectorFieldNames.size(); i++) { + builder.field(vectorFieldNames.get(i), vectors.get(i)); + } + + for (int i = 0; i < textFieldNames.size(); i++) { + builder.field(textFieldNames.get(i), texts.get(i)); + } + builder.endObject(); + + request.setJsonEntity(Strings.toString(builder)); + Response response = client().performRequest(request); + assertEquals(request.getEndpoint() + ": failed", RestStatus.CREATED, RestStatus.fromCode(response.getStatusLine().getStatusCode())); + } + + /** + * Parse the first returned hit from a search response as a map + * + * @param
searchResponseAsMap Complete search response as a map + * @return Map of first internal hit from the search + */ + @SuppressWarnings("unchecked") + protected Map getFirstInnerHit(Map searchResponseAsMap) { + Map hits1map = (Map) searchResponseAsMap.get("hits"); + List hits2List = (List) hits1map.get("hits"); + assertTrue(hits2List.size() > 0); + return (Map) hits2List.get(0); + } + + /** + * Create a k-NN index from a list of KNNFieldConfigs + * + * @param indexName of index to be created + * @param knnFieldConfigs list of configs specifying field + */ + @SneakyThrows + protected void prepareKnnIndex(String indexName, List knnFieldConfigs) { + createIndexWithConfiguration(indexName, buildIndexConfiguration(knnFieldConfigs), ""); + } + + /** + * Computes the expected distance between an indexVector and query text without using the neural query type. + * + * @param modelId ID of model to run inference + * @param indexVector vector to compute score against + * @param spaceType Space to measure distance + * @param queryText Text to produce query vector from + * @return Expected OpenSearch score for this indexVector + */ + protected float computeExpectedScore(String modelId, float[] indexVector, SpaceType spaceType, String queryText) { + float[] queryVector = runInference(modelId, queryText); + return spaceType.getVectorSimilarityFunction().compare(queryVector, indexVector); + } + + protected Map getTaskQueryResponse(String taskId) throws IOException { Response taskQueryResponse = makeRequest( client(), "GET", @@ -160,12 +371,37 @@ public Map getTaskQueryResponse(String taskId) throws IOExceptio ); } - public boolean checkComplete(Map node) { + protected boolean checkComplete(Map node) { Predicate> predicate = x -> node.get("error") != null || "COMPLETED".equals(String.valueOf(node.get("state"))); return predicate.test(node); } - public static Response makeRequest( + @SneakyThrows + private String buildIndexConfiguration(List knnFieldConfigs) { + XContentBuilder 
xContentBuilder = XContentFactory.jsonBuilder() + .startObject() + .startObject("settings") + .field("number_of_shards", 3) + .field("index.knn", true) + .endObject() + .startObject("mappings") + .startObject("properties"); + + for (KNNFieldConfig knnFieldConfig : knnFieldConfigs) { + xContentBuilder.startObject(knnFieldConfig.getName()) + .field("type", "knn_vector") + .field("dimension", Integer.toString(knnFieldConfig.getDimension())) + .startObject("method") + .field("engine", "lucene") + .field("space_type", knnFieldConfig.getSpaceType().getValue()) + .field("name", "hnsw") + .endObject() + .endObject(); + } + return Strings.toString(xContentBuilder.endObject().endObject().endObject()); + } + + protected static Response makeRequest( RestClient client, String method, String endpoint, @@ -176,7 +412,7 @@ public static Response makeRequest( return makeRequest(client, method, endpoint, params, entity, headers, false); } - public static Response makeRequest( + protected static Response makeRequest( RestClient client, String method, String endpoint, @@ -203,8 +439,15 @@ public static Response makeRequest( return client.performRequest(request); } - public static HttpEntity toHttpEntity(String jsonString) { + protected static HttpEntity toHttpEntity(String jsonString) { return new StringEntity(jsonString, APPLICATION_JSON); } + @AllArgsConstructor + @Getter + protected static class KNNFieldConfig { + private final String name; + private final Integer dimension; + private final SpaceType spaceType; + } } diff --git a/src/test/java/org/opensearch/neuralsearch/plugin/TestUtils.java b/src/test/java/org/opensearch/neuralsearch/plugin/TestUtils.java index d51b2c26c..75ac7a1eb 100644 --- a/src/test/java/org/opensearch/neuralsearch/plugin/TestUtils.java +++ b/src/test/java/org/opensearch/neuralsearch/plugin/TestUtils.java @@ -5,6 +5,8 @@ package org.opensearch.neuralsearch.plugin; +import static org.opensearch.test.OpenSearchTestCase.randomFloat; + import java.util.Map; 
import org.opensearch.common.bytes.BytesReference; @@ -21,4 +23,32 @@ public class TestUtils { public static Map xContentBuilderToMap(XContentBuilder xContentBuilder) { return XContentHelper.convertToMap(BytesReference.bytes(xContentBuilder), true, xContentBuilder.contentType()).v2(); } + + /** + * Utility method to convert an object to a float + * + * @param obj object to be converted to float + * @return object as float + */ + public static Float objectToFloat(Object obj) { + if (obj instanceof Number) { + return ((Number) obj).floatValue(); + } + + throw new IllegalArgumentException("Object provided must be of type Number"); + } + + /** + * Create a random vector of provided dimension + * + * @param dimension of vector to be created + * @return dimension-dimensional floating point array with random content + */ + public static float[] createRandomVector(int dimension) { + float[] vector = new float[dimension]; + for (int j = 0; j < dimension; j++) { + vector[j] = randomFloat(); + } + return vector; + } } diff --git a/src/test/java/org/opensearch/neuralsearch/plugin/query/NeuralQueryIT.java b/src/test/java/org/opensearch/neuralsearch/plugin/query/NeuralQueryIT.java new file mode 100644 index 000000000..35afa9690 --- /dev/null +++ b/src/test/java/org/opensearch/neuralsearch/plugin/query/NeuralQueryIT.java @@ -0,0 +1,361 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.neuralsearch.plugin.query; + +import static org.opensearch.neuralsearch.plugin.TestUtils.createRandomVector; +import static org.opensearch.neuralsearch.plugin.TestUtils.objectToFloat; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; + +import lombok.SneakyThrows; + +import org.junit.Before; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchAllQueryBuilder; +import 
org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.knn.index.SpaceType; +import org.opensearch.neuralsearch.common.BaseNeuralSearchIT; + +import com.google.common.primitives.Floats; + +public class NeuralQueryIT extends BaseNeuralSearchIT { + private static final String TEST_BASIC_INDEX_NAME = "test-neural-basic-index"; + private static final String TEST_MULTI_VECTOR_FIELD_INDEX_NAME = "test-neural-multi-vector-field-index"; + private static final String TEST_TEXT_AND_VECTOR_FIELD_INDEX_NAME = "test-neural-text-and-vector-field-index"; + private static final String TEST_NESTED_INDEX_NAME = "test-neural-nested-index"; + private static final String TEST_QUERY_TEXT = "Hello world"; + private static final String TEST_KNN_VECTOR_FIELD_NAME_1 = "test-knn-vector-1"; + private static final String TEST_KNN_VECTOR_FIELD_NAME_2 = "test-knn-vector-2"; + private static final String TEST_TEXT_FIELD_NAME_1 = "test-text-field"; + private static final String TEST_KNN_VECTOR_FIELD_NAME_NESTED = "nested.knn.field"; + + private static final int TEST_DIMENSION = 768; + private static final SpaceType TEST_SPACE_TYPE = SpaceType.L2; + private final float[] testVector = createRandomVector(TEST_DIMENSION); + private final AtomicReference modelId = new AtomicReference<>(); + + @Before + public void setUp() throws Exception { + super.setUp(); + modelId.compareAndSet(null, prepareModel()); + } + + /** + * Tests basic query: + * { + * "query": { + * "neural": { + * "text_knn": { + * "query_text": "Hello world", + * "model_id": "dcsdcasd", + * "k": 1 + * } + * } + * } + * } + */ + @SneakyThrows + public void testBasicQuery() { + initializeIndexIfNotExist(TEST_BASIC_INDEX_NAME); + NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder( + TEST_KNN_VECTOR_FIELD_NAME_1, + TEST_QUERY_TEXT, + modelId.get(), + 1, + null + ); + Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, neuralQueryBuilder, 1); + Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); + + 
assertEquals("1", firstInnerHit.get("_id")); + float expectedScore = computeExpectedScore(modelId.get(), testVector, TEST_SPACE_TYPE, TEST_QUERY_TEXT); + assertEquals(expectedScore, objectToFloat(firstInnerHit.get("_score")), 0.0); + } + + /** + * Tests basic query with boost parameter: + * { + * "query": { + * "neural": { + * "text_knn": { + * "query_text": "Hello world", + * "model_id": "dcsdcasd", + * "k": 1, + * "boost": 2.0 + * } + * } + * } + * } + */ + @SneakyThrows + public void testBoostQuery() { + initializeIndexIfNotExist(TEST_BASIC_INDEX_NAME); + NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder( + TEST_KNN_VECTOR_FIELD_NAME_1, + TEST_QUERY_TEXT, + modelId.get(), + 1, + null + ); + + final float boost = 2.0f; + neuralQueryBuilder.boost(boost); + Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, neuralQueryBuilder, 1); + Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); + + assertEquals("1", firstInnerHit.get("_id")); + float expectedScore = 2 * computeExpectedScore(modelId.get(), testVector, TEST_SPACE_TYPE, TEST_QUERY_TEXT); + assertEquals(expectedScore, objectToFloat(firstInnerHit.get("_score")), 0.0); + } + + /** + * Tests rescore query: + * { + * "query" : { + * "match_all": {} + * }, + * "rescore": { + * "query": { + * "rescore_query": { + * "neural": { + * "text_knn": { + * "query_text": "Hello world", + * "model_id": "dcsdcasd", + * "k": 1 + * } + * } + * } + * } + * } + */ + @SneakyThrows + public void testRescoreQuery() { + initializeIndexIfNotExist(TEST_BASIC_INDEX_NAME); + MatchAllQueryBuilder matchAllQueryBuilder = new MatchAllQueryBuilder(); + NeuralQueryBuilder rescoreNeuralQueryBuilder = new NeuralQueryBuilder( + TEST_KNN_VECTOR_FIELD_NAME_1, + TEST_QUERY_TEXT, + modelId.get(), + 1, + null + ); + + Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, matchAllQueryBuilder, rescoreNeuralQueryBuilder, 1); + Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); + + assertEquals("1", 
firstInnerHit.get("_id")); + float expectedScore = computeExpectedScore(modelId.get(), testVector, TEST_SPACE_TYPE, TEST_QUERY_TEXT); + assertEquals(expectedScore, objectToFloat(firstInnerHit.get("_score")), 0.0); + } + + /** + * Tests bool should query with vectors: + * { + * "query": { + * "bool" : { + * "should": [ + * "neural": { + * "field_1": { + * "query_text": "Hello world", + * "model_id": "dcsdcasd", + * "k": 1 + * }, + * }, + * "neural": { + * "field_2": { + * "query_text": "Hello world", + * "model_id": "dcsdcasd", + * "k": 1 + * } + * } + * ] + * } + * } + * } + */ + @SneakyThrows + public void testBooleanQuery_withMultipleNeuralQueries() { + initializeIndexIfNotExist(TEST_MULTI_VECTOR_FIELD_INDEX_NAME); + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); + + NeuralQueryBuilder neuralQueryBuilder1 = new NeuralQueryBuilder( + TEST_KNN_VECTOR_FIELD_NAME_1, + TEST_QUERY_TEXT, + modelId.get(), + 1, + null + ); + NeuralQueryBuilder neuralQueryBuilder2 = new NeuralQueryBuilder( + TEST_KNN_VECTOR_FIELD_NAME_2, + TEST_QUERY_TEXT, + modelId.get(), + 1, + null + ); + + boolQueryBuilder.should(neuralQueryBuilder1).should(neuralQueryBuilder2); + + Map searchResponseAsMap = search(TEST_MULTI_VECTOR_FIELD_INDEX_NAME, boolQueryBuilder, 1); + Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); + + assertEquals("1", firstInnerHit.get("_id")); + float expectedScore = 2 * computeExpectedScore(modelId.get(), testVector, TEST_SPACE_TYPE, TEST_QUERY_TEXT); + assertEquals(expectedScore, objectToFloat(firstInnerHit.get("_score")), 0.0); + } + + /** + * Tests bool should with BM25 and neural query: + * { + * "query": { + * "bool" : { + * "should": [ + * "neural": { + * "field_1": { + * "query_text": "Hello world", + * "model_id": "dcsdcasd", + * "k": 1 + * }, + * }, + * "match": { + * "field_2": { + * "query": "Hello world" + * } + * } + * ] + * } + * } + * } + */ + @SneakyThrows + public void testBooleanQuery_withNeuralAndBM25Queries() { + 
initializeIndexIfNotExist(TEST_TEXT_AND_VECTOR_FIELD_INDEX_NAME); + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); + + NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder( + TEST_KNN_VECTOR_FIELD_NAME_1, + TEST_QUERY_TEXT, + modelId.get(), + 1, + null + ); + + MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT); + + boolQueryBuilder.should(neuralQueryBuilder).should(matchQueryBuilder); + + Map searchResponseAsMap = search(TEST_TEXT_AND_VECTOR_FIELD_INDEX_NAME, boolQueryBuilder, 1); + Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); + + assertEquals("1", firstInnerHit.get("_id")); + float minExpectedScore = computeExpectedScore(modelId.get(), testVector, TEST_SPACE_TYPE, TEST_QUERY_TEXT); + assertTrue(minExpectedScore < objectToFloat(firstInnerHit.get("_score"))); + } + + /** + * Tests nested query: + * { + * "query": { + * "nested" : { + * "query": { + * "neural": { + * "field_1": { + * "query_text": "Hello world", + * "model_id": "dcsdcasd", + * "k": 1 + * }, + * } + * } + * } + * } + * } + */ + @SneakyThrows + public void testNestedQuery() { + initializeIndexIfNotExist(TEST_NESTED_INDEX_NAME); + + NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder( + TEST_KNN_VECTOR_FIELD_NAME_NESTED, + TEST_QUERY_TEXT, + modelId.get(), + 1, + null + ); + + Map searchResponseAsMap = search(TEST_NESTED_INDEX_NAME, neuralQueryBuilder, 1); + Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); + + assertEquals("1", firstInnerHit.get("_id")); + float expectedScore = computeExpectedScore(modelId.get(), testVector, TEST_SPACE_TYPE, TEST_QUERY_TEXT); + assertEquals(expectedScore, objectToFloat(firstInnerHit.get("_score")), 0.0); + } + + private void initializeIndexIfNotExist(String indexName) throws IOException { + if (TEST_BASIC_INDEX_NAME.equals(indexName) && !indexExists(TEST_BASIC_INDEX_NAME)) { + prepareKnnIndex( + TEST_BASIC_INDEX_NAME, + Collections.singletonList(new 
KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE)) + ); + addKnnDoc( + TEST_BASIC_INDEX_NAME, + "1", + Collections.singletonList(TEST_KNN_VECTOR_FIELD_NAME_1), + Collections.singletonList(Floats.asList(testVector).toArray()) + ); + assertEquals(1, getDocCount(TEST_BASIC_INDEX_NAME)); + } + + if (TEST_MULTI_VECTOR_FIELD_INDEX_NAME.equals(indexName) && !indexExists(TEST_MULTI_VECTOR_FIELD_INDEX_NAME)) { + prepareKnnIndex( + TEST_MULTI_VECTOR_FIELD_INDEX_NAME, + List.of( + new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE), + new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_2, TEST_DIMENSION, TEST_SPACE_TYPE) + ) + ); + addKnnDoc( + TEST_MULTI_VECTOR_FIELD_INDEX_NAME, + "1", + List.of(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_KNN_VECTOR_FIELD_NAME_2), + List.of(Floats.asList(testVector).toArray(), Floats.asList(testVector).toArray()) + ); + assertEquals(1, getDocCount(TEST_MULTI_VECTOR_FIELD_INDEX_NAME)); + } + + if (TEST_NESTED_INDEX_NAME.equals(indexName) && !indexExists(TEST_NESTED_INDEX_NAME)) { + prepareKnnIndex( + TEST_NESTED_INDEX_NAME, + Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_NESTED, TEST_DIMENSION, TEST_SPACE_TYPE)) + ); + addKnnDoc( + TEST_NESTED_INDEX_NAME, + "1", + Collections.singletonList(TEST_KNN_VECTOR_FIELD_NAME_NESTED), + Collections.singletonList(Floats.asList(testVector).toArray()) + ); + assertEquals(1, getDocCount(TEST_NESTED_INDEX_NAME)); + } + + if (TEST_TEXT_AND_VECTOR_FIELD_INDEX_NAME.equals(indexName) && !indexExists(TEST_TEXT_AND_VECTOR_FIELD_INDEX_NAME)) { + prepareKnnIndex( + TEST_TEXT_AND_VECTOR_FIELD_INDEX_NAME, + Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE)) + ); + addKnnDoc( + TEST_TEXT_AND_VECTOR_FIELD_INDEX_NAME, + "1", + Collections.singletonList(TEST_KNN_VECTOR_FIELD_NAME_1), + Collections.singletonList(Floats.asList(testVector).toArray()), + 
Collections.singletonList(TEST_TEXT_FIELD_NAME_1), + Collections.singletonList(TEST_QUERY_TEXT) + ); + assertEquals(1, getDocCount(TEST_TEXT_AND_VECTOR_FIELD_INDEX_NAME)); + } + } +} diff --git a/src/test/resources/model/all-MiniLM-L6-v2.zip b/src/test/resources/model/all-MiniLM-L6-v2.zip deleted file mode 100644 index 90d46c2c1..000000000 --- a/src/test/resources/model/all-MiniLM-L6-v2.zip +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9376c2ebd7c83f99ec2526323786c348d2382e6d86576f750c89ea544d6bbb14 -size 83408741 diff --git a/src/test/resources/processor/IndexMappings.json b/src/test/resources/processor/IndexMappings.json index 11838dc9d..f015446d9 100644 --- a/src/test/resources/processor/IndexMappings.json +++ b/src/test/resources/processor/IndexMappings.json @@ -13,7 +13,7 @@ "properties": { "title_knn": { "type": "knn_vector", - "dimension": 384, + "dimension": 768, "method": { "name": "hnsw", "space_type": "l2", @@ -26,7 +26,7 @@ }, "favorites.game_knn": { "type": "knn_vector", - "dimension": 384, + "dimension": 768, "method": { "name": "hnsw", "space_type": "l2", @@ -39,7 +39,7 @@ }, "favorites.movie_knn": { "type": "knn_vector", - "dimension": 384, + "dimension": 768, "method": { "name": "hnsw", "space_type": "l2", @@ -55,7 +55,7 @@ "properties": { "knn": { "type": "knn_vector", - "dimension": 384, + "dimension": 768, "method": { "name": "hnsw", "space_type": "l2", diff --git a/src/test/resources/processor/UploadModelRequestBody.json b/src/test/resources/processor/UploadModelRequestBody.json index d56a61f3e..8f50954b6 100644 --- a/src/test/resources/processor/UploadModelRequestBody.json +++ b/src/test/resources/processor/UploadModelRequestBody.json @@ -1,13 +1,13 @@ { - "name": "all-MiniLM-L6-v2", - "version": 1, + "name": "traced_small_model", + "version": "1.0.0", "model_format": "TORCH_SCRIPT", "model_task_type": "text_embedding", "model_config": { "model_type": "bert", - "embedding_dimension": 384, + 
"embedding_dimension": 768, "framework_type": "sentence_transformers", "all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}" }, - "url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/all-MiniLM-L6-v2_torchscript_sentence-transformer.zip?raw=true" + "url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true" }