From 02002458c1d1b57dc1f3901644eb8d47474e4195 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Sat, 13 Jul 2024 11:35:40 +0800 Subject: [PATCH] Neural sparse query two-phase search processor's bwc test (#777) (#831) * Poc of pipeline Signed-off-by: conggguan * Complete some settings for two phase pipeline. Signed-off-by: conggguan * Change the implement of two-phase from QueryBuilderVistor to custom process funciton. Signed-off-by: conggguan * Add It and fix some bug on the state of multy same neuralsparsequerybuilder. Signed-off-by: conggguan * Simplify some logic, and correct some format. Signed-off-by: conggguan * Optimize some format. Signed-off-by: conggguan * Add some test case. Signed-off-by: conggguan * Optimize some logic for zhichao-aws's comments. Signed-off-by: conggguan * Optimize a line without application. Signed-off-by: conggguan * Add some comments, remove some redundant lines, fix some format. Signed-off-by: conggguan * Remove a redundant null check, fix a if format. Signed-off-by: conggguan * Fix a typo for a comment, camelcase format for some variable. Signed-off-by: conggguan * Add some comments to illustrate the influence of the modify on 2-phase search pipeline to neural sparse query builder. Signed-off-by: conggguan * Add restart and rolling upgrade bwc test for neural sparse two phase processor. Signed-off-by: conggguan * Spotless on qa. Signed-off-by: conggguan * Update change log for two-phase BWC test. Signed-off-by: conggguan * Remove redundant lines of two-phase BWC test. Signed-off-by: conggguan * Add changelog. Signed-off-by: conggguan * Add the PR link and number for the CHANGELOG.md. Signed-off-by: conggguan * [Fix] NeuralSparseTwoPhaseProcessorIT created wrong ingest pipeline, fix it to correct API. Signed-off-by: conggguan --------- Signed-off-by: conggguan Signed-off-by: conggguan <157357330+conggguan@users.noreply.github.com> (cherry picked from commit b0b51284d29532ed67fcff745fee1d45653ce776) Co-authored-by: conggguan <157357330+conggguan@users.noreply.github.com> --- CHANGELOG.md | 1 + qa/restart-upgrade/build.gradle | 14 ++++ .../bwc/NeuralSparseTwoPhaseProcessorIT.java | 65 ++++++++++++++++ ...lSparseTwoPhaseProcessorConfiguration.json | 16 ++++ qa/rolling-upgrade/build.gradle | 39 ++++++++++ .../bwc/NeuralSparseTwoPhaseProcessorIT.java | 78 +++++++++++++++++++ ...lSparseTwoPhaseProcessorConfiguration.json | 16 ++++ 7 files changed, 229 insertions(+) create mode 100644 qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java create mode 100644 qa/restart-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json create mode 100644 qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java create mode 100644 qa/rolling-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json diff --git a/CHANGELOG.md b/CHANGELOG.md index e34d419dd..024a02311 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix for missing HybridQuery results when concurrent segment search is enabled ([#800](https://github.com/opensearch-project/neural-search/pull/800)) ### Infrastructure - Add BWC for batch ingestion ([#769](https://github.com/opensearch-project/neural-search/pull/769)) +- Add backward test cases for neural sparse two phase processor ([#777](https://github.com/opensearch-project/neural-search/pull/777)) ### Documentation ### Maintenance ### Refactoring diff --git a/qa/restart-upgrade/build.gradle b/qa/restart-upgrade/build.gradle index dc6e4504e..c5badd248 100644 --- a/qa/restart-upgrade/build.gradle +++ b/qa/restart-upgrade/build.gradle @@ -98,6 +98,13 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15 + if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' @@ -155,6 +162,13 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15 + if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java new file mode 100644 index 000000000..4b00a7916 --- /dev/null +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java @@ -0,0 +1,65 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.neuralsearch.bwc; + +import org.opensearch.common.settings.Settings; +import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder; +import org.opensearch.neuralsearch.util.TestUtils; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER; +import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR; + +public class NeuralSparseTwoPhaseProcessorIT extends AbstractRestartUpgradeRestTestCase { + + private static final String NEURAL_SPARSE_INGEST_PIPELINE_NAME = "nstp-nlp-ingest-pipeline-dense"; + private static final String NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME = "nstp-nlp-two-phase-search-pipeline-sparse"; + private static final String TEST_ENCODING_FIELD = "passage_embedding"; + private static final String TEST_TEXT_FIELD = "passage_text"; + private static final String TEXT_1 = "Hello world a b"; + + public void testNeuralSparseQueryTwoPhaseProcessor_NeuralSearch_E2EFlow() throws Exception { + waitForClusterHealthGreen(NODES_BWC_CLUSTER); + NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1); + if (isRunningAgainstOldCluster()) { + String modelId = uploadSparseEncodingModel(); + loadModel(modelId); + neuralSparseQueryBuilder.modelId(modelId); + createPipelineForSparseEncodingProcessor(modelId, NEURAL_SPARSE_INGEST_PIPELINE_NAME); + createIndexWithConfiguration( + getIndexNameForTest(), + Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())), + NEURAL_SPARSE_INGEST_PIPELINE_NAME + ); + addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1)); + createNeuralSparseTwoPhaseSearchProcessor(NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME); + updateIndexSettings( + getIndexNameForTest(), + Settings.builder().put("index.search.default_pipeline", NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME) + ); + Object resultWith2PhasePipeline = search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits"); + assertNotNull(resultWith2PhasePipeline); + } else { + String modelId = null; + try { + modelId = TestUtils.getModelId(getIngestionPipeline(NEURAL_SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR); + loadModel(modelId); + neuralSparseQueryBuilder.modelId(modelId); + Object resultWith2PhasePipeline = search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits"); + assertNotNull(resultWith2PhasePipeline); + } finally { + wipeOfTestResources( + getIndexNameForTest(), + NEURAL_SPARSE_INGEST_PIPELINE_NAME, + modelId, + NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME + ); + } + } + } +} diff --git a/qa/restart-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json b/qa/restart-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json new file mode 100644 index 000000000..45e435268 --- /dev/null +++ b/qa/restart-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json @@ -0,0 +1,16 @@ +{ + "request_processors": [ + { + "neural_sparse_two_phase_processor": { + "tag": "neural-sparse", + "description": "This processor is making two-phase rescorer.", + "enabled": true, + "two_phase_parameter": { + "prune_ratio": %f, + "expansion_rate": %f, + "max_window_size": %d + } + } + } + ] +} diff --git a/qa/rolling-upgrade/build.gradle b/qa/rolling-upgrade/build.gradle index 1146bac19..285e65093 100644 --- a/qa/rolling-upgrade/build.gradle +++ b/qa/rolling-upgrade/build.gradle @@ -98,6 +98,16 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the neural sparse two phase processor test because we introduce this feature in 2.15 + if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") + || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") + || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' @@ -156,6 +166,16 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the neural sparse two phase processor test because we introduce this feature in 2.15 + if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") + || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") + || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' @@ -213,6 +233,16 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the neural sparse two phase processor test because we introduce this feature in 2.15 + if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") + || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") + || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' @@ -270,6 +300,15 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) { } } + // Excluding the neural sparse two phase processor test because we introduce this feature in 2.15 + if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") + || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") + || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java new file mode 100644 index 000000000..c95ee93e0 --- /dev/null +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java @@ -0,0 +1,78 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.neuralsearch.bwc; + +import org.opensearch.common.settings.Settings; +import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder; +import org.opensearch.neuralsearch.util.TestUtils; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER; +import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR; + +public class NeuralSparseTwoPhaseProcessorIT extends AbstractRollingUpgradeTestCase { + // add prefix to avoid conflicts with other IT class, since don't wipe resources after first round + private static final String SPARSE_INGEST_PIPELINE_NAME = "nstp-nlp-ingest-pipeline-sparse"; + private static final String SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME = "nstp-nlp-two-phase-search-pipeline-sparse"; + private static final String TEST_ENCODING_FIELD = "passage_embedding"; + private static final String TEST_TEXT_FIELD = "passage_text"; + private static final String TEXT_1 = "Hello world a b"; + private String sparseModelId = ""; + + // test of NeuralSparseTwoPhaseProcessor supports neural_sparse query's two phase speed up + // the feature is introduced from 2.15 + public void testNeuralSparseTwoPhaseProcessorIT_NeuralSparseSearch_E2EFlow() throws Exception { + waitForClusterHealthGreen(NODES_BWC_CLUSTER); + // will set the model_id after we obtain the id + NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1); + + switch (getClusterType()) { + case OLD: + sparseModelId = uploadSparseEncodingModel(); + loadModel(sparseModelId); + neuralSparseQueryBuilder.modelId(sparseModelId); + createPipelineForSparseEncodingProcessor(sparseModelId, SPARSE_INGEST_PIPELINE_NAME); + createIndexWithConfiguration( + getIndexNameForTest(), + Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())), + SPARSE_INGEST_PIPELINE_NAME + ); + addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1)); + createNeuralSparseTwoPhaseSearchProcessor(SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME); + updateIndexSettings( + getIndexNameForTest(), + Settings.builder().put("index.search.default_pipeline", SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME) + ); + assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits")); + break; + case MIXED: + sparseModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR); + loadModel(sparseModelId); + neuralSparseQueryBuilder.modelId(sparseModelId); + assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits")); + break; + case UPGRADED: + try { + sparseModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR); + loadModel(sparseModelId); + neuralSparseQueryBuilder.modelId(sparseModelId); + assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits")); + } finally { + wipeOfTestResources( + getIndexNameForTest(), + SPARSE_INGEST_PIPELINE_NAME, + sparseModelId, + SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME + ); + } + break; + default: + throw new IllegalStateException("Unexpected value: " + getClusterType()); + } + } +} diff --git a/qa/rolling-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json b/qa/rolling-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json new file mode 100644 index 000000000..45e435268 --- /dev/null +++ b/qa/rolling-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json @@ -0,0 +1,16 @@ +{ + "request_processors": [ + { + "neural_sparse_two_phase_processor": { + "tag": "neural-sparse", + "description": "This processor is making two-phase rescorer.", + "enabled": true, + "two_phase_parameter": { + "prune_ratio": %f, + "expansion_rate": %f, + "max_window_size": %d + } + } + } + ] +}