From e69752c3824e73e7cd2302b112e02c5091a8f096 Mon Sep 17 00:00:00 2001
From: Yuye Zhu
Date: Wed, 17 Apr 2024 18:04:46 +0800
Subject: [PATCH] Test: bwc test for text chunking processor (#661)

* bwc test for text chunking processor
Signed-off-by: yuye-aws

* spotless apply
Signed-off-by: yuye-aws

* update changelog
Signed-off-by: yuye-aws

* spotless apply
Signed-off-by: yuye-aws

* add test document for restart upgrade
Signed-off-by: yuye-aws

* rename pipeline configuration file
Signed-off-by: yuye-aws

* fix pipeline create bug
Signed-off-by: yuye-aws

* fix pipeline create bug
Signed-off-by: yuye-aws

* filter tests for lower versions
Signed-off-by: yuye-aws

* index create in chunking bwc test
Signed-off-by: yuye-aws

* index create in chunking bwc test
Signed-off-by: yuye-aws

* index create in chunking bwc test
Signed-off-by: yuye-aws

* index validate in chunking bwc test
Signed-off-by: yuye-aws

* filter bwc test for lower version
Signed-off-by: yuye-aws

* bug fix in document ingestion in text chunking test
Signed-off-by: yuye-aws

* ensure index creation in text chunking bwc test
Signed-off-by: yuye-aws

* add comment
Signed-off-by: yuye-aws

* update index setting
Signed-off-by: yuye-aws

* update change log
Signed-off-by: yuye-aws

* update gradle comment format
Signed-off-by: yuye-aws

* update gradle file format
Signed-off-by: yuye-aws

* rename bwc test filename
Signed-off-by: yuye-aws

* update gradle file format
Signed-off-by: yuye-aws

* update gradle file to filter tests
Signed-off-by: yuye-aws

* merge method createPipelineProcessorWithoutModelId
Signed-off-by: yuye-aws

* text chunking processor it: create pipeline method rename
Signed-off-by: yuye-aws

* fix it failure
Signed-off-by: yuye-aws

* include index mapping for text chunking index setting
Signed-off-by: yuye-aws

* update nitpicking
Signed-off-by: yuye-aws

---------

Signed-off-by: yuye-aws
---
 CHANGELOG.md                                  |  1 +
 qa/restart-upgrade/build.gradle               | 18 +++-
 .../AbstractRestartUpgradeRestTestCase.java   |  9 ++
 .../bwc/TextChunkingProcessorIT.java          | 75 +++++++++++++++
 .../processor/ChunkingIndexSettings.json      | 17 ++++
 ...ForTextChunkingProcessorConfiguration.json | 18 ++++
 qa/rolling-upgrade/build.gradle               | 30 +++++-
 .../bwc/AbstractRollingUpgradeTestCase.java   |  9 ++
 .../bwc/NeuralSparseSearchIT.java             |  2 +-
 .../bwc/TextChunkingProcessorIT.java          | 93 +++++++++++++++++++
 .../processor/ChunkingIndexSettings.json      | 17 ++++
 ...rSparseEncodingProcessorConfiguration.json | 20 ++--
 ...ForTextChunkingProcessorConfiguration.json | 18 ++++
 .../processor/TextChunkingProcessorIT.java    | 20 +---
 .../opensearch/neuralsearch/TestUtils.java    |  1 +
 15 files changed, 317 insertions(+), 31 deletions(-)
 create mode 100644 qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java
 create mode 100644 qa/restart-upgrade/src/test/resources/processor/ChunkingIndexSettings.json
 create mode 100644 qa/restart-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json
 create mode 100644 qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java
 create mode 100644 qa/rolling-upgrade/src/test/resources/processor/ChunkingIndexSettings.json
 create mode 100644 qa/rolling-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5021a36d1..ed09e0836 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 ## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.13...2.x)
 ### Features
 ### Enhancements
+- BWC tests for text chunking processor ([#661](https://github.com/opensearch-project/neural-search/pull/661))
 - Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670))
 ### Bug Fixes
 - Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663))

diff --git a/qa/restart-upgrade/build.gradle b/qa/restart-upgrade/build.gradle
index 1a6d0a104..8fca43f3a 100644
--- a/qa/restart-upgrade/build.gradle
+++ b/qa/restart-upgrade/build.gradle
@@ -65,7 +65,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
     systemProperty 'tests.skip_delete_model_index', 'true'
     systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

-    //Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
+    // Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
     // because these features were released in 2.11 version.
     if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
         filter {
@@ -83,6 +83,13 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
         }
     }

+    // Excluding the text chunking processor test because this feature was introduced in 2.13
+    if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
+        filter {
+            excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
+        }
+    }
+
     nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
     nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
     systemProperty 'tests.security.manager', 'false'
@@ -107,7 +114,7 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
     systemProperty 'tests.is_old_cluster', 'false'
     systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

-    //Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
+    // Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
     // because these features were released in 2.11 version.
     if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
         filter {
@@ -125,6 +132,13 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
         }
     }

+    // Excluding the text chunking processor test because this feature was introduced in 2.13
+    if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
+        filter {
+            excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
+        }
+    }
+
     nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
     nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
     systemProperty 'tests.security.manager', 'false'

diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java
index c2d2657f4..395573c6a 100644
--- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java
+++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java
@@ -4,9 +4,11 @@
  */
 package org.opensearch.neuralsearch.bwc;

+import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Locale;
+import java.util.Objects;
 import java.util.Optional;
 import org.junit.Before;
 import org.opensearch.common.settings.Settings;
@@ -99,4 +101,11 @@ protected void createPipelineForSparseEncodingProcessor(final String modelId, fi
         );
         createPipelineProcessor(requestBody, pipelineName, modelId);
     }
+
+    protected void createPipelineForTextChunkingProcessor(String pipelineName) throws Exception {
+        String requestBody = Files.readString(
+            Path.of(classLoader.getResource("processor/PipelineForTextChunkingProcessorConfiguration.json").toURI())
+        );
+        createPipelineProcessor(requestBody, pipelineName, "");
+    }
 }

diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java
new file mode 100644
index 000000000..20eb0d05c
--- /dev/null
+++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package org.opensearch.neuralsearch.bwc;
+
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.opensearch.index.query.MatchAllQueryBuilder;
+import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER;
+
+public class TextChunkingProcessorIT extends AbstractRestartUpgradeRestTestCase {
+
+    private static final String PIPELINE_NAME = "pipeline-text-chunking";
+    private static final String INPUT_FIELD = "body";
+    private static final String OUTPUT_FIELD = "body_chunk";
+    private static final String TEST_INDEX_SETTING_PATH = "processor/ChunkingIndexSettings.json";
+    private static final String TEST_INGEST_TEXT =
+        "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.";
+    List<String> expectedPassages = List.of(
+        "This is an example document to be chunked. The document ",
+        "contains a single paragraph, two sentences and 24 tokens by ",
+        "standard tokenizer in OpenSearch."
+    );
+
+    // Test restart-upgrade text chunking processor
+    // Create Text Chunking Processor, Ingestion Pipeline and add document
+    // Validate process, pipeline and document count in restart-upgrade scenario
+    public void testTextChunkingProcessor_E2EFlow() throws Exception {
+        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
+        String indexName = getIndexNameForTest();
+        if (isRunningAgainstOldCluster()) {
+            createPipelineForTextChunkingProcessor(PIPELINE_NAME);
+            createChunkingIndex(indexName);
+            addDocument(indexName, "0", INPUT_FIELD, TEST_INGEST_TEXT, null, null);
+            validateTestIndex(indexName, OUTPUT_FIELD, 1, expectedPassages);
+        } else {
+            try {
+                addDocument(indexName, "1", INPUT_FIELD, TEST_INGEST_TEXT, null, null);
+                validateTestIndex(indexName, OUTPUT_FIELD, 2, expectedPassages);
+            } finally {
+                wipeOfTestResources(indexName, PIPELINE_NAME, null, null);
+            }
+        }
+    }
+
+    private void createChunkingIndex(String indexName) throws Exception {
+        URL documentURLPath = classLoader.getResource(TEST_INDEX_SETTING_PATH);
+        Objects.requireNonNull(documentURLPath);
+        String indexSetting = Files.readString(Path.of(documentURLPath.toURI()));
+        createIndexWithConfiguration(indexName, indexSetting, PIPELINE_NAME);
+    }
+
+    private void validateTestIndex(String indexName, String fieldName, int documentCount, Object expected) {
+        int docCount = getDocCount(indexName);
+        assertEquals(documentCount, docCount);
+        MatchAllQueryBuilder query = new MatchAllQueryBuilder();
+        Map<String, Object> searchResults = search(indexName, query, 10);
+        assertNotNull(searchResults);
+        Map<String, Object> document = getFirstInnerHit(searchResults);
+        assertNotNull(document);
+        Object documentSource = document.get("_source");
+        assert (documentSource instanceof Map);
+        @SuppressWarnings("unchecked")
+        Map<String, Object> documentSourceMap = (Map<String, Object>) documentSource;
+        assert (documentSourceMap).containsKey(fieldName);
+        Object ingestOutputs = documentSourceMap.get(fieldName);
+        assertEquals(expected, ingestOutputs);
+    }
+}

diff --git a/qa/restart-upgrade/src/test/resources/processor/ChunkingIndexSettings.json b/qa/restart-upgrade/src/test/resources/processor/ChunkingIndexSettings.json
new file mode 100644
index 000000000..956ffc585
--- /dev/null
+++ b/qa/restart-upgrade/src/test/resources/processor/ChunkingIndexSettings.json
@@ -0,0 +1,17 @@
+{
+  "settings":{
+    "default_pipeline": "%s",
+    "number_of_shards": 3,
+    "number_of_replicas": 1
+  },
+  "mappings": {
+    "properties": {
+      "body": {
+        "type": "text"
+      },
+      "body_chunk": {
+        "type": "text"
+      }
+    }
+  }
+}

diff --git a/qa/restart-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json b/qa/restart-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json
new file mode 100644
index 000000000..6c727b3b4
--- /dev/null
+++ b/qa/restart-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json
@@ -0,0 +1,18 @@
+{
+  "description": "An example fixed token length chunker pipeline with standard tokenizer",
+  "processors" : [
+    {
+      "text_chunking": {
+        "field_map": {
+          "body": "body_chunk"
+        },
+        "algorithm": {
+          "fixed_token_length": {
+            "token_limit": 10,
+            "tokenizer": "standard"
+          }
+        }
+      }
+    }
+  ]
+}

diff --git a/qa/rolling-upgrade/build.gradle b/qa/rolling-upgrade/build.gradle
index 591e83d58..eedea2d2d 100644
--- a/qa/rolling-upgrade/build.gradle
+++ b/qa/rolling-upgrade/build.gradle
@@ -83,6 +83,13 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
         }
     }

+    // Excluding the text chunking processor test because this feature was introduced in 2.13
+    if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
+        filter {
+            excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
+        }
+    }
+
     nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
     nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
     systemProperty 'tests.security.manager', 'false'
@@ -126,6 +133,13 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) {
         }
     }

+    // Excluding the text chunking processor test because this feature was introduced in 2.13
+    if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
+        filter {
+            excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
+        }
+    }
+
     nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
     nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
     systemProperty 'tests.security.manager', 'false'
@@ -150,7 +164,7 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) {
     systemProperty 'tests.skip_delete_model_index', 'true'
     systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

-    //Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
+    // Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
     // because these features were released in 2.11 version.
     if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
         filter {
@@ -168,6 +182,13 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) {
         }
     }

+    // Excluding the text chunking processor test because this feature was introduced in 2.13
+    if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
+        filter {
+            excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
+        }
+    }
+
     nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
     nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
     systemProperty 'tests.security.manager', 'false'
@@ -210,6 +231,13 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) {
         }
     }

+    // Excluding the text chunking processor test because this feature was introduced in 2.13
+    if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
+        filter {
+            excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
+        }
+    }
+
     nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
     nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
     systemProperty 'tests.security.manager', 'false'

diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java
index 16ed2d229..ed1613e2f 100644
--- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java
+++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java
@@ -4,9 +4,11 @@
  */
 package org.opensearch.neuralsearch.bwc;

+import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Locale;
+import java.util.Objects;
 import java.util.Optional;
 import org.junit.Before;
 import org.opensearch.common.settings.Settings;
@@ -130,4 +132,11 @@ protected void createPipelineForSparseEncodingProcessor(String modelId, String p
         );
         createPipelineProcessor(requestBody, pipelineName, modelId);
     }
+
+    protected void createPipelineForTextChunkingProcessor(String pipelineName) throws Exception {
+        String requestBody = Files.readString(
+            Path.of(classLoader.getResource("processor/PipelineForTextChunkingProcessorConfiguration.json").toURI())
+        );
+        createPipelineProcessor(requestBody, pipelineName, "");
+    }
 }

diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseSearchIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseSearchIT.java
index 70513686b..d0f13c766 100644
--- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseSearchIT.java
+++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseSearchIT.java
@@ -36,7 +36,7 @@ public class NeuralSparseSearchIT extends AbstractRollingUpgradeTestCase {

     // Test rolling-upgrade test sparse embedding processor
     // Create Sparse Encoding Processor, Ingestion Pipeline and add document
-    // Validate process , pipeline and document count in restart-upgrade scenario
+    // Validate process, pipeline and document count in rolling-upgrade scenario
     public void testSparseEncodingProcessor_E2EFlow() throws Exception {
         waitForClusterHealthGreen(NODES_BWC_CLUSTER);
         switch (getClusterType()) {

diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java
new file mode 100644
index 000000000..ed869c876
--- /dev/null
+++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package org.opensearch.neuralsearch.bwc;
+
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.opensearch.index.query.MatchAllQueryBuilder;
+import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER;
+
+public class TextChunkingProcessorIT extends AbstractRollingUpgradeTestCase {
+
+    private static final String PIPELINE_NAME = "pipeline-text-chunking";
+    private static final String INPUT_FIELD = "body";
+    private static final String OUTPUT_FIELD = "body_chunk";
+    private static final String TEST_INDEX_SETTING_PATH = "processor/ChunkingIndexSettings.json";
+    private static final int NUM_DOCS_PER_ROUND = 1;
+    private static final String TEST_INGEST_TEXT =
+        "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.";
+
+    List<String> expectedPassages = List.of(
+        "This is an example document to be chunked. The document ",
+        "contains a single paragraph, two sentences and 24 tokens by ",
+        "standard tokenizer in OpenSearch."
+    );
+
+    // Test rolling-upgrade text chunking processor
+    // Create Text Chunking Processor, Ingestion Pipeline and add document
+    // Validate process, pipeline and document count in rolling-upgrade scenario
+    public void testTextChunkingProcessor_E2EFlow() throws Exception {
+        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
+        String indexName = getIndexNameForTest();
+        switch (getClusterType()) {
+            case OLD:
+                createPipelineForTextChunkingProcessor(PIPELINE_NAME);
+                createChunkingIndex(indexName);
+                addDocument(indexName, "0", INPUT_FIELD, TEST_INGEST_TEXT, null, null);
+                break;
+            case MIXED:
+                int totalDocsCountMixed;
+                if (isFirstMixedRound()) {
+                    totalDocsCountMixed = NUM_DOCS_PER_ROUND;
+                    validateTestIndex(indexName, OUTPUT_FIELD, totalDocsCountMixed, expectedPassages);
+                    addDocument(indexName, "1", INPUT_FIELD, TEST_INGEST_TEXT, null, null);
+                } else {
+                    totalDocsCountMixed = 2 * NUM_DOCS_PER_ROUND;
+                    validateTestIndex(indexName, OUTPUT_FIELD, totalDocsCountMixed, expectedPassages);
+                }
+                break;
+            case UPGRADED:
+                try {
+                    int totalDocsCountUpgraded = 3 * NUM_DOCS_PER_ROUND;
+                    addDocument(indexName, "2", INPUT_FIELD, TEST_INGEST_TEXT, null, null);
+                    validateTestIndex(indexName, OUTPUT_FIELD, totalDocsCountUpgraded, expectedPassages);
+                } finally {
+                    wipeOfTestResources(indexName, PIPELINE_NAME, null, null);
+                }
+                break;
+            default:
+                throw new IllegalStateException("Unexpected value: " + getClusterType());
+        }
+    }
+
+    private void createChunkingIndex(String indexName) throws Exception {
+        URL documentURLPath = classLoader.getResource(TEST_INDEX_SETTING_PATH);
+        Objects.requireNonNull(documentURLPath);
+        String indexSetting = Files.readString(Path.of(documentURLPath.toURI()));
+        createIndexWithConfiguration(indexName, indexSetting, PIPELINE_NAME);
+    }
+
+    private void validateTestIndex(String indexName, String fieldName, int documentCount, Object expected) {
+        int docCount = getDocCount(indexName);
+        assertEquals(documentCount, docCount);
+        MatchAllQueryBuilder query = new MatchAllQueryBuilder();
+        Map<String, Object> searchResults = search(indexName, query, 10);
+        assertNotNull(searchResults);
+        Map<String, Object> document = getFirstInnerHit(searchResults);
+        assertNotNull(document);
+        Object documentSource = document.get("_source");
+        assert (documentSource instanceof Map);
+        @SuppressWarnings("unchecked")
+        Map<String, Object> documentSourceMap = (Map<String, Object>) documentSource;
+        assert (documentSourceMap).containsKey(fieldName);
+        Object ingestOutputs = documentSourceMap.get(fieldName);
+        assertEquals(expected, ingestOutputs);
+    }
+}

diff --git a/qa/rolling-upgrade/src/test/resources/processor/ChunkingIndexSettings.json b/qa/rolling-upgrade/src/test/resources/processor/ChunkingIndexSettings.json
new file mode 100644
index 000000000..956ffc585
--- /dev/null
+++ b/qa/rolling-upgrade/src/test/resources/processor/ChunkingIndexSettings.json
@@ -0,0 +1,17 @@
+{
+  "settings":{
+    "default_pipeline": "%s",
+    "number_of_shards": 3,
+    "number_of_replicas": 1
+  },
+  "mappings": {
+    "properties": {
+      "body": {
+        "type": "text"
+      },
+      "body_chunk": {
+        "type": "text"
+      }
+    }
+  }
+}

diff --git a/qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json b/qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json
index d9a358c24..fe885a0a2 100644
--- a/qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json
+++ b/qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json
@@ -1,13 +1,13 @@
 {
-    "description": "An sparse encoding ingest pipeline",
-    "processors": [
-      {
-        "sparse_encoding": {
-          "model_id": "%s",
-          "field_map": {
-            "passage_text": "passage_embedding"
-          }
+  "description": "A sparse encoding ingest pipeline",
+  "processors": [
+    {
+      "sparse_encoding": {
+        "model_id": "%s",
+        "field_map": {
+          "passage_text": "passage_embedding"
         }
       }
-    ]
-  }
+    }
+  ]
+}

diff --git a/qa/rolling-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json b/qa/rolling-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json
new file mode 100644
index 000000000..6c727b3b4
--- /dev/null
+++ b/qa/rolling-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json
@@ -0,0 +1,18 @@
+{
+  "description": "An example fixed token length chunker pipeline with standard tokenizer",
+  "processors" : [
+    {
+      "text_chunking": {
+        "field_map": {
+          "body": "body_chunk"
+        },
+        "algorithm": {
+          "fixed_token_length": {
+            "token_limit": 10,
+            "tokenizer": "standard"
+          }
+        }
+      }
+    }
+  ]
+}

diff --git a/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorIT.java b/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorIT.java
index dd517aa17..d85865bb5 100644
--- a/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorIT.java
+++ b/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorIT.java
@@ -24,7 +24,6 @@
 import org.opensearch.common.xcontent.XContentType;
 import org.opensearch.index.query.MatchAllQueryBuilder;
 import org.opensearch.neuralsearch.BaseNeuralSearchIT;
-import static org.opensearch.neuralsearch.TestUtils.DEFAULT_USER_AGENT;

 public class TextChunkingProcessorIT extends BaseNeuralSearchIT {
     private static final String INDEX_NAME = "text_chunking_test_index";
@@ -197,20 +196,7 @@ private void createPipelineProcessor(String pipelineName) throws Exception {
         URL pipelineURLPath = classLoader.getResource(PIPELINE_CONFIGS_BY_NAME.get(pipelineName));
         Objects.requireNonNull(pipelineURLPath);
         String requestBody = Files.readString(Path.of(pipelineURLPath.toURI()));
-        Response pipelineCreateResponse = makeRequest(
-            client(),
-            "PUT",
-            "/_ingest/pipeline/" + pipelineName,
-            null,
-            toHttpEntity(String.format(LOCALE, requestBody)),
-            ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, DEFAULT_USER_AGENT))
-        );
-        Map<String, Object> node = XContentHelper.convertToMap(
-            XContentType.JSON.xContent(),
-            EntityUtils.toString(pipelineCreateResponse.getEntity()),
-            false
-        );
-        assertEquals("true", node.get("acknowledged").toString());
+        createPipelineProcessor(requestBody, pipelineName, "");
     }

     private void createTextChunkingIndex(String indexName, String pipelineName) throws Exception {
@@ -222,13 +208,13 @@ private void createTextChunkingIndex(String indexName, String pipelineName) thro
     private void ingestDocument(String documentPath) throws Exception {
         URL documentURLPath = classLoader.getResource(documentPath);
         Objects.requireNonNull(documentURLPath);
-        String ingestDocument = Files.readString(Path.of(documentURLPath.toURI()));
+        String document = Files.readString(Path.of(documentURLPath.toURI()));
         Response response = makeRequest(
             client(),
             "POST",
             INDEX_NAME + "/_doc?refresh",
             null,
-            toHttpEntity(ingestDocument),
+            toHttpEntity(document),
             ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "Kibana"))
         );
         Map<String, Object> map = XContentHelper.convertToMap(

diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java b/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java
index a6f4a3e0f..14efca5f7 100644
--- a/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java
+++ b/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java
@@ -62,6 +62,7 @@ public class TestUtils {
     public static final String DEFAULT_COMBINATION_METHOD = "arithmetic_mean";
     public static final String PARAM_NAME_WEIGHTS = "weights";
     public static final String SPARSE_ENCODING_PROCESSOR = "sparse_encoding";
+    public static final String TEXT_CHUNKING_PROCESSOR = "text_chunking";
     public static final int MAX_TIME_OUT_INTERVAL = 3000;
     public static final int MAX_RETRY = 5;
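
Reviewer note: for anyone who wants to see what these BWC tests assert without standing up an upgrade cluster, the request body below can be POSTed to the _ingest/pipeline/_simulate endpoint of any single 2.13+ node. This is a minimal sketch for manual verification only; the index name "text-chunking-bwc-test" is hypothetical (the tests derive theirs from getIndexNameForTest()), while the pipeline definition and document body are copied from PipelineForTextChunkingProcessorConfiguration.json and TEST_INGEST_TEXT in this patch. With token_limit 10 and the standard tokenizer, the 24-token body should come back in body_chunk as the three passages listed in expectedPassages (10 + 10 + 4 tokens).

POST /_ingest/pipeline/_simulate
{
  "pipeline": {
    "description": "An example fixed token length chunker pipeline with standard tokenizer",
    "processors": [
      {
        "text_chunking": {
          "field_map": {
            "body": "body_chunk"
          },
          "algorithm": {
            "fixed_token_length": {
              "token_limit": 10,
              "tokenizer": "standard"
            }
          }
        }
      }
    ]
  },
  "docs": [
    {
      "_index": "text-chunking-bwc-test",
      "_id": "0",
      "_source": {
        "body": "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch."
      }
    }
  ]
}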