Fix: text chunking processor ingestion bug on multi-node cluster (opensearch-project#713)

* fix multi node text chunking processor index bug

Signed-off-by: yuye-aws <yuyezhu@amazon.com>

* add change log

Signed-off-by: yuye-aws <yuyezhu@amazon.com>

* bug fix: no max token count setting in index

Signed-off-by: yuye-aws <yuyezhu@amazon.com>

* make program faster without creating index settings object

Signed-off-by: yuye-aws <yuyezhu@amazon.com>

* add comment

Signed-off-by: yuye-aws <yuyezhu@amazon.com>

* fix comment

Signed-off-by: yuye-aws <yuyezhu@amazon.com>

* resolve code review

Signed-off-by: yuye-aws <yuyezhu@amazon.com>

* simplify the code given toInt in NumberUtils

Signed-off-by: yuye-aws <yuyezhu@amazon.com>

* resolve code review comments

Signed-off-by: yuye-aws <yuyezhu@amazon.com>

---------

Signed-off-by: yuye-aws <yuyezhu@amazon.com>
(cherry picked from commit 2d42408)
yuye-aws committed May 1, 2024
1 parent 0abb958 commit bd32dd2
Showing 6 changed files with 9 additions and 45 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670))
### Bug Fixes
- Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663))
- Fix multi node "no such index" error in text chunking processor ([#713](https://github.com/opensearch-project/neural-search/pull/713))
### Infrastructure
### Documentation
### Maintenance
@@ -118,12 +118,7 @@ public Map<String, Processor.Factory> getProcessors(Processor.Parameters paramet
TextImageEmbeddingProcessor.TYPE,
new TextImageEmbeddingProcessorFactory(clientAccessor, parameters.env, parameters.ingestService.getClusterService()),
TextChunkingProcessor.TYPE,
new TextChunkingProcessorFactory(
parameters.env,
parameters.ingestService.getClusterService(),
parameters.indicesService,
parameters.analysisRegistry
)
new TextChunkingProcessorFactory(parameters.env, parameters.ingestService.getClusterService(), parameters.analysisRegistry)
);
}

@@ -14,11 +14,9 @@

import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexService;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.indices.IndicesService;
import org.opensearch.index.IndexSettings;
import org.opensearch.ingest.AbstractProcessor;
import org.opensearch.ingest.IngestDocument;
@@ -50,7 +48,6 @@ public final class TextChunkingProcessor extends AbstractProcessor {
private Chunker chunker;
private final Map<String, Object> fieldMap;
private final ClusterService clusterService;
private final IndicesService indicesService;
private final AnalysisRegistry analysisRegistry;
private final Environment environment;

@@ -61,14 +58,12 @@ public TextChunkingProcessor(
final Map<String, Object> algorithmMap,
final Environment environment,
final ClusterService clusterService,
final IndicesService indicesService,
final AnalysisRegistry analysisRegistry
) {
super(tag, description);
this.fieldMap = fieldMap;
this.environment = environment;
this.clusterService = clusterService;
this.indicesService = indicesService;
this.analysisRegistry = analysisRegistry;
parseAlgorithmMap(algorithmMap);
}
@@ -149,14 +144,14 @@ private boolean isListOfString(final Object value) {
}

private int getMaxTokenCount(final Map<String, Object> sourceAndMetadataMap) {
int defaultMaxTokenCount = IndexSettings.MAX_TOKEN_COUNT_SETTING.get(environment.settings());
String indexName = sourceAndMetadataMap.get(IndexFieldMapper.NAME).toString();
IndexMetadata indexMetadata = clusterService.state().metadata().index(indexName);
if (Objects.isNull(indexMetadata)) {
return IndexSettings.MAX_TOKEN_COUNT_SETTING.get(environment.settings());
return defaultMaxTokenCount;
}
// if the index is specified in the metadata, read maxTokenCount from the index setting
IndexService indexService = indicesService.indexServiceSafe(indexMetadata.getIndex());
return indexService.getIndexSettings().getMaxTokenCount();
return IndexSettings.MAX_TOKEN_COUNT_SETTING.get(indexMetadata.getSettings());
}

/**
@@ -9,7 +9,6 @@
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.env.Environment;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.indices.IndicesService;
import org.opensearch.ingest.Processor;
import org.opensearch.neuralsearch.processor.TextChunkingProcessor;
import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.TYPE;
@@ -29,19 +28,11 @@ public class TextChunkingProcessorFactory implements Processor.Factory {

private final ClusterService clusterService;

private final IndicesService indicesService;

private final AnalysisRegistry analysisRegistry;

public TextChunkingProcessorFactory(
Environment environment,
ClusterService clusterService,
IndicesService indicesService,
AnalysisRegistry analysisRegistry
) {
public TextChunkingProcessorFactory(Environment environment, ClusterService clusterService, AnalysisRegistry analysisRegistry) {
this.environment = environment;
this.clusterService = clusterService;
this.indicesService = indicesService;
this.analysisRegistry = analysisRegistry;
}

@@ -54,15 +45,6 @@ public TextChunkingProcessor create(
) throws Exception {
Map<String, Object> fieldMap = readMap(TYPE, processorTag, config, FIELD_MAP_FIELD);
Map<String, Object> algorithmMap = readMap(TYPE, processorTag, config, ALGORITHM_FIELD);
return new TextChunkingProcessor(
processorTag,
description,
fieldMap,
algorithmMap,
environment,
clusterService,
indicesService,
analysisRegistry
);
return new TextChunkingProcessor(processorTag, description, fieldMap, algorithmMap, environment, clusterService, analysisRegistry);
}
}
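
As a rough usage illustration of the trimmed-down factory signature (a sketch with mocked dependencies mirroring the test setup further below, not code from this PR): the factory now takes only the environment, cluster service, and analysis registry. The import paths, config keys (`field_map`, `algorithm`, `fixed_token_length`, `token_limit`), and concrete values are assumptions for illustration.

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import static org.mockito.Mockito.mock;

import org.opensearch.cluster.service.ClusterService;
import org.opensearch.env.Environment;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.neuralsearch.processor.TextChunkingProcessor;
import org.opensearch.neuralsearch.processor.factory.TextChunkingProcessorFactory;

// Sketch: exercising the simplified three-argument factory, as the tests below do.
final class FactoryUsageSketch {
    static TextChunkingProcessor buildProcessor(AnalysisRegistry analysisRegistry) throws Exception {
        Environment environment = mock(Environment.class);
        ClusterService clusterService = mock(ClusterService.class);
        TextChunkingProcessorFactory factory = new TextChunkingProcessorFactory(environment, clusterService, analysisRegistry);

        Map<String, Object> config = new HashMap<>();
        config.put("field_map", Map.of("body", "body_chunk"));  // source field -> chunked output field
        config.put("algorithm", Map.of("fixed_token_length", Map.of("token_limit", 10)));

        return factory.create(Collections.emptyMap(), "chunking-tag", "sketch processor", config);
    }
}
```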
@@ -31,7 +31,6 @@
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.index.analysis.TokenizerFactory;
import org.opensearch.index.mapper.IndexFieldMapper;
import org.opensearch.indices.IndicesService;
import org.opensearch.indices.analysis.AnalysisModule;
import org.opensearch.ingest.IngestDocument;
import org.opensearch.ingest.Processor;
@@ -84,11 +83,10 @@ public void setup() {
when(environment.settings()).thenReturn(settings);
ClusterState clusterState = mock(ClusterState.class);
ClusterService clusterService = mock(ClusterService.class);
IndicesService indicesService = mock(IndicesService.class);
when(metadata.index(anyString())).thenReturn(null);
when(clusterState.metadata()).thenReturn(metadata);
when(clusterService.state()).thenReturn(clusterState);
textChunkingProcessorFactory = new TextChunkingProcessorFactory(environment, clusterService, indicesService, getAnalysisRegistry());
textChunkingProcessorFactory = new TextChunkingProcessorFactory(environment, clusterService, getAnalysisRegistry());
}

private Map<String, Object> createFixedTokenLengthParameters() {
@@ -19,7 +19,6 @@
import org.opensearch.env.TestEnvironment;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.index.analysis.TokenizerFactory;
import org.opensearch.indices.IndicesService;
import org.opensearch.indices.analysis.AnalysisModule;
import org.opensearch.ingest.Processor;
import org.opensearch.neuralsearch.processor.TextChunkingProcessor;
@@ -62,13 +61,7 @@ public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokeniz
public void setup() {
Environment environment = mock(Environment.class);
ClusterService clusterService = mock(ClusterService.class);
IndicesService indicesService = mock(IndicesService.class);
this.textChunkingProcessorFactory = new TextChunkingProcessorFactory(
environment,
clusterService,
indicesService,
getAnalysisRegistry()
);
this.textChunkingProcessorFactory = new TextChunkingProcessorFactory(environment, clusterService, getAnalysisRegistry());
}

@SneakyThrows