Fix Graph Filter Error in Search (#5665)

* fix graph filter out of bound error Signed-off-by: Mingshi Liu <mingshl@amazon.com> * add changelog Signed-off-by: Mingshi Liu <mingshl@amazon.com> * run gradle spotlessApply Signed-off-by: Mingshi Liu <mingshl@amazon.com> * reproduce error in unit test Signed-off-by: Mingshi Liu <mingshl@amazon.com> * format to pass spotlessApply Signed-off-by: Mingshi Liu <mingshl@amazon.com> * organize package Signed-off-by: Mingshi Liu <mingshl@amazon.com> Signed-off-by: Mingshi Liu <mingshl@amazon.com> (cherry picked from commit 6a7a9a1) Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
opensearch-project · dblock · Jan 10, 2023 · Jan 9, 2023 · Jan 9, 2023 · af861aa7f2a9a2510fb7e1ec05135c9945f1ccef
commit af861aa7f2a9a2510fb7e1ec05135c9945f1ccef
@@ -55,7 +55,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Update thresholds map when cluster manager throttling setting is removed ([#5524](https://github.com/opensearch-project/OpenSearch/pull/5524))
 - Fix backward compatibility for static cluster manager throttling threshold setting ([#5633](https://github.com/opensearch-project/OpenSearch/pull/5633))
 - Fix index exclusion behavior in snapshot restore and clone APIs ([#5626](https://github.com/opensearch-project/OpenSearch/pull/5626))
-
+- Fix graph filter error in search ([#5665](https://github.com/opensearch-project/OpenSearch/pull/5665))
 ### Security
 
 [Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.4...2.x
@@ -70,7 +70,6 @@
 import org.opensearch.index.mapper.TextFieldMapper;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.index.query.support.QueryParsers;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -753,6 +752,14 @@ private Query analyzeGraphBoolean(String field, TokenStream source, BooleanClaus
                 lastState = end;
                 final Query queryPos;
                 boolean usePrefix = isPrefix && end == -1;
+                /**
+                 * check if the GraphTokenStreamFiniteStrings graph is empty
+                 * return empty BooleanQuery result
+                 */
+                Iterator<TokenStream> graphIt = graph.getFiniteStrings();
+                if (!graphIt.hasNext()) {
+                    return builder.build();
+                }
                 if (graph.hasSidePath(start)) {
                     final Iterator<TokenStream> it = graph.getFiniteStrings(start, end);
                     Iterator<Query> queries = new Iterator<Query>() {

@@ -33,6 +33,7 @@
 package org.opensearch.index.query;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.tests.analysis.CannedBinaryTokenStream;
 import org.apache.lucene.tests.analysis.MockSynonymAnalyzer;
 import org.apache.lucene.index.Term;
@@ -52,6 +53,7 @@
 import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;
 import org.opensearch.action.admin.indices.mapping.put.PutMappingRequest;
 import org.opensearch.common.ParsingException;
 import org.opensearch.common.Strings;
@@ -72,6 +74,7 @@
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Iterator;
 
 import static org.hamcrest.CoreMatchers.either;
 import static org.hamcrest.CoreMatchers.instanceOf;
@@ -326,6 +329,20 @@ public void testFuzzinessOnNonStringField() throws Exception {
         query.toQuery(context); // no exception
     }
 
+    public void testMatchQueryWithNoSidePath() throws Exception {
+        QueryShardContext testContext = createShardContext();
+        final MatchQuery testMatchQuery = new MatchQuery(testContext);
+        MockGraphAnalyzer mockAnalyzer = new MockGraphAnalyzer(createGiantGraphWithNoSide());
+        testMatchQuery.setAnalyzer(mockAnalyzer);
+        testMatchQuery.setAutoGenerateSynonymsPhraseQuery(true);
+        GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(mockAnalyzer.getTokenStream());
+        Iterator<TokenStream> graphIt = graph.getFiniteStrings();
+        assertEquals(graphIt.hasNext(), false);
+        String testField = "Gas Lift Storage Bed Frame with Arched Bed Head in King";
+        String testValue = "head board, bed head, bedhead, headboard";
+        testMatchQuery.parse(Type.BOOLEAN, testField, testValue); // no exception
+    }
+
     public void testDefaultFuzziness() {
         MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", TEXT_FIELD_NAME).fuzziness(null);
         assertNull(matchQueryBuilder.fuzziness());
@@ -542,12 +559,18 @@ private static class MockGraphAnalyzer extends Analyzer {
 
         MockGraphAnalyzer(CannedBinaryTokenStream.BinaryToken[] tokens) {
             this.tokenStream = new CannedBinaryTokenStream(tokens);
+
         }
 
         @Override
         protected TokenStreamComponents createComponents(String fieldName) {
             return new TokenStreamComponents(r -> {}, tokenStream);
         }
+
+        public CannedBinaryTokenStream getTokenStream() {
+            return this.tokenStream;
+        }
+
     }
 
     /**
@@ -570,6 +593,14 @@ private static CannedBinaryTokenStream.BinaryToken[] createGiantGraph(int numPos
         return tokens.toArray(new CannedBinaryTokenStream.BinaryToken[0]);
     }
 
+    /**
+     * Creates a graph token stream with no side path.
+     **/
+    private static CannedBinaryTokenStream.BinaryToken[] createGiantGraphWithNoSide() {
+        List<CannedBinaryTokenStream.BinaryToken> tokens = new ArrayList<>();
+        return tokens.toArray(new CannedBinaryTokenStream.BinaryToken[0]);
+    }
+
     /**
      * Creates a graph token stream with {@link BooleanQuery#getMaxClauseCount()}
      * expansions at the last position.