Skip to content

Commit

Permalink
[Backport] [2.x] Bugfix to guard against stack overflow errors caused…
Browse files Browse the repository at this point in the history
… by very large reg-ex input (#2817)

* Bugfix to guard against stack overflow errors caused by very large reg-ex input (#2810)

* Bugfix to guard against stack overflow errors caused by very large reg-ex input

This change fixes a code path that did not properly impose the index-level max_regex_length limit. Therefore, it was possible to provide an arbitrarily large string as the include/exclude reg-ex value under search aggregations. This exposed the underlying node to crashes from a StackOverflowError, due to how the Lucene RegExp class processes strings using stack frames.

Signed-off-by: Kartik Ganesh <gkart@amazon.com>

* Adding integration tests for large string RegEx

Signed-off-by: Kartik Ganesh <gkart@amazon.com>

* Spotless

Signed-off-by: Kartik Ganesh <gkart@amazon.com>

* Updates to the large string reg-ex check (#2814)

* Updates to the large string reg-ex check

Removed the null-case for IndexSettings since this only occurs in tests. The tests now use a dummy Index Setting.
This change also fixes a bug with the base case handling of max regex length in the check.

Signed-off-by: Kartik Ganesh <gkart@amazon.com>
  • Loading branch information
kartg authored Apr 8, 2022
1 parent c89e1fc commit 21e3db9
Show file tree
Hide file tree
Showing 12 changed files with 161 additions and 73 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,18 @@

package org.opensearch.search.aggregations;

import org.opensearch.OpenSearchException;
import org.opensearch.action.index.IndexRequestBuilder;
import org.opensearch.action.search.SearchPhaseExecutionException;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
import org.opensearch.search.aggregations.bucket.terms.RareTermsAggregationBuilder;
import org.opensearch.search.aggregations.bucket.terms.SignificantTermsAggregationBuilder;
import org.opensearch.search.aggregations.bucket.terms.SignificantTermsAggregatorFactory;
import org.opensearch.search.aggregations.bucket.terms.Terms;
import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.opensearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
import org.opensearch.test.OpenSearchIntegTestCase;

import java.util.ArrayList;
Expand All @@ -50,6 +58,11 @@ public class AggregationsIntegrationIT extends OpenSearchIntegTestCase {

static int numDocs;

private static final String LARGE_STRING = "a".repeat(2000);
private static final String LARGE_STRING_EXCEPTION_MESSAGE = "The length of regex ["
+ LARGE_STRING.length()
+ "] used in the request has exceeded the allowed maximum";

@Override
public void setupSuiteScopeCluster() throws Exception {
assertAcked(prepareCreate("index").setMapping("f", "type=keyword").get());
Expand Down Expand Up @@ -85,4 +98,51 @@ public void testScroll() {
assertEquals(numDocs, total);
}

/**
 * Verifies that a terms aggregation configured with an oversized include regex
 * is rejected for every supported execution hint.
 */
public void testLargeRegExTermsAggregation() {
    for (TermsAggregatorFactory.ExecutionMode mode : TermsAggregatorFactory.ExecutionMode.values()) {
        final TermsAggregationBuilder termsAggregation = terms("my_terms").field("f")
            .executionHint(mode.toString())
            .includeExclude(getLargeStringInclude());
        runLargeStringAggregationTest(termsAggregation);
    }
}

/**
 * Verifies that a significant-terms aggregation configured with an oversized
 * include regex is rejected for every supported execution hint.
 */
public void testLargeRegExSignificantTermsAggregation() {
    for (SignificantTermsAggregatorFactory.ExecutionMode mode : SignificantTermsAggregatorFactory.ExecutionMode.values()) {
        final SignificantTermsAggregationBuilder significantTerms = new SignificantTermsAggregationBuilder("my_terms").field("f")
            .executionHint(mode.toString())
            .includeExclude(getLargeStringInclude());
        runLargeStringAggregationTest(significantTerms);
    }
}

/**
 * Verifies that a rare-terms aggregation configured with an oversized include
 * regex is rejected. Rare-terms only supports the "map" execution hint, so no
 * execution-mode loop is needed here.
 */
public void testLargeRegExRareTermsAggregation() {
    final RareTermsAggregationBuilder rareTerms = new RareTermsAggregationBuilder("my_terms").field("f")
        .maxDocCount(2)
        .includeExclude(getLargeStringInclude());
    runLargeStringAggregationTest(rareTerms);
}

/**
 * Builds an include/exclude filter whose include pattern is the oversized
 * {@code LARGE_STRING}, which should trip the max_regex_length guard.
 */
private IncludeExclude getLargeStringInclude() {
    final String oversizedPattern = LARGE_STRING;
    return new IncludeExclude(oversizedPattern, null);
}

/**
 * Executes a search with the given aggregation and asserts that it fails with a
 * {@link SearchPhaseExecutionException} whose cause chain is
 * OpenSearchException -&gt; IllegalArgumentException carrying the
 * max_regex_length error message.
 *
 * @param aggregation an aggregation configured with an oversized regex include
 */
private void runLargeStringAggregationTest(AggregationBuilder aggregation) {
    boolean exceptionThrown = false;
    // NOTE: the original declared an unused IncludeExclude local here; removed as dead code.
    try {
        client().prepareSearch("index").addAggregation(aggregation).get();
    } catch (SearchPhaseExecutionException ex) {
        exceptionThrown = true;
        Throwable nestedException = ex.getCause();
        assertNotNull(nestedException);
        assertTrue(nestedException instanceof OpenSearchException);
        Throwable rootCause = nestedException.getCause();
        assertNotNull(rootCause);
        assertTrue(rootCause instanceof IllegalArgumentException);
        String actualExceptionMessage = rootCause.getMessage();
        assertTrue(actualExceptionMessage.startsWith(LARGE_STRING_EXCEPTION_MESSAGE));
    }
    assertTrue("Exception should have been thrown", exceptionThrown);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import org.opensearch.common.xcontent.ToXContentFragment;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentParser;
import org.opensearch.index.IndexSettings;
import org.opensearch.search.DocValueFormat;

import java.io.IOException;
Expand Down Expand Up @@ -337,19 +338,16 @@ public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) thro

}

private final RegExp include, exclude;
private final String include, exclude;
private final SortedSet<BytesRef> includeValues, excludeValues;
private final int incZeroBasedPartition;
private final int incNumPartitions;

/**
* @param include The regular expression pattern for the terms to be included
* @param exclude The regular expression pattern for the terms to be excluded
* @param include The string or regular expression pattern for the terms to be included
* @param exclude The string or regular expression pattern for the terms to be excluded
*/
public IncludeExclude(RegExp include, RegExp exclude) {
if (include == null && exclude == null) {
throw new IllegalArgumentException();
}
public IncludeExclude(String include, String exclude) {
this.include = include;
this.exclude = exclude;
this.includeValues = null;
Expand All @@ -358,10 +356,6 @@ public IncludeExclude(RegExp include, RegExp exclude) {
this.incNumPartitions = 0;
}

public IncludeExclude(String include, String exclude) {
this(include == null ? null : new RegExp(include), exclude == null ? null : new RegExp(exclude));
}

/**
* @param includeValues The terms to be included
* @param excludeValues The terms to be excluded
Expand Down Expand Up @@ -412,10 +406,8 @@ public IncludeExclude(StreamInput in) throws IOException {
excludeValues = null;
incZeroBasedPartition = 0;
incNumPartitions = 0;
String includeString = in.readOptionalString();
include = includeString == null ? null : new RegExp(includeString);
String excludeString = in.readOptionalString();
exclude = excludeString == null ? null : new RegExp(excludeString);
include = in.readOptionalString();
exclude = in.readOptionalString();
return;
}
include = null;
Expand Down Expand Up @@ -447,8 +439,8 @@ public void writeTo(StreamOutput out) throws IOException {
boolean regexBased = isRegexBased();
out.writeBoolean(regexBased);
if (regexBased) {
out.writeOptionalString(include == null ? null : include.getOriginalString());
out.writeOptionalString(exclude == null ? null : exclude.getOriginalString());
out.writeOptionalString(include);
out.writeOptionalString(exclude);
} else {
boolean hasIncludes = includeValues != null;
out.writeBoolean(hasIncludes);
Expand Down Expand Up @@ -584,26 +576,46 @@ public boolean isPartitionBased() {
return incNumPartitions > 0;
}

private Automaton toAutomaton() {
Automaton a = null;
private Automaton toAutomaton(IndexSettings indexSettings) {
Automaton a;
if (include != null) {
a = include.toAutomaton();
validateRegExpStringLength(include, indexSettings);
a = new RegExp(include).toAutomaton();
} else if (includeValues != null) {
a = Automata.makeStringUnion(includeValues);
} else {
a = Automata.makeAnyString();
}
if (exclude != null) {
a = Operations.minus(a, exclude.toAutomaton(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
validateRegExpStringLength(exclude, indexSettings);
Automaton excludeAutomaton = new RegExp(exclude).toAutomaton();
a = Operations.minus(a, excludeAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
} else if (excludeValues != null) {
a = Operations.minus(a, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
}
return a;
}

public StringFilter convertToStringFilter(DocValueFormat format) {
/**
 * Rejects regex patterns longer than the index-level max_regex_length setting.
 * A non-positive configured maximum disables the check.
 *
 * @param source        the regex pattern supplied in the request
 * @param indexSettings settings carrying the configured maximum length
 * @throws IllegalArgumentException if the pattern exceeds the limit
 */
private static void validateRegExpStringLength(String source, IndexSettings indexSettings) {
    final int maxRegexLength = indexSettings.getMaxRegexLength();
    if (maxRegexLength <= 0 || source.length() <= maxRegexLength) {
        return;
    }
    throw new IllegalArgumentException(
        "The length of regex ["
            + source.length()
            + "] used in the request has exceeded "
            + "the allowed maximum of ["
            + maxRegexLength
            + "]. "
            + "This maximum can be set by changing the ["
            + IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey()
            + "] index level setting."
    );
}

public StringFilter convertToStringFilter(DocValueFormat format, IndexSettings indexSettings) {
if (isRegexBased()) {
return new AutomatonBackedStringFilter(toAutomaton());
return new AutomatonBackedStringFilter(toAutomaton(indexSettings));
}
if (isPartitionBased()) {
return new PartitionedStringFilter();
Expand All @@ -624,10 +636,10 @@ private static SortedSet<BytesRef> parseForDocValues(SortedSet<BytesRef> endUser
return result;
}

public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {
public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, IndexSettings indexSettings) {

if (isRegexBased()) {
return new AutomatonBackedOrdinalsFilter(toAutomaton());
return new AutomatonBackedOrdinalsFilter(toAutomaton(indexSettings));
}
if (isPartitionBased()) {
return new PartitionedOrdinalsFilter();
Expand Down Expand Up @@ -684,7 +696,7 @@ public LongFilter convertToDoubleFilter() {
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
if (include != null) {
builder.field(INCLUDE_FIELD.getPreferredName(), include.getOriginalString());
builder.field(INCLUDE_FIELD.getPreferredName(), include);
} else if (includeValues != null) {
builder.startArray(INCLUDE_FIELD.getPreferredName());
for (BytesRef value : includeValues) {
Expand All @@ -698,7 +710,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.endObject();
}
if (exclude != null) {
builder.field(EXCLUDE_FIELD.getPreferredName(), exclude.getOriginalString());
builder.field(EXCLUDE_FIELD.getPreferredName(), exclude);
} else if (excludeValues != null) {
builder.startArray(EXCLUDE_FIELD.getPreferredName());
for (BytesRef value : excludeValues) {
Expand All @@ -711,14 +723,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws

@Override
public int hashCode() {
    // include/exclude are plain Strings after the refactor, so they hash directly;
    // the scraped diff also showed the obsolete getOriginalString()-based body,
    // which is dropped here in favor of the post-change implementation.
    return Objects.hash(include, exclude, includeValues, excludeValues, incZeroBasedPartition, incNumPartitions);
}

@Override
Expand All @@ -730,14 +735,8 @@ public boolean equals(Object obj) {
return false;
}
IncludeExclude other = (IncludeExclude) obj;
return Objects.equals(
include == null ? null : include.getOriginalString(),
other.include == null ? null : other.include.getOriginalString()
)
&& Objects.equals(
exclude == null ? null : exclude.getOriginalString(),
other.exclude == null ? null : other.exclude.getOriginalString()
)
return Objects.equals(include, other.include)
&& Objects.equals(exclude, other.exclude)
&& Objects.equals(includeValues, other.includeValues)
&& Objects.equals(excludeValues, other.excludeValues)
&& Objects.equals(incZeroBasedPartition, other.incZeroBasedPartition)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

import org.opensearch.common.ParseField;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.Aggregator;
Expand Down Expand Up @@ -250,7 +251,10 @@ Aggregator create(
double precision,
CardinalityUpperBound cardinality
) throws IOException {
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
final IncludeExclude.StringFilter filter = includeExclude == null
? null
: includeExclude.convertToStringFilter(format, indexSettings);
return new StringRareTermsAggregator(
name,
factories,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

import org.opensearch.common.ParseField;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
Expand Down Expand Up @@ -325,8 +326,10 @@ Aggregator create(
CardinalityUpperBound cardinality,
Map<String, Object> metadata
) throws IOException {

final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
final IncludeExclude.StringFilter filter = includeExclude == null
? null
: includeExclude.convertToStringFilter(format, indexSettings);
return new MapStringTermsAggregator(
name,
factories,
Expand Down Expand Up @@ -364,8 +367,10 @@ Aggregator create(
CardinalityUpperBound cardinality,
Map<String, Object> metadata
) throws IOException {

final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null
? null
: includeExclude.convertToOrdinalsFilter(format, indexSettings);
boolean remapGlobalOrd = true;
if (cardinality == CardinalityUpperBound.ONE && factories == AggregatorFactories.EMPTY && includeExclude == null) {
/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.BytesRefHash;
import org.opensearch.common.util.ObjectArray;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryShardContext;
Expand Down Expand Up @@ -137,7 +138,10 @@ protected Aggregator createInternal(

// TODO - need to check with mapping that this is indeed a text field....

IncludeExclude.StringFilter incExcFilter = includeExclude == null ? null : includeExclude.convertToStringFilter(DocValueFormat.RAW);
IndexSettings indexSettings = searchContext.getQueryShardContext().getIndexSettings();
IncludeExclude.StringFilter incExcFilter = includeExclude == null
? null
: includeExclude.convertToStringFilter(DocValueFormat.RAW, indexSettings);

MapStringTermsAggregator.CollectorSource collectorSource = new SignificantTextCollectorSource(
queryShardContext.lookup().source(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

import org.apache.lucene.search.IndexSearcher;
import org.opensearch.common.ParseField;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.AggregationExecutionException;
Expand Down Expand Up @@ -380,7 +381,10 @@ Aggregator create(
CardinalityUpperBound cardinality,
Map<String, Object> metadata
) throws IOException {
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
final IncludeExclude.StringFilter filter = includeExclude == null
? null
: includeExclude.convertToStringFilter(format, indexSettings);
return new MapStringTermsAggregator(
name,
factories,
Expand Down Expand Up @@ -458,7 +462,10 @@ Aggregator create(
);

}
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null
? null
: includeExclude.convertToOrdinalsFilter(format, indexSettings);
boolean remapGlobalOrds;
if (cardinality == CardinalityUpperBound.ONE && REMAP_GLOBAL_ORDS != null) {
/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
package org.opensearch.search.aggregations.bucket;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.RegExp;
import org.opensearch.search.aggregations.BaseAggregationTestCase;
import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
import org.opensearch.search.aggregations.bucket.terms.RareTermsAggregationBuilder;
Expand All @@ -59,13 +58,13 @@ protected RareTermsAggregationBuilder createTestAggregatorBuilder() {
IncludeExclude incExc = null;
switch (randomInt(6)) {
case 0:
incExc = new IncludeExclude(new RegExp("foobar"), null);
incExc = new IncludeExclude("foobar", null);
break;
case 1:
incExc = new IncludeExclude(null, new RegExp("foobaz"));
incExc = new IncludeExclude(null, "foobaz");
break;
case 2:
incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz"));
incExc = new IncludeExclude("foobar", "foobaz");
break;
case 3:
SortedSet<BytesRef> includeValues = new TreeSet<>();
Expand Down
Loading

0 comments on commit 21e3db9

Please sign in to comment.