Skip to content

Commit 9d918e6

Browse files
ishag4 and Isha Gupta authored
Fix issue with case-insensitive + wildcard + aggregation query. Signed-off-by: Isha Gupta <igupta24@apple.com> (#19489)
Signed-off-by: Isha Gupta <igupta24@apple.com> Signed-off-by: ishag4 <isha26701@gmail.com> Co-authored-by: Isha Gupta <igupta24@apple.com>
1 parent 147d6df commit 9d918e6

File tree

3 files changed

+63
-1
lines changed

3 files changed

+63
-1
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3333
### Fixed
3434
- Fix Allocation and Rebalance Constraints of WeightFunction are incorrectly reset ([#19012](https://github.com/opensearch-project/OpenSearch/pull/19012))
3535
- Fix flaky test FieldDataLoadingIT.testIndicesFieldDataCacheSizeSetting ([#19571](https://github.com/opensearch-project/OpenSearch/pull/19571))
36+
- Fix case-insensitive wildcard + aggregation query crash ([#19489](https://github.com/opensearch-project/OpenSearch/pull/19489))
3637
- Avoid primary shard failure caused by merged segment warmer exceptions ([#19436](https://github.com/opensearch-project/OpenSearch/pull/19436))
3738
- Fix pull-based ingestion out-of-bounds offset scenarios and remove persisted offsets ([#19607](https://github.com/opensearch-project/OpenSearch/pull/19607))
3839
- Fix issue with updating core with a patch number other than 0 ([#19377](https://github.com/opensearch-project/OpenSearch/pull/19377))

server/src/main/java/org/opensearch/common/lucene/search/AutomatonQueries.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.apache.lucene.util.automaton.Automata;
4040
import org.apache.lucene.util.automaton.Automaton;
4141
import org.apache.lucene.util.automaton.Operations;
42+
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
4243

4344
import java.util.ArrayList;
4445
import java.util.Arrays;
@@ -92,7 +93,14 @@ public static AutomatonQuery caseInsensitiveTermQuery(Term term) {
9293
* Build an automaton matching a wildcard pattern, ASCII case insensitive. If the method is null, {@link MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE} is used.
9394
*/
9495
public static AutomatonQuery caseInsensitiveWildcardQuery(Term wildcardquery, MultiTermQuery.RewriteMethod method) {
95-
return createAutomatonQuery(wildcardquery, toCaseInsensitiveWildcardAutomaton(wildcardquery), method);
96+
Automaton automaton = toCaseInsensitiveWildcardAutomaton(wildcardquery);
97+
try {
98+
automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
99+
} catch (TooComplexToDeterminizeException e) {
100+
throw new RuntimeException("Wildcard query too complex to determinize for term: " + wildcardquery, e);
101+
}
102+
assert automaton.isDeterministic();
103+
return createAutomatonQuery(wildcardquery, automaton, method);
96104
}
97105

98106
/**

server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import org.apache.lucene.util.automaton.Automata;
4949
import org.apache.lucene.util.automaton.Automaton;
5050
import org.apache.lucene.util.automaton.Operations;
51+
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
5152
import org.opensearch.OpenSearchException;
5253
import org.opensearch.common.lucene.BytesRefs;
5354
import org.opensearch.common.lucene.Lucene;
@@ -61,7 +62,9 @@
6162
import java.util.Collections;
6263
import java.util.List;
6364

65+
import static org.hamcrest.Matchers.containsString;
6466
import static org.hamcrest.Matchers.equalTo;
67+
import static org.hamcrest.Matchers.instanceOf;
6568
import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE;
6669
import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
6770

@@ -231,6 +234,56 @@ public void testIndexPrefixes() {
231234
assertThat(q, equalTo(expected));
232235
}
233236

237+
public void testCaseInsensitiveWildcardQueryDeterminization() {
238+
Term wildcardTerm = new Term("field", "test*");
239+
Query result = AutomatonQueries.caseInsensitiveWildcardQuery(wildcardTerm, null);
240+
241+
assertNotNull(result);
242+
assertTrue(((AutomatonQuery) result).getAutomaton().isDeterministic());
243+
}
244+
245+
private String createComplexPattern(int repetitions, String basePattern) {
246+
StringBuilder pattern = new StringBuilder();
247+
for (int i = 0; i < repetitions; i++) {
248+
pattern.append(basePattern);
249+
}
250+
return pattern.toString();
251+
}
252+
253+
private String createExponentialPattern(int depth) {
254+
StringBuilder pattern = new StringBuilder();
255+
for (int i = 0; i < depth; i++) {
256+
pattern.append("(");
257+
for (int j = 0; j < 5; j++) {
258+
pattern.append((char) ('a' + (i * 5 + j) % 26)).append("*");
259+
}
260+
pattern.append(")*");
261+
}
262+
return pattern.toString();
263+
}
264+
265+
public void testCaseInsensitiveWildcardQueryTooComplexToDeterminize() {
266+
String[] complexPatterns = {
267+
createComplexPattern(200, "a*b*c*d*e*f*g*h*i*j*"),
268+
createComplexPattern(150, "*[a-z]*[A-Z]*[0-9]*"),
269+
createExponentialPattern(10) };
270+
271+
for (String pattern : complexPatterns) {
272+
Term complexTerm = new Term("field", pattern);
273+
274+
try {
275+
AutomatonQuery result = AutomatonQueries.caseInsensitiveWildcardQuery(complexTerm, null);
276+
assertNotNull(result);
277+
assertTrue(result.getAutomaton().isDeterministic());
278+
} catch (RuntimeException e) {
279+
assertThat(e.getCause(), instanceOf(TooComplexToDeterminizeException.class));
280+
assertThat(e.getMessage(), containsString("Wildcard query too complex to determinize for term:"));
281+
assertThat(e.getMessage(), containsString(complexTerm.toString()));
282+
return;
283+
}
284+
}
285+
}
286+
234287
public void testFetchSourceValue() throws IOException {
235288
TextFieldType fieldType = createFieldType(true);
236289
fieldType.setIndexAnalyzer(Lucene.STANDARD_ANALYZER);

0 commit comments

Comments
 (0)