|
48 | 48 | import org.apache.lucene.util.automaton.Automata; |
49 | 49 | import org.apache.lucene.util.automaton.Automaton; |
50 | 50 | import org.apache.lucene.util.automaton.Operations; |
| 51 | +import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; |
51 | 52 | import org.opensearch.OpenSearchException; |
52 | 53 | import org.opensearch.common.lucene.BytesRefs; |
53 | 54 | import org.opensearch.common.lucene.Lucene; |
|
61 | 62 | import java.util.Collections; |
62 | 63 | import java.util.List; |
63 | 64 |
|
| 65 | +import static org.hamcrest.Matchers.containsString; |
64 | 66 | import static org.hamcrest.Matchers.equalTo; |
| 67 | +import static org.hamcrest.Matchers.instanceOf; |
65 | 68 | import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE; |
66 | 69 | import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE; |
67 | 70 |
|
@@ -231,6 +234,56 @@ public void testIndexPrefixes() { |
231 | 234 | assertThat(q, equalTo(expected)); |
232 | 235 | } |
233 | 236 |
|
| 237 | + public void testCaseInsensitiveWildcardQueryDeterminization() { |
| 238 | + Term wildcardTerm = new Term("field", "test*"); |
| 239 | + Query result = AutomatonQueries.caseInsensitiveWildcardQuery(wildcardTerm, null); |
| 240 | + |
| 241 | + assertNotNull(result); |
| 242 | + assertTrue(((AutomatonQuery) result).getAutomaton().isDeterministic()); |
| 243 | + } |
| 244 | + |
| 245 | + private String createComplexPattern(int repetitions, String basePattern) { |
| 246 | + StringBuilder pattern = new StringBuilder(); |
| 247 | + for (int i = 0; i < repetitions; i++) { |
| 248 | + pattern.append(basePattern); |
| 249 | + } |
| 250 | + return pattern.toString(); |
| 251 | + } |
| 252 | + |
| 253 | + private String createExponentialPattern(int depth) { |
| 254 | + StringBuilder pattern = new StringBuilder(); |
| 255 | + for (int i = 0; i < depth; i++) { |
| 256 | + pattern.append("("); |
| 257 | + for (int j = 0; j < 5; j++) { |
| 258 | + pattern.append((char) ('a' + (i * 5 + j) % 26)).append("*"); |
| 259 | + } |
| 260 | + pattern.append(")*"); |
| 261 | + } |
| 262 | + return pattern.toString(); |
| 263 | + } |
| 264 | + |
| 265 | + public void testCaseInsensitiveWildcardQueryTooComplexToDeterminize() { |
| 266 | + String[] complexPatterns = { |
| 267 | + createComplexPattern(200, "a*b*c*d*e*f*g*h*i*j*"), |
| 268 | + createComplexPattern(150, "*[a-z]*[A-Z]*[0-9]*"), |
| 269 | + createExponentialPattern(10) }; |
| 270 | + |
| 271 | + for (String pattern : complexPatterns) { |
| 272 | + Term complexTerm = new Term("field", pattern); |
| 273 | + |
| 274 | + try { |
| 275 | + AutomatonQuery result = AutomatonQueries.caseInsensitiveWildcardQuery(complexTerm, null); |
| 276 | + assertNotNull(result); |
| 277 | + assertTrue(result.getAutomaton().isDeterministic()); |
| 278 | + } catch (RuntimeException e) { |
| 279 | + assertThat(e.getCause(), instanceOf(TooComplexToDeterminizeException.class)); |
| 280 | + assertThat(e.getMessage(), containsString("Wildcard query too complex to determinize for term:")); |
| 281 | + assertThat(e.getMessage(), containsString(complexTerm.toString())); |
| 282 | + return; |
| 283 | + } |
| 284 | + } |
| 285 | + } |
| 286 | + |
234 | 287 | public void testFetchSourceValue() throws IOException { |
235 | 288 | TextFieldType fieldType = createFieldType(true); |
236 | 289 | fieldType.setIndexAnalyzer(Lucene.STANDARD_ANALYZER); |
|
0 commit comments