Skip to content

Commit 816e37f

Browse files
committed
Fail span_multi queries that exceed the Boolean.maxclause term expansion limit
By default, a span_multi query now limits term expansion to the boolean max clause count. This limits heap usage when the term expansion has high cardinality. This applies only if top_terms_N is not used in the inner multi-term query.
1 parent 6fd4eb5 commit 816e37f

File tree

4 files changed

+134
-14
lines changed

4 files changed

+134
-14
lines changed

docs/reference/query-dsl/span-multi-term-query.asciidoc

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,9 @@ GET /_search
3737
--------------------------------------------------
3838
// CONSOLE
3939

40-
WARNING: By default `span_multi queries are rewritten to a `span_or` query
41-
containing **all** the expanded terms. This can be expensive if the number of expanded
42-
terms is large. To avoid an unbounded expansion you can set the
43-
<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*`
44-
rewrite. Or, if you use `span_multi` on `prefix` query only, you can
45-
activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
46-
rewrite any prefix query on the field to a a single term query that matches the indexed prefix.
40+
WARNING: `span_multi` queries will fail with a too-many-clauses error if the number of terms that match the query exceeds the
41+
boolean query limit (defaults to 1024). To avoid an unbounded expansion you can set the <<query-dsl-multi-term-rewrite,
42+
rewrite method>> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only,
43+
you can activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
44+
rewrite any prefix query on the field to a single term query that matches the indexed prefix.
45+

server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,25 @@
1919
package org.elasticsearch.index.query;
2020

2121
import org.apache.lucene.index.Term;
22+
import org.apache.lucene.index.IndexReader;
23+
import org.apache.lucene.index.TermContext;
24+
import org.apache.lucene.search.BooleanQuery;
2225
import org.apache.lucene.search.BoostQuery;
2326
import org.apache.lucene.search.ConstantScoreQuery;
2427
import org.apache.lucene.search.MultiTermQuery;
2528
import org.apache.lucene.search.PrefixQuery;
2629
import org.apache.lucene.search.Query;
2730
import org.apache.lucene.search.TermQuery;
2831
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
32+
import org.apache.lucene.search.ScoringRewrite;
33+
import org.apache.lucene.search.TopTermsRewrite;
2934
import org.apache.lucene.search.spans.SpanBoostQuery;
3035
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
36+
import org.apache.lucene.search.spans.SpanOrQuery;
3137
import org.apache.lucene.search.spans.SpanQuery;
3238
import org.apache.lucene.search.spans.SpanTermQuery;
3339
import org.elasticsearch.Version;
40+
import org.elasticsearch.ElasticsearchException;
3441
import org.elasticsearch.common.ParseField;
3542
import org.elasticsearch.common.ParsingException;
3643
import org.elasticsearch.common.io.stream.StreamInput;
@@ -42,19 +49,19 @@
4249
import org.elasticsearch.index.query.support.QueryParsers;
4350

4451
import java.io.IOException;
52+
import java.util.ArrayList;
53+
import java.util.List;
4554
import java.util.Objects;
4655

4756
/**
4857
* Query that allows wrapping a {@link MultiTermQueryBuilder} (one of wildcard, fuzzy, prefix, term, range or regexp query)
4958
* as a {@link SpanQueryBuilder} so it can be nested.
5059
*/
5160
public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTermQueryBuilder>
52-
implements SpanQueryBuilder {
61+
implements SpanQueryBuilder {
5362

5463
public static final String NAME = "span_multi";
55-
5664
private static final ParseField MATCH_FIELD = new ParseField("match");
57-
5865
private final MultiTermQueryBuilder multiTermQueryBuilder;
5966

6067
public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) {
@@ -83,7 +90,7 @@ public MultiTermQueryBuilder innerQuery() {
8390

8491
@Override
8592
protected void doXContent(XContentBuilder builder, Params params)
86-
throws IOException {
93+
throws IOException {
8794
builder.startObject(NAME);
8895
builder.field(MATCH_FIELD.getPreferredName());
8996
multiTermQueryBuilder.toXContent(builder, params);
@@ -105,7 +112,7 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro
105112
QueryBuilder query = parseInnerQueryBuilder(parser);
106113
if (query instanceof MultiTermQueryBuilder == false) {
107114
throw new ParsingException(parser.getTokenLocation(),
108-
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
115+
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
109116
}
110117
subQuery = (MultiTermQueryBuilder) query;
111118
} else {
@@ -124,12 +131,55 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro
124131

125132
if (subQuery == null) {
126133
throw new ParsingException(parser.getTokenLocation(),
127-
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
134+
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
128135
}
129136

130137
return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost);
131138
}
132139

140+
public static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod {
141+
142+
private MultiTermQuery multiTermQuery;
143+
private final long maxExpansions;
144+
145+
TopTermSpanBooleanQueryRewriteWithMaxClause(long max) {
146+
maxExpansions = max;
147+
}
148+
149+
@Override
150+
public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
151+
multiTermQuery = query;
152+
return (SpanQuery) this.delegate.rewrite(reader, multiTermQuery);
153+
}
154+
155+
final ScoringRewrite<List<SpanQuery>> delegate = new ScoringRewrite<List<SpanQuery>>() {
156+
157+
@Override
158+
protected List<SpanQuery> getTopLevelBuilder() {
159+
return new ArrayList();
160+
}
161+
162+
@Override
163+
protected Query build(List<SpanQuery> builder) {
164+
return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()]));
165+
}
166+
167+
@Override
168+
protected void checkMaxClauseCount(int count) {
169+
if (count > maxExpansions) {
170+
throw new ElasticsearchException("[" + multiTermQuery.toString() + " ] " +
171+
"exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
172+
}
173+
}
174+
175+
@Override
176+
protected void addClause(List<SpanQuery> topLevel, Term term, int docCount, float boost, TermContext states) {
177+
SpanTermQuery q = new SpanTermQuery(term, states);
178+
topLevel.add(q);
179+
}
180+
};
181+
}
182+
133183
@Override
134184
protected Query doToQuery(QueryShardContext context) throws IOException {
135185
Query subQuery = multiTermQueryBuilder.toQuery(context);
@@ -190,10 +240,15 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
190240
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
191241
}
192242
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
243+
if (((MultiTermQuery) subQuery).getRewriteMethod() instanceof TopTermsRewrite == false) {
244+
((SpanMultiTermQueryWrapper<MultiTermQuery>) spanQuery).setRewriteMethod(new
245+
TopTermSpanBooleanQueryRewriteWithMaxClause(BooleanQuery.getMaxClauseCount()));
246+
}
193247
}
194248
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
195249
return new SpanBoostQuery(spanQuery, boost);
196250
}
251+
197252
return spanQuery;
198253
}
199254

server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
import org.elasticsearch.common.Strings;
3535
import org.elasticsearch.common.compress.CompressedXContent;
3636
import org.elasticsearch.common.io.stream.StreamOutput;
37-
import org.elasticsearch.common.settings.Settings;
3837
import org.elasticsearch.common.xcontent.XContentBuilder;
3938
import org.elasticsearch.index.mapper.MapperService;
4039
import org.elasticsearch.search.internal.SearchContext;
@@ -238,4 +237,38 @@ public void testFromJson() throws IOException {
238237
assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value());
239238
assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001);
240239
}
240+
241+
public void testDefaultMaxRewriteBuilder() throws Exception {
242+
Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b")).
243+
toQuery(createShardContext());
244+
245+
if (query instanceof SpanBoostQuery) {
246+
query = ((SpanBoostQuery)query).getQuery();
247+
}
248+
249+
assertTrue(query instanceof SpanMultiTermQueryWrapper);
250+
if (query instanceof SpanMultiTermQueryWrapper) {
251+
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
252+
assertTrue(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
253+
}
254+
255+
}
256+
257+
public void testTopNMultiTermsRewriteInsideSpan() throws Exception {
258+
259+
Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b").rewrite
260+
("top_terms_boost_2000")).
261+
toQuery(createShardContext());
262+
263+
if (query instanceof SpanBoostQuery) {
264+
query = ((SpanBoostQuery)query).getQuery();
265+
}
266+
267+
assertTrue(query instanceof SpanMultiTermQueryWrapper);
268+
if (query instanceof SpanMultiTermQueryWrapper) {
269+
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
270+
assertFalse(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
271+
}
272+
273+
}
241274
}

server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919

2020
package org.elasticsearch.search.query;
2121

22+
import org.apache.lucene.search.BooleanQuery;
2223
import org.apache.lucene.util.English;
24+
import org.elasticsearch.ElasticsearchException;
2325
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
2426
import org.elasticsearch.action.index.IndexRequestBuilder;
2527
import org.elasticsearch.action.search.SearchPhaseExecutionException;
@@ -33,8 +35,12 @@
3335
import org.elasticsearch.index.query.MatchQueryBuilder;
3436
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
3537
import org.elasticsearch.index.query.Operator;
38+
import org.elasticsearch.index.query.QueryBuilder;
3639
import org.elasticsearch.index.query.QueryBuilders;
3740
import org.elasticsearch.index.query.RangeQueryBuilder;
41+
import org.elasticsearch.index.query.SpanMultiTermQueryBuilder;
42+
import org.elasticsearch.index.query.SpanNearQueryBuilder;
43+
import org.elasticsearch.index.query.SpanTermQueryBuilder;
3844
import org.elasticsearch.index.query.TermQueryBuilder;
3945
import org.elasticsearch.index.query.WrapperQueryBuilder;
4046
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
@@ -52,6 +58,7 @@
5258
import org.joda.time.format.ISODateTimeFormat;
5359

5460
import java.io.IOException;
61+
import java.util.ArrayList;
5562
import java.util.Collection;
5663
import java.util.Collections;
5764
import java.util.Random;
@@ -1819,4 +1826,30 @@ public void testRangeQueryRangeFields_24744() throws Exception {
18191826
assertHitCount(searchResponse, 1);
18201827
}
18211828

1829+
public void testTermExpansionExceptionOnSpanFailure() throws ExecutionException, InterruptedException {
1830+
Settings.Builder builder = Settings.builder();
1831+
builder.put(SETTING_NUMBER_OF_SHARDS, 1).build();
1832+
1833+
createIndex("test", builder.build());
1834+
ArrayList<IndexRequestBuilder> reqs = new ArrayList<>();
1835+
int origBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
1836+
try {
1837+
BooleanQuery.setMaxClauseCount(2);
1838+
for (int i = 0; i < BooleanQuery.getMaxClauseCount() + 1; i++) {
1839+
reqs.add(client().prepareIndex("test", "_doc", Integer.toString(i)).setSource("body", "foo" +
1840+
Integer.toString(i) + " bar baz"));
1841+
}
1842+
indexRandom(true, false, reqs);
1843+
1844+
QueryBuilder queryBuilder = new SpanNearQueryBuilder(new SpanMultiTermQueryBuilder(QueryBuilders.wildcardQuery
1845+
("body", "f*")), 0).addClause(new SpanTermQueryBuilder("body", "bar"));
1846+
1847+
expectThrows(ElasticsearchException.class, () ->
1848+
client().prepareSearch().setIndices("test").setQuery(queryBuilder).get());
1849+
} finally {
1850+
BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount);
1851+
}
1852+
1853+
}
1854+
18221855
}

0 commit comments

Comments
 (0)