Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Use QueryCoordinatorContext for the rewrite in validate API. ([#18272](https://github.com/opensearch-project/OpenSearch/pull/18272))
- Upgrade crypto kms plugin dependencies for AWS SDK v2.x. ([#18268](https://github.com/opensearch-project/OpenSearch/pull/18268))
- Add support for `matched_fields` with the unified highlighter ([#18164](https://github.com/opensearch-project/OpenSearch/issues/18164))
- Add BooleanQuery rewrite for must_not RangeQuery clauses ([#17655](https://github.com/opensearch-project/OpenSearch/pull/17655))
- [repository-s3] Add support for SSE-KMS and S3 bucket owner verification ([#18312](https://github.com/opensearch-project/OpenSearch/pull/18312))
- Added File Cache Stats - Involves Block level as well as full file level stats ([#17538](https://github.com/opensearch-project/OpenSearch/issues/17538))

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.query;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.opensearch.common.settings.Settings;
import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase;

import java.util.Arrays;
import java.util.Collection;

import static org.opensearch.index.query.QueryBuilders.boolQuery;
import static org.opensearch.index.query.QueryBuilders.matchAllQuery;
import static org.opensearch.index.query.QueryBuilders.rangeQuery;
import static org.opensearch.index.query.QueryBuilders.termQuery;
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;

/**
 * Integration tests for the {@code BoolQueryBuilder} must_not range-clause rewrite, which
 * replaces an eligible must_not range clause with should clauses covering the complement of
 * the excluded range. Each test is run with concurrent segment search both disabled and
 * enabled (see {@link #parameters()}).
 */
public class BooleanQueryIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase {

    public BooleanQueryIT(Settings staticSettings) {
        super(staticSettings);
    }

    /** Runs every test twice: concurrent segment search off, then on. */
    @ParametersFactory
    public static Collection<Object[]> parameters() {
        return Arrays.asList(
            new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() },
            new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() }
        );
    }

    /**
     * Exercises the rewrite on an integer field, including interaction with must clauses and
     * with preexisting should clauses (default minimumShouldMatch semantics).
     */
    public void testMustNotRangeRewrite() throws Exception {
        String intField = "int_field";
        String termField1 = "term_field_1";
        String termField2 = "term_field_2";
        // If we don't explicitly set this mapping, term_field_2 is not correctly mapped as a keyword field and queries don't work as
        // expected
        // NOTE: the mapping previously ended in "}}}}" which closed one more brace than was opened;
        // fixed to balanced JSON.
        createIndex(
            "test",
            Settings.EMPTY,
            "{\"properties\":{\"int_field\":{\"type\": \"integer\"},\"term_field_1\":{\"type\": \"keyword\"},\"term_field_2\":{\"type\": \"keyword\"}}}"
        );
        int numDocs = 100;

        // Docs 0..99: int_field = i; term_field_1 alternates even/odd; term_field_2 is "A" for
        // the first half and "B" for the second half.
        for (int i = 0; i < numDocs; i++) {
            String termValue1 = "odd";
            String termValue2 = "A";
            if (i % 2 == 0) {
                termValue1 = "even";
            }
            if (i >= numDocs / 2) {
                termValue2 = "B";
            }
            client().prepareIndex("test")
                .setId(Integer.toString(i))
                .setSource(intField, i, termField1, termValue1, termField2, termValue2)
                .get();
        }
        ensureGreen();
        waitForRelocation();
        forceMerge();
        refresh();

        int lt = 80;
        int gte = 20;
        // Excluding [20, 80) removes (lt - gte) docs.
        int expectedHitCount = numDocs - (lt - gte);

        assertHitCount(
            client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(lt).gte(gte))).get(),
            expectedHitCount
        );

        // Combined with a must term clause: half of the surviving docs are "even".
        assertHitCount(
            client().prepareSearch()
                .setQuery(boolQuery().must(termQuery(termField1, "even")).mustNot(rangeQuery(intField).lt(lt).gte(gte)))
                .get(),
            expectedHitCount / 2
        );

        // Test with preexisting should fields, to ensure the original default minimumShouldMatch is respected
        // once we add a must clause during rewrite, even if minimumShouldMatch is not explicitly set
        assertHitCount(
            client().prepareSearch().setQuery(boolQuery().should(termQuery(termField1, "even")).should(termQuery(termField2, "B"))).get(),
            3 * numDocs / 4
        );
        assertHitCount(
            client().prepareSearch()
                .setQuery(
                    boolQuery().should(termQuery(termField1, "even"))
                        .should(termQuery(termField2, "A"))
                        .mustNot(rangeQuery(intField).lt(lt).gte(gte))
                )
                .get(),
            3 * expectedHitCount / 4
        );

        // Excluding a range that covers every document must match nothing.
        assertHitCount(client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(numDocs * 2))).get(), 0);
    }

    /** Exercises the rewrite on a date field with a simple gte/lte exclusion. */
    public void testMustNotDateRangeRewrite() throws Exception {
        int minDay = 10;
        int maxDay = 31;
        String dateField = "date_field";
        createIndex("test");
        for (int day = minDay; day <= maxDay; day++) {
            client().prepareIndex("test").setSource(dateField, getDate(day, 0)).get();
        }
        ensureGreen();
        waitForRelocation();
        forceMerge();
        refresh();

        int minExcludedDay = 15;
        int maxExcludedDay = 25;
        // Total days indexed minus the inclusive excluded span.
        int expectedHitCount = maxDay + 1 - minDay - (maxExcludedDay - minExcludedDay + 1);

        assertHitCount(
            client().prepareSearch("test")
                .setQuery(boolQuery().mustNot(rangeQuery(dateField).gte(getDate(minExcludedDay, 0)).lte(getDate(maxExcludedDay, 0))))
                .get(),
            expectedHitCount
        );
    }

    /** Verifies the rewrite preserves the range query's time zone and date format. */
    public void testMustNotDateRangeWithFormatAndTimeZoneRewrite() throws Exception {
        // Index a document for each hour of 1/1 in UTC. Then, do a boolean query excluding 1/1 for UTC+3.
        // If the time zone is respected by the rewrite, we should see 3 matching documents,
        // as there will be 3 hours that are 1/1 in UTC but not UTC+3.

        String dateField = "date_field";
        createIndex("test");
        String format = "strict_date_hour_minute_second_fraction";

        for (int hour = 0; hour < 24; hour++) {
            client().prepareIndex("test").setSource(dateField, getDate(1, hour)).get();
        }
        ensureGreen();
        waitForRelocation();
        forceMerge();
        refresh();

        int zoneOffset = 3;
        assertHitCount(
            client().prepareSearch("test")
                .setQuery(
                    boolQuery().mustNot(
                        rangeQuery(dateField).gte(getDate(1, 0)).lte(getDate(1, 23)).timeZone("+" + zoneOffset).format(format)
                    )
                )
                .get(),
            zoneOffset
        );
    }

    /** Docs missing the field must still match the must_not query (rewrite must be skipped). */
    public void testMustNotRangeRewriteWithMissingValues() throws Exception {
        // This test passes because the rewrite is skipped when not all docs have exactly 1 value
        String intField = "int_field";
        String termField = "term_field";
        createIndex("test");
        int numDocs = 100;
        int numDocsMissingIntValues = 20;

        for (int i = 0; i < numDocs; i++) {
            String termValue = "odd";
            if (i % 2 == 0) {
                termValue = "even";
            }

            // The first numDocsMissingIntValues docs omit int_field entirely.
            if (i >= numDocsMissingIntValues) {
                client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i, termField, termValue).get();
            } else {
                client().prepareIndex("test").setId(Integer.toString(i)).setSource(termField, termValue).get();
            }
        }
        ensureGreen();
        waitForRelocation();
        forceMerge();
        refresh();

        // Search excluding range 30 to 50

        int lt = 50;
        int gte = 30;
        int expectedHitCount = numDocs - (lt - gte); // Expected hit count includes documents missing this value

        assertHitCount(
            client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(lt).gte(gte))).get(),
            expectedHitCount
        );
    }

    /** Multi-valued docs must be excluded if ANY value falls in the range (rewrite must be skipped). */
    public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception {
        // This test passes because the rewrite is skipped when not all docs have exactly 1 value
        String intField = "int_field";
        createIndex("test");
        int numDocs = 100;
        int numDocsWithExtraValue = 20;
        int extraValue = -1;

        for (int i = 0; i < numDocs; i++) {
            if (i < numDocsWithExtraValue) {
                client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, new int[] { extraValue, i }).get();
            } else {
                client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i).get();
            }
        }
        ensureGreen();
        waitForRelocation();
        forceMerge();
        refresh();

        // Range queries will match if ANY of the doc's values are within the range.
        // So if we exclude the range 0 to 20, we shouldn't see any of those documents returned,
        // even though they also have a value -1 which is not excluded.

        int expectedHitCount = numDocs - numDocsWithExtraValue;
        assertHitCount(
            client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(numDocsWithExtraValue).gte(0))).get(),
            expectedHitCount
        );
        assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), numDocs);
    }

    /** Left-pads {@code value} with zeros to at least {@code length} digits. */
    private String padZeros(int value, int length) {
        // String.format() not allowed
        String ret = Integer.toString(value);
        if (ret.length() < length) {
            ret = "0".repeat(length - ret.length()) + ret;
        }
        return ret;
    }

    /** Builds a strict_date_hour_minute_second_fraction timestamp in January 2016. */
    private String getDate(int day, int hour) {
        return "2016-01-" + padZeros(day, 2) + "T" + padZeros(hour, 2) + ":00:00.000";
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,13 @@

package org.opensearch.index.query;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.opensearch.common.lucene.search.Queries;
Expand All @@ -48,10 +52,13 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Stream;

Expand Down Expand Up @@ -393,9 +400,13 @@
return any.get();
}

changed |= rewriteMustNotRangeClausesToShould(newBuilder, queryRewriteContext);

if (changed) {
newBuilder.adjustPureNegative = adjustPureNegative;
newBuilder.minimumShouldMatch = minimumShouldMatch;
if (minimumShouldMatch != null) {
newBuilder.minimumShouldMatch = minimumShouldMatch;
}
newBuilder.boost(boost());
newBuilder.queryName(queryName());
return newBuilder;
Expand Down Expand Up @@ -460,4 +471,83 @@

}

private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder, QueryRewriteContext queryRewriteContext) {
// If there is a range query on a given field in a must_not clause, it's more performant to execute it as
// multiple should clauses representing everything outside the target range.

// First check if we can get the individual LeafContexts. If we can't, we can't proceed with the rewrite, since we can't confirm
// every doc has exactly 1 value for this field.
List<LeafReaderContext> leafReaderContexts = getLeafReaderContexts(queryRewriteContext);
if (leafReaderContexts == null || leafReaderContexts.isEmpty()) {
return false;
}

boolean changed = false;
// For now, only handle the case where there's exactly 1 range query for this field.
Map<String, Integer> fieldCounts = new HashMap<>();
Set<RangeQueryBuilder> rangeQueries = new HashSet<>();
for (QueryBuilder clause : mustNotClauses) {
if (clause instanceof RangeQueryBuilder rq) {
fieldCounts.merge(rq.fieldName(), 1, Integer::sum);
rangeQueries.add(rq);
}
}

for (RangeQueryBuilder rq : rangeQueries) {
String fieldName = rq.fieldName();
if (fieldCounts.getOrDefault(fieldName, 0) == 1) {
// Check that all docs on this field have exactly 1 value, otherwise we can't perform this rewrite
if (checkAllDocsHaveOneValue(leafReaderContexts, fieldName)) {
List<RangeQueryBuilder> complement = rq.getComplement();
if (complement != null) {
BoolQueryBuilder nestedBoolQuery = new BoolQueryBuilder();
nestedBoolQuery.minimumShouldMatch(1);
for (RangeQueryBuilder complementComponent : complement) {
nestedBoolQuery.should(complementComponent);
}
newBuilder.must(nestedBoolQuery);
newBuilder.mustNotClauses.remove(rq);
changed = true;
}
}
}
}

if (minimumShouldMatch == null && changed) {
if ((!shouldClauses.isEmpty()) && mustClauses.isEmpty() && filterClauses.isEmpty()) {
// If there were originally should clauses and no must/filter clauses, null minimumShouldMatch is set to a default of 1
// within Lucene.
// But if there was originally a must or filter clause, the default is 0.
// If we added a must clause due to this rewrite, we should respect what the original default would have been.
newBuilder.minimumShouldMatch(1);

Check warning on line 522 in server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java#L522

Added line #L522 was not covered by tests
}
}
return changed;
}

/**
 * Resolves the leaf reader contexts for the shard this rewrite is running on.
 *
 * @param queryRewriteContext the current rewrite context; may be null
 * @return the searcher's leaves, or null when no shard-level searcher can be reached
 *         (null context, non-shard context, or no searcher attached)
 */
private List<LeafReaderContext> getLeafReaderContexts(QueryRewriteContext queryRewriteContext) {
    if (queryRewriteContext != null) {
        QueryShardContext shardLevelContext = queryRewriteContext.convertToShardContext();
        if (shardLevelContext != null) {
            IndexSearcher searcher = shardLevelContext.searcher();
            if (searcher != null) {
                return searcher.getIndexReader().leaves();
            }
        }
    }
    return null;
}

private boolean checkAllDocsHaveOneValue(List<LeafReaderContext> contexts, String fieldName) {
for (LeafReaderContext lrc : contexts) {
PointValues values;
try {
LeafReader reader = lrc.reader();
values = reader.getPointValues(fieldName);
if (values == null || !(values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size())) {
return false;
}
} catch (IOException e) {

Check warning on line 546 in server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java#L546

Added line #L546 was not covered by tests
// If we can't get PointValues to check on the number of values per doc, assume the query is ineligible
return false;

Check warning on line 548 in server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java#L548

Added line #L548 was not covered by tests
}
}
return true;
}
}
Loading
Loading