Skip to content

Commit

Permalink
[Bug] fix case sensitivity for wildcard queries (#5462) (#5493)
Browse files Browse the repository at this point in the history
Fixes the wildcard query to not normalize the pattern when case_insensitive is
set by the user. This is achieved by creating a new normalizedWildcardQuery
method so that query_string queries (which do not support case sensitivity) can
still normalize the pattern when the default analyzer is used; maintaining
existing behavior.

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
(cherry picked from commit ce25dec)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
1 parent fd59008 commit 2a1e734
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Removed
### Fixed
- Fix 1.x compatibility bug with stored Tasks ([#5412](https://github.com/opensearch-project/OpenSearch/pull/5412))
- Fix case sensitivity for wildcard queries ([#5462](https://github.com/opensearch-project/OpenSearch/pull/5462))
### Security

[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.4...2.x
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,39 @@ public void testKeywordWithWhitespace() throws Exception {
assertHitCount(resp, 3L);
}

public void testRegexCaseInsensitivity() throws Exception {
createIndex("messages");
List<IndexRequestBuilder> indexRequests = new ArrayList<>();
indexRequests.add(client().prepareIndex("messages").setId("1").setSource("message", "message: this is a TLS handshake"));
indexRequests.add(client().prepareIndex("messages").setId("2").setSource("message", "message: this is a tcp handshake"));
indexRandom(true, false, indexRequests);

SearchResponse response = client().prepareSearch("messages").setQuery(queryStringQuery("/TLS/").defaultField("message")).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertHits(response.getHits(), "1");

response = client().prepareSearch("messages").setQuery(queryStringQuery("/tls/").defaultField("message")).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertHits(response.getHits(), "1");

response = client().prepareSearch("messages").setQuery(queryStringQuery("/TCP/").defaultField("message")).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertHits(response.getHits(), "2");

response = client().prepareSearch("messages").setQuery(queryStringQuery("/tcp/").defaultField("message")).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertHits(response.getHits(), "2");

response = client().prepareSearch("messages").setQuery(queryStringQuery("/HANDSHAKE/").defaultField("message")).get();
assertNoFailures(response);
assertHitCount(response, 2);
assertHits(response.getHits(), "1", "2");
}

public void testAllFields() throws Exception {
String indexBody = copyToStringFromClasspath("/org/opensearch/search/query/all-query-index.json");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,15 @@
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.regex.Pattern;

import static java.util.Collections.singletonMap;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
Expand Down Expand Up @@ -2089,8 +2092,14 @@ public void testWildcardQueryNormalizationOnTextField() {
refresh();

{
// test default case insensitivity: false
WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*");
SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 0L);

// test case insensitivity set to true
wildCardQuery = wildcardQuery("field1", "Bb*").caseInsensitive(true);
searchResponse = client().prepareSearch().setQuery(wildCardQuery).get();
assertHitCount(searchResponse, 1L);

wildCardQuery = wildcardQuery("field1", "bb*");
Expand All @@ -2099,6 +2108,24 @@ public void testWildcardQueryNormalizationOnTextField() {
}
}

/** tests wildcard case sensitivity */
public void testWildcardCaseSensitivity() {
assertAcked(prepareCreate("test").setMapping("field", "type=text"));
client().prepareIndex("test").setId("1").setSource("field", "lowercase text").get();
refresh();

// test case sensitive
SearchResponse response = client().prepareSearch("test").setQuery(wildcardQuery("field", "Text").caseInsensitive(false)).get();
assertNoFailures(response);
assertHitCount(response, 0);

// test case insensitive
response = client().prepareSearch("test").setQuery(wildcardQuery("field", "Text").caseInsensitive(true)).get();
assertNoFailures(response);
assertHitCount(response, 1);
assertHits(response.getHits(), "1");
}

/**
* Reserved characters should be excluded when the normalization is applied for keyword fields.
* See https://github.com/elastic/elasticsearch/issues/46300 for details.
Expand Down Expand Up @@ -2175,4 +2202,16 @@ public void testIssueFuzzyInsideSpanMulti() {
SearchResponse response = client().prepareSearch("test").setQuery(query).get();
assertHitCount(response, 1);
}

/**
* asserts the search response hits include the expected ids
*/
private void assertHits(SearchHits hits, String... ids) {
assertThat(hits.getTotalHits().value, equalTo((long) ids.length));
Set<String> hitIds = new HashSet<>();
for (SearchHit hit : hits.getHits()) {
hitIds.add(hit.getId());
}
assertThat(hitIds, containsInAnyOrder(ids));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.opensearch.common.Nullable;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.xcontent.XContentParser;
import org.opensearch.index.analysis.IndexAnalyzers;
Expand Down Expand Up @@ -368,6 +371,18 @@ protected BytesRef indexedValueForSearch(Object value) {
}
return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString());
}

@Override
public Query wildcardQuery(
String value,
@Nullable MultiTermQuery.RewriteMethod method,
boolean caseInsensitve,
QueryShardContext context
) {
// keyword field types are always normalized, so ignore case sensitivity and force normalize the wildcard
// query text
return super.wildcardQuery(value, method, caseInsensitve, true, context);
}
}

private final boolean indexed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ public Query prefixQuery(
) {
throw new QueryShardException(
context,
"Can only use prefix queries on keyword, text and wildcard fields - not on [" + name + "] which is of type [" + typeName() + "]"
"Can only use prefix queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"
);
}

Expand All @@ -290,6 +290,7 @@ public final Query wildcardQuery(String value, @Nullable MultiTermQuery.RewriteM
return wildcardQuery(value, method, false, context);
}

/** optionally normalize the wildcard pattern based on the value of {@code caseInsensitive} */
public Query wildcardQuery(
String value,
@Nullable MultiTermQuery.RewriteMethod method,
Expand All @@ -298,11 +299,15 @@ public Query wildcardQuery(
) {
throw new QueryShardException(
context,
"Can only use wildcard queries on keyword, text and wildcard fields - not on ["
+ name
+ "] which is of type ["
+ typeName()
+ "]"
"Can only use wildcard queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"
);
}

/** always normalizes the wildcard pattern to lowercase */
public Query normalizedWildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) {
throw new QueryShardException(
context,
"Can only use wildcard queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]"
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,34 @@ public static final String normalizeWildcardPattern(String fieldname, String val
return sb.toBytesRef().utf8ToString();
}

/** optionally normalize the wildcard pattern based on the value of {@code caseInsensitive} */
@Override
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, QueryShardContext context) {
return wildcardQuery(value, method, caseInsensitive, false, context);
}

/** always normalizes the wildcard pattern to lowercase */
@Override
public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
return wildcardQuery(value, method, false, true, context);
}

/**
* return a wildcard query
*
* @param value the pattern
* @param method rewrite method
* @param caseInsensitive should ignore case; note, only used if there is no analyzer, else we use the analyzer rules
* @param normalizeIfAnalyzed force normalize casing if an analyzer is used
* @param context the query shard context
*/
public Query wildcardQuery(
String value,
MultiTermQuery.RewriteMethod method,
boolean caseInsensitive,
boolean normalizeIfAnalyzed,
QueryShardContext context
) {
failIfNotIndexed();
if (context.allowExpensiveQueries() == false) {
throw new OpenSearchException(
Expand All @@ -162,7 +188,7 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, bo
}

Term term;
if (getTextSearchInfo().getSearchAnalyzer() != null) {
if (getTextSearchInfo().getSearchAnalyzer() != null && normalizeIfAnalyzed) {
value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
term = new Term(name(), value);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,8 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
if (getAllowLeadingWildcard() == false && (termStr.startsWith("*") || termStr.startsWith("?"))) {
throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
}
return currentFieldType.wildcardQuery(termStr, getMultiTermRewriteMethod(), context);
// query string query is always normalized
return currentFieldType.normalizedWildcardQuery(termStr, getMultiTermRewriteMethod(), context);
} catch (RuntimeException e) {
if (lenient) {
return newLenientFieldQuery(field, e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public void testNumeric() throws Exception {
QueryShardContext context = createShardContext();
QueryShardException e = expectThrows(QueryShardException.class, () -> query.toQuery(context));
assertEquals(
"Can only use prefix queries on keyword, text and wildcard fields - not on [mapped_int] which is of type [integer]",
"Can only use prefix queries on keyword and text fields - not on [mapped_int] which is of type [integer]",
e.getMessage()
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,7 @@ public void testPrefixNumeric() throws Exception {
QueryShardContext context = createShardContext();
QueryShardException e = expectThrows(QueryShardException.class, () -> query.toQuery(context));
assertEquals(
"Can only use prefix queries on keyword, text and wildcard fields - not on [mapped_int] which is of type [integer]",
"Can only use prefix queries on keyword and text fields - not on [mapped_int] which is of type [integer]",
e.getMessage()
);
query.lenient(true);
Expand Down

0 comments on commit 2a1e734

Please sign in to comment.