Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor fuzziness interface on query builders #5433

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix 'org.apache.hc.core5.http.ParseException: Invalid protocol version' under JDK 16+ ([#4827](https://github.com/opensearch-project/OpenSearch/pull/4827))
- Fixed compression support for h2c protocol ([#4944](https://github.com/opensearch-project/OpenSearch/pull/4944))
- Reject bulk requests with invalid actions ([#5299](https://github.com/opensearch-project/OpenSearch/issues/5299))
- Refactor fuzziness interface on query builders ([#5433](https://github.com/opensearch-project/OpenSearch/pull/5433))
noCharger marked this conversation as resolved.
Show resolved Hide resolved

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.opensearch.action.index.IndexRequestBuilder;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.common.util.set.Sets;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentFactory;
Expand Down Expand Up @@ -1024,7 +1025,7 @@ public void testFuzzyFieldLevelBoosting() throws InterruptedException, Execution

SearchResponse searchResponse = client().prepareSearch(idx)
.setExplain(true)
.setQuery(multiMatchQuery("foo").field("title", 100).field("body").fuzziness(0))
.setQuery(multiMatchQuery("foo").field("title", 100).field("body").fuzziness(Fuzziness.ZERO))
.get();
SearchHit[] hits = searchResponse.getHits().getHits();
assertNotEquals("both documents should be on different shards", hits[0].getShard().getShardId(), hits[1].getShard().getShardId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.opensearch.common.regex.Regex;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.common.xcontent.XContentType;
Expand Down Expand Up @@ -762,21 +763,21 @@ public void testMatchQueryFuzzy() throws Exception {
client().prepareIndex("test").setId("2").setSource("text", "Unity")
);

SearchResponse searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness("0")).get();
SearchResponse searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness(Fuzziness.ZERO)).get();
assertHitCount(searchResponse, 0L);

searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness("1")).get();
searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness(Fuzziness.ONE)).get();
assertHitCount(searchResponse, 2L);
assertSearchHits(searchResponse, "1", "2");

searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness("AUTO")).get();
searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness(Fuzziness.AUTO)).get();
assertHitCount(searchResponse, 2L);
assertSearchHits(searchResponse, "1", "2");

searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness("AUTO:5,7")).get();
searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness(Fuzziness.customAuto(5, 7))).get();
assertHitCount(searchResponse, 0L);

searchResponse = client().prepareSearch().setQuery(matchQuery("text", "unify").fuzziness("AUTO:5,7")).get();
searchResponse = client().prepareSearch().setQuery(matchQuery("text", "unify").fuzziness(Fuzziness.customAuto(5, 7))).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "2");
}
Expand Down
10 changes: 10 additions & 0 deletions server/src/main/java/org/opensearch/common/unit/Fuzziness.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,16 @@ public static Fuzziness build(Object fuzziness) {
return new Fuzziness(string);
}

/***
* Creates a {@link Fuzziness} instance from lowDistance and highDistance.
* where the edit distance is 0 for strings shorter than lowDistance,
* 1 for strings where its length between lowDistance and highDistance (inclusive),
* and 2 for strings longer than highDistance.
*/
public static Fuzziness customAuto(int lowDistance, int highDistance) {
noCharger marked this conversation as resolved.
Show resolved Hide resolved
return new Fuzziness("AUTO", lowDistance, highDistance);
}

private static Fuzziness parseCustomAuto(final String string) {
assert string.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":");
String[] fuzzinessLimit = string.substring(AUTO.asString().length() + 1).split(",");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,19 @@ public String minimumShouldMatch() {
return this.minimumShouldMatch;
}

@Deprecated
/** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
public MatchBoolPrefixQueryBuilder fuzziness(Object fuzziness) {
this.fuzziness = Fuzziness.build(fuzziness);
return this;
}

/** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
public MatchBoolPrefixQueryBuilder fuzziness(Fuzziness fuzziness) {
this.fuzziness = fuzziness;
return this;
}

/** Gets the fuzziness used when evaluated to a fuzzy query type. */
public Fuzziness fuzziness() {
return this.fuzziness;
Expand Down Expand Up @@ -348,19 +355,16 @@ public static MatchBoolPrefixQueryBuilder fromXContent(XContentParser parser) th
}
}

MatchBoolPrefixQueryBuilder queryBuilder = new MatchBoolPrefixQueryBuilder(fieldName, value);
queryBuilder.analyzer(analyzer);
queryBuilder.operator(operator);
queryBuilder.minimumShouldMatch(minimumShouldMatch);
queryBuilder.boost(boost);
queryBuilder.queryName(queryName);
if (fuzziness != null) {
queryBuilder.fuzziness(fuzziness);
}
queryBuilder.prefixLength(prefixLength);
queryBuilder.maxExpansions(maxExpansion);
queryBuilder.fuzzyTranspositions(fuzzyTranspositions);
queryBuilder.fuzzyRewrite(fuzzyRewrite);
MatchBoolPrefixQueryBuilder queryBuilder = new MatchBoolPrefixQueryBuilder(fieldName, value).analyzer(analyzer)
.operator(operator)
.minimumShouldMatch(minimumShouldMatch)
.boost(boost)
.queryName(queryName)
.fuzziness(fuzziness)
noCharger marked this conversation as resolved.
Show resolved Hide resolved
.prefixLength(prefixLength)
.maxExpansions(maxExpansion)
.fuzzyTranspositions(fuzzyTranspositions)
.fuzzyRewrite(fuzzyRewrite);
return queryBuilder;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,19 @@ public String analyzer() {
return this.analyzer;
}

@Deprecated
/** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
public MatchQueryBuilder fuzziness(Object fuzziness) {
this.fuzziness = Fuzziness.build(fuzziness);
return this;
}

/** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
public MatchQueryBuilder fuzziness(Fuzziness fuzziness) {
this.fuzziness = fuzziness;
return this;
}

/** Gets the fuzziness used when evaluated to a fuzzy query type. */
public Fuzziness fuzziness() {
return this.fuzziness;
Expand Down Expand Up @@ -565,9 +572,7 @@ public static MatchQueryBuilder fromXContent(XContentParser parser) throws IOExc
matchQuery.operator(operator);
matchQuery.analyzer(analyzer);
matchQuery.minimumShouldMatch(minimumShouldMatch);
if (fuzziness != null) {
matchQuery.fuzziness(fuzziness);
}
matchQuery.fuzziness(fuzziness);
matchQuery.fuzzyRewrite(fuzzyRewrite);
matchQuery.prefixLength(prefixLength);
matchQuery.fuzzyTranspositions(fuzzyTranspositions);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ public int slop() {
return slop;
}

@Deprecated
/**
* Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO".
*/
Expand All @@ -407,6 +408,14 @@ public MultiMatchQueryBuilder fuzziness(Object fuzziness) {
return this;
}

/**
* Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO".
*/
public MultiMatchQueryBuilder fuzziness(Fuzziness fuzziness) {
this.fuzziness = fuzziness;
return this;
}

public Fuzziness fuzziness() {
return fuzziness;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
public static final int DEFAULT_FUZZY_PREFIX_LENGTH = FuzzyQuery.defaultPrefixLength;
public static final int DEFAULT_FUZZY_MAX_EXPANSIONS = FuzzyQuery.defaultMaxExpansions;
public static final int DEFAULT_PHRASE_SLOP = 0;
/** Default maximum edit distance. Defaults to AUTO. */
public static final Fuzziness DEFAULT_FUZZINESS = Fuzziness.AUTO;
public static final Operator DEFAULT_OPERATOR = Operator.OR;
public static final MultiMatchQueryBuilder.Type DEFAULT_TYPE = MultiMatchQueryBuilder.Type.BEST_FIELDS;
Expand Down Expand Up @@ -416,7 +417,7 @@ public boolean enablePositionIncrements() {
* Set the edit distance for fuzzy queries. Default is "AUTO".
*/
public QueryStringQueryBuilder fuzziness(Fuzziness fuzziness) {
this.fuzziness = fuzziness == null ? DEFAULT_FUZZINESS : fuzziness;
this.fuzziness = fuzziness;
return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,12 @@ public void testSerializationCustomAuto() throws IOException {
assertNotSame(original, deserializedFuzziness);
assertEquals(original, deserializedFuzziness);
assertEquals(original.asString(), deserializedFuzziness.asString());

original = Fuzziness.customAuto(4, 7);
deserializedFuzziness = doSerializeRoundtrip(original);
assertNotSame(original, deserializedFuzziness);
assertEquals(original, deserializedFuzziness);
assertEquals(original.asString(), deserializedFuzziness.asString());
}

private static Fuzziness doSerializeRoundtrip(Fuzziness in) throws IOException {
Expand Down Expand Up @@ -205,5 +211,11 @@ public void testAsDistanceString() {
assertEquals(1, fuzziness.asDistance("abcdef"));
assertEquals(2, fuzziness.asDistance("abcdefg"));

fuzziness = Fuzziness.customAuto(5, 7);
assertEquals(0, fuzziness.asDistance(""));
assertEquals(0, fuzziness.asDistance("abcd"));
assertEquals(1, fuzziness.asDistance("abcde"));
assertEquals(1, fuzziness.asDistance("abcdef"));
assertEquals(2, fuzziness.asDistance("abcdefg"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ public void testIllegalValues() {
}
}

public void testDefaultFuzziness() {
MatchBoolPrefixQueryBuilder matchBoolPrefixQueryBuilder = new MatchBoolPrefixQueryBuilder(TEXT_FIELD_NAME, "text").fuzziness(null);
assertNull(matchBoolPrefixQueryBuilder.fuzziness());
}

public void testFromSimpleJson() throws IOException {
final String simple = "{" + "\"match_bool_prefix\": {" + "\"fieldName\": \"fieldValue\"" + "}" + "}";
final String expected = "{"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,11 @@ public void testFuzzinessOnNonStringField() throws Exception {
query.toQuery(context); // no exception
}

public void testDefaultFuzziness() {
MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", TEXT_FIELD_NAME).fuzziness(null);
assertNull(matchQueryBuilder.fuzziness());
}

public void testExactOnUnsupportedField() throws Exception {
MatchQueryBuilder query = new MatchQueryBuilder(GEO_POINT_FIELD_NAME, "2,3");
QueryShardContext context = createShardContext();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,11 @@ public void testFuzzinessNotAllowedTypes() throws IOException {
}
}

public void testDefaultFuzziness() {
MultiMatchQueryBuilder multiMatchQueryBuilder = new MultiMatchQueryBuilder("text", TEXT_FIELD_NAME).fuzziness(null);
assertNull(multiMatchQueryBuilder.fuzziness());
}

public void testQueryParameterArrayException() {
String json = "{\n"
+ " \"multi_match\" : {\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ public void testToQueryWildcardWithIndexedPrefixes() throws Exception {
assertThat(query, equalTo(expectedQuery));
}

public void testToQueryWilcardQueryWithSynonyms() throws Exception {
public void testToQueryWildcardQueryWithSynonyms() throws Exception {
for (Operator op : Operator.values()) {
BooleanClause.Occur defaultOp = op.toBooleanClauseOccur();
QueryStringQueryParser queryParser = new QueryStringQueryParser(createShardContext(), TEXT_FIELD_NAME);
Expand Down Expand Up @@ -803,7 +803,7 @@ public void testToQueryRegExpQueryMaxDeterminizedStatesParsing() throws Exceptio
assertThat(e.getMessage(), containsString("would require more than 10 effort"));
}

public void testToQueryFuzzyQueryAutoFuziness() throws Exception {
public void testToQueryFuzzyQueryAutoFuzziness() throws Exception {
for (int i = 0; i < 3; i++) {
final int len;
final int expectedEdits;
Expand All @@ -828,7 +828,6 @@ public void testToQueryFuzzyQueryAutoFuziness() throws Exception {
String queryString = new String(bytes);
for (int j = 0; j < 2; j++) {
Query query = queryStringQuery(queryString + (j == 0 ? "~" : "~auto")).defaultField(TEXT_FIELD_NAME)
.fuzziness(Fuzziness.AUTO)
.toQuery(createShardContext());
assertThat(query, instanceOf(FuzzyQuery.class));
FuzzyQuery fuzzyQuery = (FuzzyQuery) query;
Expand Down Expand Up @@ -868,6 +867,11 @@ public void testFuzzyNumeric() throws Exception {
query.toQuery(context); // no exception
}

public void testDefaultFuzziness() {
QueryStringQueryBuilder queryStringQueryBuilder = new QueryStringQueryBuilder(TEXT_FIELD_NAME).fuzziness(null);
assertNull(queryStringQueryBuilder.fuzziness());
}

public void testPrefixNumeric() throws Exception {
QueryStringQueryBuilder query = queryStringQuery("12*").defaultField(INT_FIELD_NAME);
QueryShardContext context = createShardContext();
Expand Down