Skip to content

deprecating jarowinkler in favor of jaro_winkler #27526

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 30, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ String sort() {
* based on Damerau-Levenshtein algorithm.
* <li><code>levenshtein</code> - String distance algorithm based on
* Levenshtein edit distance algorithm.
* <li><code>jarowinkler</code> - String distance algorithm based on
* <li><code>jaro_winkler</code> - String distance algorithm based on
* Jaro-Winkler algorithm.
* <li><code>ngram</code> - String distance algorithm based on character
* n-grams.
Expand Down Expand Up @@ -474,9 +474,10 @@ static StringDistance resolveDistance(String distanceVal) {
return new LevensteinDistance();
} else if ("levenshtein".equals(distanceVal)) {
return new LevensteinDistance();
// TODO Jaro and Winkler are 2 people - so apply same naming logic
// as damerau_levenshtein
} else if ("jarowinkler".equals(distanceVal)) {
DEPRECATION_LOGGER.deprecated("Deprecated distance [jarowinkler] used, replaced by [jaro_winkler]");
return new JaroWinklerDistance();
} else if ("jaro_winkler".equals(distanceVal)) {
return new JaroWinklerDistance();
} else if ("ngram".equals(distanceVal)) {
return new NGramDistance();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ public SortBy sort() {
* Damerau-Levenshtein algorithm.
* <li><code>levenshtein</code> - String distance algorithm based on
* Levenshtein edit distance algorithm.
* <li><code>jarowinkler</code> - String distance algorithm based on
* <li><code>jaro_winkler</code> - String distance algorithm based on
* Jaro-Winkler algorithm.
* <li><code>ngram</code> - String distance algorithm based on character
* n-grams.
Expand Down Expand Up @@ -556,7 +556,7 @@ public StringDistance toLucene() {
}
},
/** String distance algorithm based on Jaro-Winkler algorithm. */
JAROWINKLER {
JARO_WINKLER {
@Override
public StringDistance toLucene() {
return new JaroWinklerDistance();
Expand Down Expand Up @@ -596,7 +596,10 @@ public static StringDistanceImpl resolve(final String str) {
case "ngram":
return NGRAM;
case "jarowinkler":
return JAROWINKLER;
DEPRECATION_LOGGER.deprecated("Deprecated distance [jarowinkler] used, replaced by [jaro_winkler]");
return JARO_WINKLER;
case "jaro_winkler":
return JARO_WINKLER;
default: throw new IllegalArgumentException("Illegal distance option " + str);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public void testFromString() {
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("internal"), equalTo(DirectSpellChecker.INTERNAL_LEVENSHTEIN));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("damerau_levenshtein"), instanceOf(LuceneLevenshteinDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenshtein"), instanceOf(LevensteinDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("jaroWinkler"), instanceOf(JaroWinklerDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("jaro_winkler"), instanceOf(JaroWinklerDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("ngram"), instanceOf(NGramDistance.class));

expectThrows(IllegalArgumentException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance("doesnt_exist"));
Expand All @@ -88,6 +88,11 @@ public void testLevensteinDeprecation() {
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
}

/**
 * Checks that the legacy spelling of the Jaro-Winkler option still resolves to a
 * {@link JaroWinklerDistance} and that doing so emits the deprecation warning
 * pointing users at <code>jaro_winkler</code>.
 */
public void testJaroWinklerDeprecation() {
    // NOTE(review): the input is mixed-case while the warning text is lower-case;
    // presumably resolveDistance normalises case before matching - confirm in the builder.
    final String deprecatedName = "jaroWinkler";
    assertThat(DirectCandidateGeneratorBuilder.resolveDistance(deprecatedName), instanceOf(JaroWinklerDistance.class));
    assertWarnings("Deprecated distance [jarowinkler] used, replaced by [jaro_winkler]");
}

private static DirectCandidateGeneratorBuilder mutate(DirectCandidateGeneratorBuilder original) throws IOException {
DirectCandidateGeneratorBuilder mutation = copy(original);
List<Supplier<DirectCandidateGeneratorBuilder>> mutators = new ArrayList<>();
Expand Down Expand Up @@ -212,7 +217,8 @@ public static DirectCandidateGeneratorBuilder randomCandidateGenerator() {
maybeSet(generator::postFilter, randomAlphaOfLengthBetween(1, 20));
maybeSet(generator::size, randomIntBetween(1, 20));
maybeSet(generator::sort, randomFrom("score", "frequency"));
maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenshtein", "jarowinkler", "ngram"));
maybeSet(generator::stringDistance,
randomFrom("internal", "damerau_levenshtein", "levenshtein", "jaro_winkler", "ngram"));
maybeSet(generator::suggestMode, randomFrom("missing", "popular", "always"));
return generator;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public void testValidOrdinals() {
assertThat(StringDistanceImpl.INTERNAL.ordinal(), equalTo(0));
assertThat(StringDistanceImpl.DAMERAU_LEVENSHTEIN.ordinal(), equalTo(1));
assertThat(StringDistanceImpl.LEVENSHTEIN.ordinal(), equalTo(2));
assertThat(StringDistanceImpl.JAROWINKLER.ordinal(), equalTo(3));
assertThat(StringDistanceImpl.JARO_WINKLER.ordinal(), equalTo(3));
assertThat(StringDistanceImpl.NGRAM.ordinal(), equalTo(4));
}

Expand All @@ -48,7 +48,7 @@ public void testFromString() {
assertThat(StringDistanceImpl.resolve("internal"), equalTo(StringDistanceImpl.INTERNAL));
assertThat(StringDistanceImpl.resolve("damerau_levenshtein"), equalTo(StringDistanceImpl.DAMERAU_LEVENSHTEIN));
assertThat(StringDistanceImpl.resolve("levenshtein"), equalTo(StringDistanceImpl.LEVENSHTEIN));
assertThat(StringDistanceImpl.resolve("jarowinkler"), equalTo(StringDistanceImpl.JAROWINKLER));
assertThat(StringDistanceImpl.resolve("jaro_winkler"), equalTo(StringDistanceImpl.JARO_WINKLER));
assertThat(StringDistanceImpl.resolve("ngram"), equalTo(StringDistanceImpl.NGRAM));

final String doesntExist = "doesnt_exist";
Expand All @@ -63,12 +63,17 @@ public void testLevensteinDeprecation() {
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
}

/**
 * Checks that the legacy spelling of the Jaro-Winkler option still maps to
 * {@link StringDistanceImpl#JARO_WINKLER} and that resolving it emits the
 * deprecation warning pointing users at <code>jaro_winkler</code>.
 */
public void testJaroWinklerDeprecation() {
    // NOTE(review): the input is mixed-case while the matched case label is lower-case;
    // presumably resolve() normalises case before the switch - confirm in StringDistanceImpl.
    final StringDistanceImpl resolved = StringDistanceImpl.resolve("jaroWinkler");
    assertThat(resolved, equalTo(StringDistanceImpl.JARO_WINKLER));
    assertWarnings("Deprecated distance [jarowinkler] used, replaced by [jaro_winkler]");
}

@Override
public void testWriteTo() throws IOException {
assertWriteToStream(StringDistanceImpl.INTERNAL, 0);
assertWriteToStream(StringDistanceImpl.DAMERAU_LEVENSHTEIN, 1);
assertWriteToStream(StringDistanceImpl.LEVENSHTEIN, 2);
assertWriteToStream(StringDistanceImpl.JAROWINKLER, 3);
assertWriteToStream(StringDistanceImpl.JARO_WINKLER, 3);
assertWriteToStream(StringDistanceImpl.NGRAM, 4);
}

Expand All @@ -77,7 +82,7 @@ public void testReadFrom() throws IOException {
assertReadFromStream(0, StringDistanceImpl.INTERNAL);
assertReadFromStream(1, StringDistanceImpl.DAMERAU_LEVENSHTEIN);
assertReadFromStream(2, StringDistanceImpl.LEVENSHTEIN);
assertReadFromStream(3, StringDistanceImpl.JAROWINKLER);
assertReadFromStream(3, StringDistanceImpl.JARO_WINKLER);
assertReadFromStream(4, StringDistanceImpl.NGRAM);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ private static StringDistanceImpl randomStringDistance() {
case 0: return StringDistanceImpl.INTERNAL;
case 1: return StringDistanceImpl.DAMERAU_LEVENSHTEIN;
case 2: return StringDistanceImpl.LEVENSHTEIN;
case 3: return StringDistanceImpl.JAROWINKLER;
case 3: return StringDistanceImpl.JARO_WINKLER;
case 4: return StringDistanceImpl.NGRAM;
default: throw new IllegalArgumentException("No string distance algorithm with an ordinal of " + randomVal);
}
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/search/suggesters/term-suggest.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -118,5 +118,5 @@ doesn't take the query into account that is part of request.
Damerau-Levenshtein algorithm.
`levenshtein` - String distance algorithm based on Levenshtein edit distance
algorithm.
`jarowinkler` - String distance algorithm based on Jaro-Winkler algorithm.
`jaro_winkler` - String distance algorithm based on Jaro-Winkler algorithm.
`ngram` - String distance algorithm based on character n-grams.