Skip to content

Deprecating levenstein in favor of levensHtein #27409

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
Expand All @@ -45,6 +47,9 @@

public final class DirectCandidateGeneratorBuilder implements CandidateGenerator {

private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(
Loggers.getLogger(DirectCandidateGeneratorBuilder.class));

private static final String TYPE = "direct_generator";

public static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE);
Expand Down Expand Up @@ -211,8 +216,8 @@ String sort() {
* string distance for terms inside the index.
* <li><code>damerau_levenshtein</code> - String distance algorithm
* based on Damerau-Levenshtein algorithm.
* <li><code>levenstein</code> - String distance algorithm based on
* Levenstein edit distance algorithm.
* <li><code>levenshtein</code> - String distance algorithm based on
* Levenshtein edit distance algorithm.
* <li><code>jarowinkler</code> - String distance algorithm based on
* Jaro-Winkler algorithm.
* <li><code>ngram</code> - String distance algorithm based on character
Expand Down Expand Up @@ -458,13 +463,16 @@ private static SuggestMode resolveSuggestMode(String suggestMode) {
}
}

private static StringDistance resolveDistance(String distanceVal) {
static StringDistance resolveDistance(String distanceVal) {
distanceVal = distanceVal.toLowerCase(Locale.US);
if ("internal".equals(distanceVal)) {
return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
} else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) {
return new LuceneLevenshteinDistance();
} else if ("levenstein".equals(distanceVal)) {
DEPRECATION_LOGGER.deprecated("Deprecated distance [levenstein] used, replaced by [levenshtein]");
return new LevensteinDistance();
} else if ("levenshtein".equals(distanceVal)) {
return new LevensteinDistance();
// TODO Jaro and Winkler are 2 people - so apply same naming logic
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems a shame to leave this TODO here. Would you care to fix that too?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No scope creep please. 😄

// as damerau_levenshtein
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.QueryShardContext;
Expand Down Expand Up @@ -66,6 +68,9 @@
* global options, but are only applicable for this suggestion.
*/
public class TermSuggestionBuilder extends SuggestionBuilder<TermSuggestionBuilder> {

private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermSuggestionBuilder.class));

private static final String SUGGESTION_NAME = "term";

private SuggestMode suggestMode = SuggestMode.MISSING;
Expand Down Expand Up @@ -214,8 +219,8 @@ public SortBy sort() {
* string distance for terms inside the index.
* <li><code>damerau_levenshtein</code> - String distance algorithm based on
* Damerau-Levenshtein algorithm.
* <li><code>levenstein</code> - String distance algorithm based on
* Levenstein edit distance algorithm.
* <li><code>levenshtein</code> - String distance algorithm based on
* Levenshtein edit distance algorithm.
* <li><code>jarowinkler</code> - String distance algorithm based on
* Jaro-Winkler algorithm.
* <li><code>ngram</code> - String distance algorithm based on character
Expand Down Expand Up @@ -543,8 +548,8 @@ public StringDistance toLucene() {
return new LuceneLevenshteinDistance();
}
},
/** String distance algorithm based on Levenstein edit distance algorithm. */
LEVENSTEIN {
/** String distance algorithm based on Levenshtein edit distance algorithm. */
LEVENSHTEIN {
@Override
public StringDistance toLucene() {
return new LevensteinDistance();
Expand Down Expand Up @@ -584,7 +589,10 @@ public static StringDistanceImpl resolve(final String str) {
case "damerauLevenshtein":
return DAMERAU_LEVENSHTEIN;
case "levenstein":
return LEVENSTEIN;
DEPRECATION_LOGGER.deprecated("Deprecated distance [levenstein] used, replaced by [levenshtein]");
return LEVENSHTEIN;
case "levenshtein":
return LEVENSHTEIN;
case "ngram":
return NGRAM;
case "jarowinkler":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@

package org.elasticsearch.search.suggest.phrase;

import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
import org.apache.lucene.search.spell.NGramDistance;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.xcontent.ToXContent;
Expand All @@ -38,6 +43,8 @@
import java.util.function.Supplier;

import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.core.IsInstanceOf.instanceOf;

public class DirectCandidateGeneratorTests extends ESTestCase {
private static final int NUMBER_OF_RUNS = 20;
Expand Down Expand Up @@ -65,6 +72,22 @@ public void testEqualsAndHashcode() throws IOException {
}
}

public void testFromString() {
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("internal"), equalTo(DirectSpellChecker.INTERNAL_LEVENSHTEIN));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("damerau_levenshtein"), instanceOf(LuceneLevenshteinDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenshtein"), instanceOf(LevensteinDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("jaroWinkler"), instanceOf(JaroWinklerDistance.class));
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("ngram"), instanceOf(NGramDistance.class));

expectThrows(IllegalArgumentException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance("doesnt_exist"));
expectThrows(NullPointerException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance(null));
}

public void testLevensteinDeprecation() {
assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenstein"), instanceOf(LevensteinDistance.class));
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
}

private static DirectCandidateGeneratorBuilder mutate(DirectCandidateGeneratorBuilder original) throws IOException {
DirectCandidateGeneratorBuilder mutation = copy(original);
List<Supplier<DirectCandidateGeneratorBuilder>> mutators = new ArrayList<>();
Expand All @@ -89,7 +112,7 @@ private static DirectCandidateGeneratorBuilder mutate(DirectCandidateGeneratorBu
mutators.add(() -> mutation.preFilter(original.preFilter() == null ? "preFilter" : original.preFilter() + "_other"));
mutators.add(() -> mutation.sort(original.sort() == null ? "score" : original.sort() + "_other"));
mutators.add(
() -> mutation.stringDistance(original.stringDistance() == null ? "levenstein" : original.stringDistance() + "_other"));
() -> mutation.stringDistance(original.stringDistance() == null ? "levenshtein" : original.stringDistance() + "_other"));
mutators.add(() -> mutation.suggestMode(original.suggestMode() == null ? "missing" : original.suggestMode() + "_other"));
return randomFrom(mutators).get();
}
Expand Down Expand Up @@ -189,7 +212,7 @@ public static DirectCandidateGeneratorBuilder randomCandidateGenerator() {
maybeSet(generator::postFilter, randomAlphaOfLengthBetween(1, 20));
maybeSet(generator::size, randomIntBetween(1, 20));
maybeSet(generator::sort, randomFrom("score", "frequency"));
maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenstein", "jarowinkler", "ngram"));
maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenshtein", "jarowinkler", "ngram"));
maybeSet(generator::suggestMode, randomFrom("missing", "popular", "always"));
return generator;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
package org.elasticsearch.search.suggest.term;

import org.elasticsearch.common.io.stream.AbstractWriteableEnumTestCase;
import org.elasticsearch.search.suggest.term.TermSuggestionBuilder.StringDistanceImpl;

import java.io.IOException;

import static org.elasticsearch.search.suggest.term.TermSuggestionBuilder.StringDistanceImpl;
import static org.hamcrest.Matchers.equalTo;

/**
Expand All @@ -38,7 +38,7 @@ public StringDistanceImplTests() {
public void testValidOrdinals() {
assertThat(StringDistanceImpl.INTERNAL.ordinal(), equalTo(0));
assertThat(StringDistanceImpl.DAMERAU_LEVENSHTEIN.ordinal(), equalTo(1));
assertThat(StringDistanceImpl.LEVENSTEIN.ordinal(), equalTo(2));
assertThat(StringDistanceImpl.LEVENSHTEIN.ordinal(), equalTo(2));
assertThat(StringDistanceImpl.JAROWINKLER.ordinal(), equalTo(3));
assertThat(StringDistanceImpl.NGRAM.ordinal(), equalTo(4));
}
Expand All @@ -47,28 +47,27 @@ public void testValidOrdinals() {
public void testFromString() {
assertThat(StringDistanceImpl.resolve("internal"), equalTo(StringDistanceImpl.INTERNAL));
assertThat(StringDistanceImpl.resolve("damerau_levenshtein"), equalTo(StringDistanceImpl.DAMERAU_LEVENSHTEIN));
assertThat(StringDistanceImpl.resolve("levenstein"), equalTo(StringDistanceImpl.LEVENSTEIN));
assertThat(StringDistanceImpl.resolve("levenshtein"), equalTo(StringDistanceImpl.LEVENSHTEIN));
assertThat(StringDistanceImpl.resolve("jarowinkler"), equalTo(StringDistanceImpl.JAROWINKLER));
assertThat(StringDistanceImpl.resolve("ngram"), equalTo(StringDistanceImpl.NGRAM));

final String doesntExist = "doesnt_exist";
try {
StringDistanceImpl.resolve(doesntExist);
fail("StringDistanceImpl should not have an element " + doesntExist);
} catch (IllegalArgumentException e) {
}
try {
StringDistanceImpl.resolve(null);
fail("StringDistanceImpl.resolve on a null value should throw an exception.");
} catch (NullPointerException e) {
assertThat(e.getMessage(), equalTo("Input string is null"));
}
expectThrows(IllegalArgumentException.class, () -> StringDistanceImpl.resolve(doesntExist));

NullPointerException e = expectThrows(NullPointerException.class, () -> StringDistanceImpl.resolve(null));
assertThat(e.getMessage(), equalTo("Input string is null"));
}

public void testLevensteinDeprecation() {
assertThat(StringDistanceImpl.resolve("levenstein"), equalTo(StringDistanceImpl.LEVENSHTEIN));
assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]");
}

@Override
public void testWriteTo() throws IOException {
assertWriteToStream(StringDistanceImpl.INTERNAL, 0);
assertWriteToStream(StringDistanceImpl.DAMERAU_LEVENSHTEIN, 1);
assertWriteToStream(StringDistanceImpl.LEVENSTEIN, 2);
assertWriteToStream(StringDistanceImpl.LEVENSHTEIN, 2);
assertWriteToStream(StringDistanceImpl.JAROWINKLER, 3);
assertWriteToStream(StringDistanceImpl.NGRAM, 4);
}
Expand All @@ -77,7 +76,7 @@ public void testWriteTo() throws IOException {
public void testReadFrom() throws IOException {
assertReadFromStream(0, StringDistanceImpl.INTERNAL);
assertReadFromStream(1, StringDistanceImpl.DAMERAU_LEVENSHTEIN);
assertReadFromStream(2, StringDistanceImpl.LEVENSTEIN);
assertReadFromStream(2, StringDistanceImpl.LEVENSHTEIN);
assertReadFromStream(3, StringDistanceImpl.JAROWINKLER);
assertReadFromStream(4, StringDistanceImpl.NGRAM);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ private static StringDistanceImpl randomStringDistance() {
switch (randomVal) {
case 0: return StringDistanceImpl.INTERNAL;
case 1: return StringDistanceImpl.DAMERAU_LEVENSHTEIN;
case 2: return StringDistanceImpl.LEVENSTEIN;
case 2: return StringDistanceImpl.LEVENSHTEIN;
case 3: return StringDistanceImpl.JAROWINKLER;
case 4: return StringDistanceImpl.NGRAM;
default: throw new IllegalArgumentException("No string distance algorithm with an ordinal of " + randomVal);
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/search/suggesters/term-suggest.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ doesn't take the query into account that is part of request.
for comparing string distance for terms inside the index.
`damerau_levenshtein` - String distance algorithm based on
Damerau-Levenshtein algorithm.
`levenstein` - String distance algorithm based on Levenstein edit distance
`levenshtein` - String distance algorithm based on Levenshtein edit distance
algorithm.
`jarowinkler` - String distance algorithm based on Jaro-Winkler algorithm.
`ngram` - String distance algorithm based on character n-grams.