-
Notifications
You must be signed in to change notification settings - Fork 25.3k
Make Fuzziness reject illegal values earlier #33511
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
14bfd51
df643a9
82546f2
639bd55
6345af9
d53a57e
609ad68
93e2f05
72327df
e9e7044
d250966
d639395
2dcfccc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
[float] | ||
[[breaking_80_java_changes]] | ||
=== Java API changes | ||
|
||
[float] | ||
==== Changes to Fuzziness | ||
|
||
To create `Fuzziness` instances, use the `fromString` and `fromEdits` method | ||
instead of the `build` method that used to accept both Strings and numeric | ||
values. Several fuzziness setters on query builders (e.g. | ||
MatchQueryBuilder#fuzziness) now accept only a `Fuzziness`instance instead of | ||
an Object. You should preferably use the available constants (e.g. | ||
Fuzziness.ONE, Fuzziness.AUTO) or build your own instance using the above | ||
mentioned factory methods. | ||
|
||
Fuzziness used to be lenient when it comes to parsing arbitrary numeric values | ||
while silently truncating them to one of the three allowed edit distances 0, 1 | ||
or 2. This leniency is now removed and the class will throw errors when trying | ||
to construct an instance with another value (e.g. floats like 1.3 used to get | ||
accepted but truncated to 1). You should use one of the allowed values. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
|
||
import org.elasticsearch.ElasticsearchParseException; | ||
import org.elasticsearch.common.ParseField; | ||
import org.elasticsearch.common.Strings; | ||
import org.elasticsearch.common.io.stream.StreamInput; | ||
import org.elasticsearch.common.io.stream.StreamOutput; | ||
import org.elasticsearch.common.io.stream.Writeable; | ||
|
@@ -38,41 +39,73 @@ | |
*/ | ||
public final class Fuzziness implements ToXContentFragment, Writeable { | ||
|
||
public static final String X_FIELD_NAME = "fuzziness"; | ||
public static final Fuzziness ZERO = new Fuzziness(0); | ||
public static final Fuzziness ONE = new Fuzziness(1); | ||
public static final Fuzziness TWO = new Fuzziness(2); | ||
public static final Fuzziness AUTO = new Fuzziness("AUTO"); | ||
static final String X_FIELD_NAME = "fuzziness"; | ||
public static final ParseField FIELD = new ParseField(X_FIELD_NAME); | ||
private static final int DEFAULT_LOW_DISTANCE = 3; | ||
private static final int DEFAULT_HIGH_DISTANCE = 6; | ||
|
||
public static final Fuzziness ZERO = new Fuzziness("0"); | ||
public static final Fuzziness ONE = new Fuzziness("1"); | ||
public static final Fuzziness TWO = new Fuzziness("2"); | ||
public static final Fuzziness AUTO = new Fuzziness("AUTO"); | ||
|
||
static final int DEFAULT_LOW_DISTANCE = 3; | ||
static final int DEFAULT_HIGH_DISTANCE = 6; | ||
|
||
private final String fuzziness; | ||
private int lowDistance = DEFAULT_LOW_DISTANCE; | ||
private int highDistance = DEFAULT_HIGH_DISTANCE; | ||
|
||
private Fuzziness(int fuzziness) { | ||
if (fuzziness != 0 && fuzziness != 1 && fuzziness != 2) { | ||
throw new IllegalArgumentException("Valid edit distances are [0, 1, 2] but was [" + fuzziness + "]"); | ||
} | ||
this.fuzziness = Integer.toString(fuzziness); | ||
private Fuzziness(String fuzziness) { | ||
this.fuzziness = fuzziness; | ||
} | ||
|
||
private Fuzziness(String fuzziness) { | ||
if (fuzziness == null || fuzziness.isEmpty()) { | ||
throw new IllegalArgumentException("fuzziness can't be null!"); | ||
/** | ||
* Creates a {@link Fuzziness} instance from an edit distance. The value must be one of {@code [0, 1, 2]} | ||
* Note: Using this method only makes sense if the field you are applying Fuzziness to is some sort of string. | ||
* @throws IllegalArgumentException if the edit distance is not in [0, 1, 2] | ||
*/ | ||
public static Fuzziness fromEdits(int edits) { | ||
switch (edits) { | ||
case 0: return Fuzziness.ZERO; | ||
case 1: return Fuzziness.ONE; | ||
case 2: return Fuzziness.TWO; | ||
default: | ||
throw new IllegalArgumentException("Valid edit distances are [0, 1, 2] but was [" + edits + "]"); | ||
} | ||
this.fuzziness = fuzziness.toUpperCase(Locale.ROOT); | ||
} | ||
|
||
private Fuzziness(String fuzziness, int lowDistance, int highDistance) { | ||
this(fuzziness); | ||
if (lowDistance < 0 || highDistance < 0 || lowDistance > highDistance) { | ||
throw new IllegalArgumentException("fuzziness wrongly configured, must be: lowDistance > 0, highDistance" + | ||
" > 0 and lowDistance <= highDistance "); | ||
/** | ||
* Creates a {@link Fuzziness} instance from a String representation. This can | ||
* either be an edit distance where the value must be one of | ||
* {@code ["0", "1", "2"]} or "AUTO" for a fuzziness that depends on the term | ||
* length. Using the "AUTO" fuzziness, you can optionally supply low and high | ||
* distance arguments in the format {@code "AUTO:[low],[high]"}. See the query | ||
* DSL documentation for more information about how these values affect the | ||
* fuzziness value. | ||
* Note: Using this method only makes sense if the field you | ||
* are applying Fuzziness to is some sort of string. | ||
*/ | ||
public static Fuzziness fromString(String fuzzinessString) { | ||
if (Strings.isEmpty(fuzzinessString)) { | ||
throw new IllegalArgumentException("fuzziness cannot be null or empty."); | ||
} | ||
String upperCase = fuzzinessString.toUpperCase(Locale.ROOT); | ||
// check if it is one of the "AUTO" variants | ||
if (upperCase.equals("AUTO")) { | ||
return Fuzziness.AUTO; | ||
} else if (upperCase.startsWith("AUTO:")) { | ||
return parseCustomAuto(upperCase); | ||
} else { | ||
// should be a float or int representing a valid edit distance, otherwise throw error | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we restrict to int ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, I see this still leaves the door open for something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
try { | ||
float parsedFloat = Float.parseFloat(upperCase); | ||
if (parsedFloat % 1 > 0) { | ||
throw new IllegalArgumentException("fuzziness needs to be one of 0.0, 1.0 or 2.0 but was " + parsedFloat); | ||
} | ||
return fromEdits((int) parsedFloat); | ||
} catch (NumberFormatException e) { | ||
throw new IllegalArgumentException("fuzziness cannot be [" + fuzzinessString + "].", e); | ||
} | ||
} | ||
this.lowDistance = lowDistance; | ||
this.highDistance = highDistance; | ||
} | ||
|
||
/** | ||
|
@@ -101,39 +134,23 @@ public void writeTo(StreamOutput out) throws IOException { | |
} | ||
} | ||
|
||
/** | ||
* Creates a {@link Fuzziness} instance from an edit distance. The value must be one of {@code [0, 1, 2]} | ||
* | ||
* Note: Using this method only makes sense if the field you are applying Fuzziness to is some sort of string. | ||
*/ | ||
public static Fuzziness fromEdits(int edits) { | ||
return new Fuzziness(edits); | ||
} | ||
|
||
public static Fuzziness build(Object fuzziness) { | ||
if (fuzziness instanceof Fuzziness) { | ||
return (Fuzziness) fuzziness; | ||
} | ||
String string = fuzziness.toString(); | ||
if (AUTO.asString().equalsIgnoreCase(string)) { | ||
return AUTO; | ||
} else if (string.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":")) { | ||
return parseCustomAuto(string); | ||
} | ||
return new Fuzziness(string); | ||
} | ||
|
||
private static Fuzziness parseCustomAuto( final String string) { | ||
assert string.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":"); | ||
String[] fuzzinessLimit = string.substring(AUTO.asString().length() + 1).split(","); | ||
private static Fuzziness parseCustomAuto(final String fuzzinessString) { | ||
assert fuzzinessString.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":"); | ||
String[] fuzzinessLimit = fuzzinessString.substring(AUTO.asString().length() + 1).split(","); | ||
if (fuzzinessLimit.length == 2) { | ||
try { | ||
int lowerLimit = Integer.parseInt(fuzzinessLimit[0]); | ||
int highLimit = Integer.parseInt(fuzzinessLimit[1]); | ||
return new Fuzziness("AUTO", lowerLimit, highLimit); | ||
if (lowerLimit < 0 || highLimit < 0 || lowerLimit > highLimit) { | ||
throw new ElasticsearchParseException("fuzziness wrongly configured [{}]. Must be 0 < lower value <= higher value.", | ||
fuzzinessString); | ||
} | ||
Fuzziness fuzziness = new Fuzziness("AUTO"); | ||
fuzziness.lowDistance = lowerLimit; | ||
fuzziness.highDistance = highLimit; | ||
return fuzziness; | ||
} catch (NumberFormatException e) { | ||
throw new ElasticsearchParseException("failed to parse [{}] as a \"auto:int,int\"", e, | ||
string); | ||
throw new ElasticsearchParseException("failed to parse [{}] as a \"auto:int,int\"", e, fuzzinessString); | ||
} | ||
} else { | ||
throw new ElasticsearchParseException("failed to find low and high distance values"); | ||
|
@@ -144,29 +161,9 @@ public static Fuzziness parse(XContentParser parser) throws IOException { | |
XContentParser.Token token = parser.currentToken(); | ||
switch (token) { | ||
case VALUE_STRING: | ||
return fromString(parser.text()); | ||
case VALUE_NUMBER: | ||
final String fuzziness = parser.text(); | ||
if (AUTO.asString().equalsIgnoreCase(fuzziness)) { | ||
return AUTO; | ||
} else if (fuzziness.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":")) { | ||
return parseCustomAuto(fuzziness); | ||
} | ||
try { | ||
final int minimumSimilarity = Integer.parseInt(fuzziness); | ||
switch (minimumSimilarity) { | ||
case 0: | ||
return ZERO; | ||
case 1: | ||
return ONE; | ||
case 2: | ||
return TWO; | ||
default: | ||
return build(fuzziness); | ||
} | ||
} catch (NumberFormatException ex) { | ||
return build(fuzziness); | ||
} | ||
|
||
return fromEdits(parser.intValue()); | ||
default: | ||
throw new IllegalArgumentException("Can't parse fuzziness on token: [" + token + "]"); | ||
} | ||
|
@@ -200,7 +197,7 @@ public float asFloat() { | |
if (this.equals(AUTO) || isAutoWithCustomValues()) { | ||
return 1f; | ||
} | ||
return Float.parseFloat(fuzziness.toString()); | ||
return Float.parseFloat(fuzziness); | ||
} | ||
|
||
private int termLen(String text) { | ||
|
@@ -209,13 +206,13 @@ private int termLen(String text) { | |
|
||
public String asString() { | ||
if (isAutoWithCustomValues()) { | ||
return fuzziness.toString() + ":" + lowDistance + "," + highDistance; | ||
return fuzziness + ":" + lowDistance + "," + highDistance; | ||
} | ||
return fuzziness.toString(); | ||
return fuzziness; | ||
} | ||
|
||
private boolean isAutoWithCustomValues() { | ||
return fuzziness.startsWith("AUTO") && (lowDistance != DEFAULT_LOW_DISTANCE || | ||
return fuzziness.equals("AUTO") && (lowDistance != DEFAULT_LOW_DISTANCE || | ||
highDistance != DEFAULT_HIGH_DISTANCE); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This method used to be very permissive, e.g. parsing float values and silently casting them to ints, where values larger than 2 get ignored somewhere later in the code I think.