Skip to content

Use index-prefix fields for terms of length min_chars - 1 #36703

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,21 @@ setup:
- match: {hits.max_score: 2}
- match: {hits.hits.0._score: 2}

- do:
search:
rest_total_hits_as_int: true
index: test
body:
query:
query_string:
default_field: text
query: s*
boost: 2

- match: {hits.total: 1}
- match: {hits.max_score: 2}
- match: {hits.hits.0._score: 2}

- do:
search:
rest_total_hits_as_int: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
Expand All @@ -40,6 +41,9 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.intervals.IntervalsSource;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.Version;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.settings.Settings;
Expand Down Expand Up @@ -360,7 +364,7 @@ PrefixFieldType setAnalyzer(NamedAnalyzer delegate) {
}

boolean accept(int length) {
return length >= minChars && length <= maxChars;
return length >= minChars - 1 && length <= maxChars;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's go even further, change this to just length <= maxChars, and then append minChars - prefixTerm.length wildcards to the automaton that is used for querying?

}

void doXContent(XContentBuilder builder) throws IOException {
Expand All @@ -370,6 +374,22 @@ void doXContent(XContentBuilder builder) throws IOException {
builder.endObject();
}

@Override
public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
    // Prefixes at least minChars long can be looked up directly as terms
    // of this prefix field.
    if (value.length() >= minChars) {
        return super.termQuery(value, context);
    }
    // Shorter prefixes: build an automaton that accepts the literal prefix
    // followed by exactly (minChars - value.length()) arbitrary characters,
    // so only terms reaching the minimum indexed prefix length can match.
    int padding = minChars - value.length();
    List<Automaton> parts = new ArrayList<>(padding + 1);
    parts.add(Automata.makeString(value));
    while (padding-- > 0) {
        parts.add(Automata.makeAnyChar());
    }
    AutomatonQuery query =
        new AutomatonQuery(new Term(name(), value + "*"), Operations.concatenate(parts));
    query.setRewriteMethod(method);
    return query;
}

@Override
public PrefixFieldType clone() {
return new PrefixFieldType(name(), minChars, maxChars);
Expand Down Expand Up @@ -402,7 +422,6 @@ public boolean equals(Object o) {

@Override
public int hashCode() {

// Combine the parent field type's hash with the prefix length bounds;
// presumably mirrors the fields compared in equals() — equals body is
// outside this hunk, so confirm they stay in sync.
return Objects.hash(super.hashCode(), minChars, maxChars);
}
}
Expand Down Expand Up @@ -564,7 +583,7 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer
if (prefixFieldType == null || prefixFieldType.accept(value.length()) == false) {
return super.prefixQuery(value, method, context);
}
Query tq = prefixFieldType.termQuery(value, context);
Query tq = prefixFieldType.prefixQuery(value, method, context);
if (method == null || method == MultiTermQuery.CONSTANT_SCORE_REWRITE
|| method == MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE) {
return new ConstantScoreQuery(tq);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
Expand Down Expand Up @@ -71,10 +69,8 @@
import java.util.HashMap;
import java.util.Map;

import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.core.Is.is;

public class TextFieldMapperTests extends ESSingleNodeTestCase {
Expand Down Expand Up @@ -817,18 +813,13 @@ public void testFastPhraseMapping() throws IOException {

public void testIndexPrefixMapping() throws IOException {

QueryShardContext queryShardContext = indexService.newQueryShardContext(
randomInt(20), null, () -> {
throw new UnsupportedOperationException();
}, null);

{
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "standard")
.startObject("index_prefixes")
.field("min_chars", 1)
.field("min_chars", 2)
.field("max_chars", 10)
.endObject()
.endObject().endObject()
Expand All @@ -837,16 +828,7 @@ public void testIndexPrefixMapping() throws IOException {
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());

assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=1:10"));

FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field");
MappedFieldType fieldType = fieldMapper.fieldType;

Query q = fieldType.prefixQuery("goin", CONSTANT_SCORE_REWRITE, queryShardContext);

assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field._index_prefix", "goin"))), q);
q = fieldType.prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, queryShardContext);
assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), q);
assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=2:10"));

ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
.bytes(XContentFactory.jsonBuilder()
Expand All @@ -870,17 +852,8 @@ public void testIndexPrefixMapping() throws IOException {
CompressedXContent json = new CompressedXContent(mapping);
DocumentMapper mapper = parser.parse("type", json);

FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field");
MappedFieldType fieldType = fieldMapper.fieldType;

Query q1 = fieldType.prefixQuery("g", CONSTANT_SCORE_REWRITE, queryShardContext);
assertThat(q1, instanceOf(PrefixQuery.class));
Query q2 = fieldType.prefixQuery("go", CONSTANT_SCORE_REWRITE, queryShardContext);
assertThat(q2, instanceOf(ConstantScoreQuery.class));
Query q5 = fieldType.prefixQuery("going", CONSTANT_SCORE_REWRITE, queryShardContext);
assertThat(q5, instanceOf(ConstantScoreQuery.class));
Query q6 = fieldType.prefixQuery("goings", CONSTANT_SCORE_REWRITE, queryShardContext);
assertThat(q6, instanceOf(PrefixQuery.class));
assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=2:5"));

}

{
Expand All @@ -898,10 +871,8 @@ public void testIndexPrefixMapping() throws IOException {
.endObject().endObject()
.endObject().endObject());

IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
indexService.mapperService()
.merge("type", new CompressedXContent(illegalMapping), MergeReason.MAPPING_UPDATE);
});
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () ->
indexService.mapperService().merge("type", new CompressedXContent(illegalMapping), MergeReason.MAPPING_UPDATE));
assertThat(e.getMessage(), containsString("Field [field._index_prefix] is defined twice in [type]"));

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,27 @@

import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.common.unit.Fuzziness;
import org.junit.Before;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;

public class TextFieldTypeTests extends FieldTypeTestCase {
@Override
protected MappedFieldType createDefaultFieldType() {
Expand Down Expand Up @@ -143,4 +152,21 @@ public void testFuzzyQuery() {
() -> ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
}

public void testIndexPrefixes() {
    // Text field with a prefix sub-field covering prefixes of 2 to 10 characters.
    TextFieldMapper.TextFieldType fieldType = new TextFieldMapper.TextFieldType();
    fieldType.setName("field");
    fieldType.setPrefixFieldType(new TextFieldMapper.PrefixFieldType("field._index_prefix", 2, 10));

    // In-range prefix: rewritten to an exact term lookup on the prefix field.
    Query inRange = fieldType.prefixQuery("goin", CONSTANT_SCORE_REWRITE, null);
    assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field._index_prefix", "goin"))), inRange);

    // Longer than max_chars: falls back to a plain prefix query on the main field.
    Query tooLong = fieldType.prefixQuery("internationalisatio", CONSTANT_SCORE_REWRITE, null);
    assertEquals(new PrefixQuery(new Term("field", "internationalisatio")), tooLong);

    // Shorter than min_chars: automaton of the literal character plus one wildcard.
    Query tooShort = fieldType.prefixQuery("g", CONSTANT_SCORE_REWRITE, null);
    Automaton expected =
        Operations.concatenate(Arrays.asList(Automata.makeChar('g'), Automata.makeAnyChar()));
    assertEquals(new ConstantScoreQuery(new AutomatonQuery(new Term("field._index_prefix", "g*"), expected)), tooShort);
}
}