elastic · jimczi · Jan 17, 2019 · Jan 14, 2019 · Jan 16, 2019 · Jan 16, 2019
diff --git a/.../src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/.../src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java
@@ -27,17 +27,17 @@
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.NormsFieldExistsQuery;
-import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.search.spans.SpanQuery;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -603,62 +603,44 @@ public Query existsQuery(QueryShardContext context) {
         }
 
+        /**
+         * Wraps a {@link PrefixQuery} on the term {@code (name(), indexedValueForSearch(value))} in a
+         * {@link SpanMultiTermQueryWrapper} and sets the given rewrite method on it.
+         *
+         * @param value   prefix value; converted with {@code indexedValueForSearch} before the term is built
+         * @param method  rewrite method installed on the returned wrapper
+         * @param context not used by this implementation
+         */
         @Override
-        public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {
-            PhraseQuery.Builder builder = new PhraseQuery.Builder();
-            builder.setSlop(slop);
-
-            TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-            PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
-            int position = -1;
-
-            stream.reset();
-            while (stream.incrementToken()) {
-                if (enablePosIncrements) {
-                    position += posIncrAtt.getPositionIncrement();
-                }
-                else {
-                    position += 1;
-                }
-                builder.add(new Term(field, termAtt.getBytesRef()), position);
-            }
-
-            return builder.build();
+        public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) {
+            SpanMultiTermQueryWrapper<?> spanMulti =
+                new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(name(), indexedValueForSearch(value))));
+            spanMulti.setRewriteMethod(method);
+            return spanMulti;
         }
 
+        /**
+         * Delegates to {@code TextFieldMapper.createPhraseQuery}, passing {@code name()} along with the given arguments.
+         */
         @Override
-        public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
-
-            MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
-            mpqb.setSlop(slop);
-
-            TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-
-            PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
-            int position = -1;
-
-            List<Term> multiTerms = new ArrayList<>();
-            stream.reset();
-            while (stream.incrementToken()) {
-                int positionIncrement = posIncrAtt.getPositionIncrement();
+        public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+            return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements);
+        }
 
-                if (positionIncrement > 0 && multiTerms.size() > 0) {
-                    if (enablePositionIncrements) {
-                        mpqb.add(multiTerms.toArray(new Term[0]), position);
-                    } else {
-                        mpqb.add(multiTerms.toArray(new Term[0]));
-                    }
-                    multiTerms.clear();
-                }
-                position += positionIncrement;
-                multiTerms.add(new Term(field, termAtt.getBytesRef()));
-            }
+        /**
+         * Delegates to {@code TextFieldMapper.createPhraseQuery}, the same helper {@code phraseQuery} calls,
+         * passing {@code name()} along with the given arguments.
+         */
+        @Override
+        public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+            return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements);
+        }
 
-            if (enablePositionIncrements) {
-                mpqb.add(multiTerms.toArray(new Term[0]), position);
-            } else {
-                mpqb.add(multiTerms.toArray(new Term[0]));
-            }
-            return mpqb.build();
+        /**
+         * Delegates to {@code TextFieldMapper.createPhrasePrefixQuery}, passing {@code name()} along with the given arguments.
+         */
+        @Override
+        public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException {
+            return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions);
         }
     }
 

diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java b/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java
@@ -39,16 +39,21 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.ListIterator;
+import java.util.Objects;
 
 public class MultiPhrasePrefixQuery extends Query {
 
-    private String field;
+    private final String field;
     private ArrayList<Term[]> termArrays = new ArrayList<>();
     private ArrayList<Integer> positions = new ArrayList<>();
     private int maxExpansions = Integer.MAX_VALUE;
 
     private int slop = 0;
 
+    public MultiPhrasePrefixQuery(String field) {
+        this.field = Objects.requireNonNull(field);
+    }
+
     /**
      * Sets the phrase slop for this query.
      *
@@ -102,9 +107,6 @@ public void add(Term[] terms) {
      * @see org.apache.lucene.search.PhraseQuery.Builder#add(Term, int)
      */
     public void add(Term[] terms, int position) {
-        if (termArrays.size() == 0)
-            field = terms[0].field();
-
         for (int i = 0; i < terms.length; i++) {
-            if (terms[i].field() != field) {
+            if (field.equals(terms[i].field()) == false) { // compare by value: field now comes from the constructor
                 throw new IllegalArgumentException(
@@ -212,7 +214,7 @@ private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final
     @Override
     public final String toString(String f) {
         StringBuilder buffer = new StringBuilder();
-        if (field == null || !field.equals(f)) {
+        if (field.equals(f) == false) {
             buffer.append(field);
             buffer.append(":");
         }

diff --git a/...ain/java/org/elasticsearch/common/lucene/search/SpanBooleanQueryRewriteWithMaxClause.java b/...ain/java/org/elasticsearch/common/lucene/search/SpanBooleanQueryRewriteWithMaxClause.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common.lucene.search;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.queries.SpanMatchNoDocsQuery;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A span rewrite method that extracts the first {@code maxExpansions} terms
+ * that match the {@link MultiTermQuery} in the terms dictionary.
+ * The rewrite throws an error if more than {@code maxExpansions} distinct terms are found and
+ * {@code hardLimit} is set; otherwise it stops collecting once {@code maxExpansions} terms
+ * have been gathered.
+ */
+public class SpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod {
+    private final int maxExpansions;
+    private final boolean hardLimit;
+
+    /** Creates a rewrite capped at {@link BooleanQuery#getMaxClauseCount()} terms that fails when the cap is exceeded. */
+    public SpanBooleanQueryRewriteWithMaxClause() {
+        this(BooleanQuery.getMaxClauseCount(), true);
+    }
+
+    /**
+     * @param maxExpansions maximum number of distinct terms the rewritten query may contain
+     * @param hardLimit     whether matching more terms than {@code maxExpansions} is an error ({@code true})
+     *                      or silently truncates the expansion ({@code false})
+     */
+    public SpanBooleanQueryRewriteWithMaxClause(int maxExpansions, boolean hardLimit) {
+        this.maxExpansions = maxExpansions;
+        this.hardLimit = hardLimit;
+    }
+
+    /** Returns the maximum number of terms collected by this rewrite. */
+    public int getMaxExpansions() {
+        return maxExpansions;
+    }
+
+    /** Returns whether exceeding {@link #getMaxExpansions()} raises an error instead of truncating. */
+    public boolean isHardLimit() {
+        return hardLimit;
+    }
+
+    /**
+     * Expands {@code query} into span queries over the terms it matches in {@code reader}.
+     *
+     * @return a {@link SpanMatchNoDocsQuery} if no term matches, the lone {@link SpanTermQuery} if exactly one
+     *         matches, otherwise a {@link SpanOrQuery} over the collected terms
+     * @throws RuntimeException if {@code hardLimit} is set and more than {@code maxExpansions} distinct terms match
+     */
+    @Override
+    public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
+        // Anonymous RewriteMethod, presumably so that getTermsEnum(...) below resolves against MultiTermQuery.RewriteMethod.
+        final MultiTermQuery.RewriteMethod delegate = new MultiTermQuery.RewriteMethod() {
+            @Override
+            public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
+                Collection<SpanQuery> queries = collectTerms(reader, query);
+                if (queries.isEmpty()) {
+                    return new SpanMatchNoDocsQuery(query.getField(), "no expansion found for " + query.toString());
+                } else if (queries.size() == 1) {
+                    return queries.iterator().next();
+                } else {
+                    return new SpanOrQuery(queries.toArray(new SpanQuery[0]));
+                }
+            }
+
+            private Collection<SpanQuery> collectTerms(IndexReader reader, MultiTermQuery query) throws IOException {
+                // A set: the same term can be returned by several segments but must yield a single clause.
+                Set<SpanQuery> queries = new HashSet<>();
+                IndexReaderContext topReaderContext = reader.getContext();
+                for (LeafReaderContext context : topReaderContext.leaves()) {
+                    final Terms terms = context.reader().terms(query.getField());
+                    if (terms == null) {
+                        // field does not exist
+                        continue;
+                    }
+
+                    final TermsEnum termsEnum = getTermsEnum(query, terms, null);
+                    assert termsEnum != null;
+
+                    if (termsEnum == TermsEnum.EMPTY) {
+                        continue;
+                    }
+
+                    BytesRef bytes;
+                    while ((bytes = termsEnum.next()) != null) {
+                        final boolean full = queries.size() >= maxExpansions;
+                        if (full && hardLimit == false) {
+                            return queries;
+                        }
+                        final SpanQuery termQuery = new SpanTermQuery(new Term(query.getField(), bytes));
+                        if (full == false) {
+                            queries.add(termQuery);
+                        } else if (queries.contains(termQuery) == false) {
+                            // Only a term that is not collected yet would exceed the limit; a term seen again in a
+                            // later segment adds no clause and must not trip the hard limit.
+                            // NOTE: the message reports the global boolean clause limit, which can differ from maxExpansions.
+                            throw new RuntimeException("[" + query.toString() + " ] " +
+                                "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
+                        }
+                    }
+                }
+                return queries;
+            }
+        };
+        return (SpanQuery) delegate.rewrite(reader, query);
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
@@ -35,6 +35,8 @@
 import org.apache.lucene.search.TermInSetQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.intervals.IntervalsSource;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.common.Nullable;
@@ -365,16 +367,46 @@ public Query regexpQuery(String value, int flags, int maxDeterminizedStates, @Nu
 
     public abstract Query existsQuery(QueryShardContext context);
 
+    /**
+     * Default implementation: rejects phrase queries for this field type.
+     *
+     * @throws IllegalArgumentException always, naming the field and its type
+     */
-    public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+    public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
         throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name
             + "] which is of type [" + typeName() + "]");
     }
 
+    /**
+     * Default implementation: rejects multi-phrase queries for this field type.
+     *
+     * @throws IllegalArgumentException always, naming the field and its type
+     */
-    public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
+    public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
         throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name
             + "] which is of type [" + typeName() + "]");
     }
 
+    /**
+     * Default implementation: rejects phrase prefix queries for this field type.
+     *
+     * @throws IllegalArgumentException always, naming the field and its type
+     */
+    public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException {
+        throw new IllegalArgumentException("Can only use phrase prefix queries on text fields - not on [" + name
+            + "] which is of type [" + typeName() + "]");
+    }
+
+    /**
+     * Default implementation: rejects span prefix queries for this field type.
+     *
+     * @throws IllegalArgumentException always, naming the field and its type
+     */
+    public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) {
+        throw new IllegalArgumentException("Can only use span prefix queries on text fields - not on [" + name
+            + "] which is of type [" + typeName() + "]");
+    }
+
     /**
      * Create an {@link IntervalsSource} to be used for proximity queries
      */