elastic
diff --git a/‎buildSrc/src/main/resources/checkstyle_suppressions.xml
Lines changed: 0 additions & 5 deletions b/‎buildSrc/src/main/resources/checkstyle_suppressions.xml
Lines changed: 0 additions & 5 deletions
diff --git a/‎docs/reference/migration/migrate_7_0/search.asciidoc
Lines changed: 7 additions & 0 deletions b/‎docs/reference/migration/migrate_7_0/search.asciidoc
Lines changed: 7 additions & 0 deletions
diff --git a/‎server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateGenerator.java
Lines changed: 7 additions & 6 deletions b/‎server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateGenerator.java
Lines changed: 7 additions & 6 deletions
diff --git a/‎server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateScorer.java
Lines changed: 8 additions & 5 deletions b/‎server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateScorer.java
Lines changed: 8 additions & 5 deletions
diff --git a/‎server/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java
Lines changed: 54 additions & 34 deletions b/‎server/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java
Lines changed: 54 additions & 34 deletions
diff --git a/‎server/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java
Lines changed: 1 addition & 1 deletion b/‎server/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java
Lines changed: 1 addition & 1 deletion
diff --git a/‎server/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java
Lines changed: 1 addition & 1 deletion b/‎server/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java
Lines changed: 1 addition & 1 deletion
@@ -372,9 +372,6 @@
   <suppress files="server[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]suggest[/\\]completion[/\\]context[/\\]ContextMapping.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]suggest[/\\]completion[/\\]context[/\\]GeoContextMapping.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]suggest[/\\]completion[/\\]context[/\\]GeoQueryContext.java" checks="LineLength" />
-  <suppress files="server[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]suggest[/\\]phrase[/\\]CandidateScorer.java" checks="LineLength" />
-  <suppress files="server[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]suggest[/\\]phrase[/\\]NoisyChannelSpellChecker.java" checks="LineLength" />
-  <suppress files="server[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]suggest[/\\]phrase[/\\]WordScorer.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]snapshots[/\\]RestoreService.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]snapshots[/\\]SnapshotShardFailure.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]snapshots[/\\]SnapshotShardsService.java" checks="LineLength" />
@@ -564,7 +561,6 @@
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]store[/\\]CorruptedTranslogIT.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]store[/\\]IndexStoreTests.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]store[/\\]StoreTests.java" checks="LineLength" />
-  <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]suggest[/\\]stats[/\\]SuggestStatsIT.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]translog[/\\]TranslogTests.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]indexing[/\\]IndexActionIT.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]indexlifecycle[/\\]IndexLifecycleActionIT.java" checks="LineLength" />
@@ -644,7 +640,6 @@
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]suggest[/\\]ContextCompletionSuggestSearchIT.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]suggest[/\\]completion[/\\]CategoryContextMappingTests.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]suggest[/\\]completion[/\\]GeoContextMappingTests.java" checks="LineLength" />
-  <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]suggest[/\\]phrase[/\\]NoisyChannelSpellCheckerTests.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]similarity[/\\]SimilarityIT.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]snapshots[/\\]AbstractSnapshotIntegTestCase.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]snapshots[/\\]DedicatedClusterSnapshotRestoreIT.java" checks="LineLength" />
 
@@ -78,6 +78,13 @@ removed.
 * 	`levenstein` - replaced by `levenshtein`
 * 	`jarowinkler` - replaced by `jaro_winkler`
 
+[float]
+==== `popular` mode for Suggesters
+
+The `popular` mode for Suggesters (`term` and `phrase`) now uses the document frequency
+(instead of the sum of the document frequencies) of the input terms to compute the
+frequency threshold for candidate suggestions.
+
 [float]
 ==== Limiting the number of terms that can be used in a Terms Query request
 
 
@@ -18,6 +18,7 @@
  */
 package org.elasticsearch.search.suggest.phrase;
 
+import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
@@ -29,22 +30,22 @@ public abstract class CandidateGenerator {
 
     public abstract boolean isKnownWord(BytesRef term) throws IOException;
 
-    public abstract long frequency(BytesRef term) throws IOException;
+    public abstract TermStats termStats(BytesRef term) throws IOException;
 
     public CandidateSet drawCandidates(BytesRef term) throws IOException {
         CandidateSet set = new CandidateSet(Candidate.EMPTY, createCandidate(term, true));
         return drawCandidates(set);
     }
 
     public Candidate createCandidate(BytesRef term, boolean userInput) throws IOException {
-        return createCandidate(term, frequency(term), 1.0, userInput);
+        return createCandidate(term, termStats(term), 1.0, userInput);
     }
-    public Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException {
-        return createCandidate(term, frequency, channelScore, false);
+    public Candidate createCandidate(BytesRef term, TermStats termStats, double channelScore) throws IOException {
+        return createCandidate(term, termStats, channelScore, false);
     }
 
-    public abstract Candidate createCandidate(BytesRef term, long frequency, double channelScore, boolean userInput) throws IOException;
+    public abstract Candidate createCandidate(BytesRef term, TermStats termStats,
+                                                double channelScore, boolean userInput) throws IOException;
 
     public abstract CandidateSet drawCandidates(CandidateSet set) throws IOException;
-
 }
@@ -77,21 +77,24 @@ public void findCandidates(CandidateSet[] candidates, Candidate[] path, int ord,
         } else {
             if (numMissspellingsLeft > 0) {
                 path[ord] = current.originalTerm;
-                findCandidates(candidates, path, ord + 1, numMissspellingsLeft, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
+                findCandidates(candidates, path, ord + 1, numMissspellingsLeft, corrections, cutoffScore,
+                    pathScore + scorer.score(path, candidates, ord, gramSize));
                 for (int i = 0; i < current.candidates.length; i++) {
                     path[ord] = current.candidates[i];
-                    findCandidates(candidates, path, ord + 1, numMissspellingsLeft - 1, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
+                    findCandidates(candidates, path, ord + 1, numMissspellingsLeft - 1, corrections, cutoffScore,
+                        pathScore + scorer.score(path, candidates, ord, gramSize));
                 }
             } else {
                 path[ord] = current.originalTerm;
-                findCandidates(candidates, path, ord + 1, 0, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
+                findCandidates(candidates, path, ord + 1, 0, corrections, cutoffScore,
+                    pathScore + scorer.score(path, candidates, ord, gramSize));
             }
         }
 
     }
 
-    private void updateTop(CandidateSet[] candidates, Candidate[] path, PriorityQueue<Correction> corrections, double cutoffScore, double score)
-            throws IOException {
+    private void updateTop(CandidateSet[] candidates, Candidate[] path,
+                                PriorityQueue<Correction> corrections, double cutoffScore, double score) throws IOException {
         score = Math.exp(score);
         assert Math.abs(score - score(path, candidates)) < 0.00001 : "cur_score=" + score + ", path_score=" + score(path,candidates);
         if (score > cutoffScore) {
 
@@ -23,6 +23,7 @@
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.Term;
@@ -48,6 +49,7 @@
 
 import static java.lang.Math.log10;
 import static java.lang.Math.max;
+import static java.lang.Math.min;
 import static java.lang.Math.round;
 
 public final class DirectCandidateGenerator extends CandidateGenerator {
@@ -57,20 +59,20 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
     private final SuggestMode suggestMode;
     private final TermsEnum termsEnum;
     private final IndexReader reader;
-    private final long dictSize;
+    private final long sumTotalTermFreq;
     private static final double LOG_BASE = 5;
     private final long frequencyPlateau;
     private final Analyzer preFilter;
     private final Analyzer postFilter;
     private final double nonErrorLikelihood;
-    private final boolean useTotalTermFrequency;
     private final CharsRefBuilder spare = new CharsRefBuilder();
     private final BytesRefBuilder byteSpare = new BytesRefBuilder();
     private final int numCandidates;
 
     public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
             double nonErrorLikelihood, int numCandidates) throws IOException {
-        this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
+        this(spellchecker, field, suggestMode, reader, nonErrorLikelihood,
+                numCandidates, null, null, MultiFields.getTerms(reader, field));
     }
 
     public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
@@ -83,14 +85,12 @@ public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, S
         this.numCandidates = numCandidates;
         this.suggestMode = suggestMode;
         this.reader = reader;
-        final long dictSize = terms.getSumTotalTermFreq();
-        this.useTotalTermFrequency = dictSize != -1;
-        this.dictSize =  dictSize == -1 ? reader.maxDoc() : dictSize;
+        this.sumTotalTermFreq =  terms.getSumTotalTermFreq() == -1 ? reader.maxDoc() : terms.getSumTotalTermFreq();
         this.preFilter = preFilter;
         this.postFilter = postFilter;
         this.nonErrorLikelihood = nonErrorLikelihood;
         float thresholdFrequency = spellchecker.getThresholdFrequency();
-        this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency);
+        this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int) (reader.maxDoc() * thresholdFrequency);
         termsEnum = terms.iterator();
     }
 
@@ -99,24 +99,29 @@ public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, S
      */
     @Override
     public boolean isKnownWord(BytesRef term) throws IOException {
-        return frequency(term) > 0;
+        return termStats(term).docFreq > 0;
     }
 
     /* (non-Javadoc)
      * @see org.elasticsearch.search.suggest.phrase.CandidateGenerator#termStats(org.apache.lucene.util.BytesRef)
      */
     @Override
-    public long frequency(BytesRef term) throws IOException {
+    public TermStats termStats(BytesRef term) throws IOException {
         term = preFilter(term, spare, byteSpare);
-        return internalFrequency(term);
+        return internalTermStats(term);
     }
 
 
-    public long internalFrequency(BytesRef term) throws IOException {
+    public TermStats internalTermStats(BytesRef term) throws IOException {
         if (termsEnum.seekExact(term)) {
-            return useTotalTermFrequency ? termsEnum.totalTermFreq() : termsEnum.docFreq();
+            return new TermStats(termsEnum.docFreq(),
+                /**
+                 * We use the {@link TermsEnum#docFreq()} for fields that don't
+                 * record the {@link TermsEnum#totalTermFreq()}.
+                 */
+                termsEnum.totalTermFreq() == -1 ? termsEnum.docFreq() : termsEnum.totalTermFreq());
         }
-        return 0;
+        return new TermStats(0, 0);
     }
 
     public String getField() {
@@ -127,15 +132,28 @@ public String getField() {
     public CandidateSet drawCandidates(CandidateSet set) throws IOException {
         Candidate original = set.originalTerm;
         BytesRef term = preFilter(original.term, spare, byteSpare);
-        final long frequency = original.frequency;
-        spellchecker.setThresholdFrequency(this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(frequency, dictSize));
+        if (suggestMode != SuggestMode.SUGGEST_ALWAYS) {
+            /**
+             * We use the {@link TermStats#docFreq} to compute the frequency threshold
+             * because that's what {@link DirectSpellChecker#suggestSimilar} expects
+             * when filtering terms.
+             */
+            int threshold = thresholdTermFrequency(original.termStats.docFreq);
+            if (threshold == Integer.MAX_VALUE) {
+                // the threshold is the max possible frequency so we can skip the search
+                return set;
+            }
+            spellchecker.setThresholdFrequency(threshold);
+        }
+
         SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode);
         List<Candidate> candidates = new ArrayList<>(suggestSimilar.length);
         for (int i = 0; i < suggestSimilar.length; i++) {
             SuggestWord suggestWord = suggestSimilar[i];
             BytesRef candidate = new BytesRef(suggestWord.string);
-            postFilter(new Candidate(candidate, internalFrequency(candidate), suggestWord.score,
-                    score(suggestWord.freq, suggestWord.score, dictSize), false), spare, byteSpare, candidates);
+            TermStats termStats = internalTermStats(candidate);
+            postFilter(new Candidate(candidate, termStats,
+                suggestWord.score, score(termStats, suggestWord.score, sumTotalTermFreq), false), spare, byteSpare, candidates);
         }
         set.addCandidates(candidates);
         return set;
@@ -171,28 +189,30 @@ public void nextToken() throws IOException {
                         BytesRef term = result.toBytesRef();
                         // We should not use termStats(term) here because it would analyze the term again.
                         // If preFilter and postFilter are the same analyzer, that second analysis would fail.
-                        long freq = internalFrequency(term);
-                        candidates.add(new Candidate(result.toBytesRef(), freq, candidate.stringDistance,
-                                score(candidate.frequency, candidate.stringDistance, dictSize), false));
+                        TermStats termStats = internalTermStats(term);
+                        candidates.add(new Candidate(result.toBytesRef(), termStats, candidate.stringDistance,
+                                score(candidate.termStats, candidate.stringDistance, sumTotalTermFreq), false));
                     } else {
-                        candidates.add(new Candidate(result.toBytesRef(), candidate.frequency, nonErrorLikelihood,
-                                score(candidate.frequency, candidate.stringDistance, dictSize), false));
+                        candidates.add(new Candidate(result.toBytesRef(), candidate.termStats, nonErrorLikelihood,
+                                score(candidate.termStats, candidate.stringDistance, sumTotalTermFreq), false));
                     }
                 }
             }, spare);
         }
     }
 
-    private double score(long frequency, double errorScore, long dictionarySize) {
-        return errorScore * (((double)frequency + 1) / ((double)dictionarySize +1));
+    private double score(TermStats termStats, double errorScore, long dictionarySize) {
+        return errorScore * (((double)termStats.totalTermFreq + 1) / ((double)dictionarySize +1));
     }
 
-    protected long thresholdFrequency(long termFrequency, long dictionarySize) {
-        if (termFrequency > 0) {
-            return max(0, round(termFrequency * (log10(termFrequency - frequencyPlateau) * (1.0 / log10(LOG_BASE))) + 1));
+    // package-private for testing
+    int thresholdTermFrequency(int docFreq) {
+        if (docFreq > 0) {
+            return (int) min(
+                max(0, round(docFreq * (log10(docFreq - frequencyPlateau) * (1.0 / log10(LOG_BASE))) + 1)), Integer.MAX_VALUE
+            );
         }
         return 0;
-
     }
 
     public abstract static class TokenConsumer {
@@ -249,12 +269,12 @@ public static class Candidate implements Comparable<Candidate> {
         public static final Candidate[] EMPTY = new Candidate[0];
         public final BytesRef term;
         public final double stringDistance;
-        public final long frequency;
+        public final TermStats termStats;
         public final double score;
         public final boolean userInput;
 
-        public Candidate(BytesRef term, long frequency, double stringDistance, double score, boolean userInput) {
-            this.frequency = frequency;
+        public Candidate(BytesRef term, TermStats termStats, double stringDistance, double score, boolean userInput) {
+            this.termStats = termStats;
             this.term = term;
             this.stringDistance = stringDistance;
             this.score = score;
@@ -266,7 +286,7 @@ public String toString() {
             return "Candidate [term=" + term.utf8ToString()
                     + ", stringDistance=" + stringDistance
                     + ", score=" + score
-                    + ", frequency=" + frequency
+                    + ", termStats=" + termStats
                     + (userInput ? ", userInput" : "") + "]";
         }
 
@@ -305,8 +325,8 @@ public int compareTo(Candidate other) {
     }
 
     @Override
-    public Candidate createCandidate(BytesRef term, long frequency, double channelScore, boolean userInput) throws IOException {
-        return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize), userInput);
+    public Candidate createCandidate(BytesRef term, TermStats termStats, double channelScore, boolean userInput) throws IOException {
+        return new Candidate(term, termStats, channelScore, score(termStats, channelScore, sumTotalTermFreq), userInput);
     }
 
     public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare)
 
@@ -46,7 +46,7 @@ protected double scoreUnigram(Candidate word) throws IOException {
     @Override
     protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
         join(separator, spare, w_1.term, word.term);
-        return (alpha + frequency(spare.get())) / (w_1.frequency + alpha * numTerms);
+        return (alpha + frequency(spare.get())) / (w_1.termStats.totalTermFreq + alpha * numTerms);
     }
 
     @Override
 
@@ -60,7 +60,7 @@ protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
         if (count < 1) {
             return unigramLambda * scoreUnigram(word);
         }
-        return bigramLambda * (count / (0.5d + w_1.frequency)) + unigramLambda * scoreUnigram(word);
+        return bigramLambda * (count / (0.5d + w_1.termStats.totalTermFreq)) + unigramLambda * scoreUnigram(word);
     }
 
     @Override
Original file line number	Diff line number	Diff line change
`@@ -46,7 +46,7 @@ protected double scoreUnigram(Candidate word) throws IOException {`
`46`	`46`	`@Override`
`47`	`47`	`protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {`
`48`	`48`	`join(separator, spare, w_1.term, word.term);`
`49`		`- return (alpha + frequency(spare.get())) / (w_1.frequency + alpha * numTerms);`
	`49`	`+ return (alpha + frequency(spare.get())) / (w_1.termStats.totalTermFreq + alpha * numTerms);`
`50`	`50`	`}`
`51`	`51`
`52`	`52`	`@Override`
Original file line number	Diff line number	Diff line change
`@@ -60,7 +60,7 @@ protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {`
`60`	`60`	`if (count < 1) {`
`61`	`61`	`return unigramLambda * scoreUnigram(word);`
`62`	`62`	`}`
`63`		`- return bigramLambda * (count / (0.5d + w_1.frequency)) + unigramLambda * scoreUnigram(word);`
	`63`	`+ return bigramLambda * (count / (0.5d + w_1.termStats.totalTermFreq)) + unigramLambda * scoreUnigram(word);`
`64`	`64`	`}`
`65`	`65`
`66`	`66`	`@Override`