elastic · ywelsch · Feb 2, 2022 · Feb 2, 2022 · Feb 2, 2022
diff --git a/docs/changelog/83404.yaml b/docs/changelog/83404.yaml
@@ -0,0 +1,5 @@
+pr: 83404
+summary: Implement all queries on doc-values only keyword fields
+area: Mapping
+type: enhancement
+issues: []
diff --git a/docs/reference/mapping/params/doc-values.asciidoc b/docs/reference/mapping/params/doc-values.asciidoc
@@ -19,7 +19,7 @@ with the __notable exception of `text` and `annotated_text` fields__.
 
 <<number,Numeric types>>, <<date,date types>>, the <<boolean,boolean type>>,
 <<ip,ip type>>, <<geo-point,geo_point type>> and the <<keyword,keyword type>>
-can also be queried using term or range-based queries
+can also be queried
 when they are not <<mapping-index,indexed>> but only have doc values enabled.
 Query performance on doc values is much slower than on index structures, but
 offers an interesting tradeoff between disk usage and query performance for

diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc
@@ -82,8 +82,7 @@ The following parameters are accepted by `keyword` fields:
 
     Should the field be quickly searchable? Accepts `true` (default) and
      `false`. `keyword` fields that only have <<doc-values,`doc_values`>>
-     enabled can still be queried using term or range-based queries,
-     albeit slower.
+     enabled can still be queried, albeit slower.
 
 <<index-options,`index_options`>>::
 

diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml
@@ -289,6 +289,60 @@ setup:
         body: { query: { range: { keyword: { gte: "key1" } } } }
   - length:   { hits.hits: 2  }
 
+---
+"Test fuzzy query on keyword field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { fuzzy: { keyword: { value: "kay1", fuzziness: 1 } } } }
+  - length:   { hits.hits: 1  }
+
+---
+"Test prefix query on keyword field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { prefix: { keyword: { value: "key" } } } }
+  - length:   { hits.hits: 2  }
+
+---
+"Test case insensitive term query on keyword field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { term: { keyword: { value: "KeY1", case_insensitive: true } } } }
+  - length:   { hits.hits: 1  }
+
+---
+"Test wildcard query on keyword field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { wildcard: { keyword: { value: "k*1" } } } }
+  - length:   { hits.hits: 1  }
+
+---
+"Test case insensitive wildcard query on keyword field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { wildcard: { keyword: { value: "K*1", case_insensitive: true } } } }
+  - length:   { hits.hits: 1  }
+
+---
+"Test regexp query on keyword field where only doc values are enabled":
+
+  - do:
+      search:
+        index: test
+        body: { query: { regexp: { keyword: { value: "k.*1" } } } }
+  - length:   { hits.hits: 1  }
+
 ---
 "Test match query on boolean field where only doc values are enabled":
 

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -31,8 +31,10 @@
 import org.apache.lucene.util.automaton.MinimizationOperations;
 import org.apache.lucene.util.automaton.Operations;
 import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.AutomatonQueries;
+import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.fielddata.FieldData;
@@ -44,9 +46,15 @@
 import org.elasticsearch.script.ScriptCompiler;
 import org.elasticsearch.script.StringFieldScript;
 import org.elasticsearch.script.field.KeywordDocValuesField;
+import org.elasticsearch.script.field.SortedSetDocValuesStringFieldScript;
 import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
 import org.elasticsearch.search.lookup.FieldValues;
 import org.elasticsearch.search.lookup.SearchLookup;
+import org.elasticsearch.search.runtime.StringScriptFieldFuzzyQuery;
+import org.elasticsearch.search.runtime.StringScriptFieldPrefixQuery;
+import org.elasticsearch.search.runtime.StringScriptFieldRegexpQuery;
+import org.elasticsearch.search.runtime.StringScriptFieldTermQuery;
+import org.elasticsearch.search.runtime.StringScriptFieldWildcardQuery;
 import org.elasticsearch.xcontent.XContentParser;
 
 import java.io.IOException;
@@ -388,6 +396,68 @@ public Query rangeQuery(
             }
         }
 
+        @Override
+        public Query fuzzyQuery(
+            Object value,
+            Fuzziness fuzziness,
+            int prefixLength,
+            int maxExpansions,
+            boolean transpositions,
+            SearchExecutionContext context
+        ) {
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context);
+            } else {
+                return StringScriptFieldFuzzyQuery.build(
+                    new Script(""),
+                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    name(),
+                    indexedValueForSearch(value).utf8ToString(),
+                    fuzziness.asDistance(BytesRefs.toString(value)),
+                    prefixLength,
+                    transpositions
+                );
+            }
+        }
+
+        @Override
+        public Query prefixQuery(
+            String value,
+            MultiTermQuery.RewriteMethod method,
+            boolean caseInsensitive,
+            SearchExecutionContext context
+        ) {
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.prefixQuery(value, method, caseInsensitive, context);
+            } else {
+                return new StringScriptFieldPrefixQuery(
+                    new Script(""),
+                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    name(),
+                    indexedValueForSearch(value).utf8ToString(),
+                    caseInsensitive
+                );
+            }
+        }
+
+        @Override
+        public Query termQueryCaseInsensitive(Object value, SearchExecutionContext context) {
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.termQueryCaseInsensitive(value, context);
+            } else {
+                return new StringScriptFieldTermQuery(
+                    new Script(""),
+                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    name(),
+                    indexedValueForSearch(value).utf8ToString(),
+                    true
+                );
+            }
+        }
+
         @Override
         public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutionContext queryShardContext, String searchAfter)
             throws IOException {
@@ -521,7 +591,72 @@ public Query wildcardQuery(
             boolean caseInsensitive,
             SearchExecutionContext context
         ) {
-            return super.wildcardQuery(value, method, caseInsensitive, true, context);
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.wildcardQuery(value, method, caseInsensitive, true, context);
+            } else {
+                if (getTextSearchInfo().getSearchAnalyzer() != null) {
+                    value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
+                } else {
+                    value = indexedValueForSearch(value).utf8ToString();
+                }
+                return new StringScriptFieldWildcardQuery(
+                    new Script(""),
+                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    name(),
+                    value,
+                    caseInsensitive
+                );
+            }
+        }
+
+        @Override
+        public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, SearchExecutionContext context) {
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.normalizedWildcardQuery(value, method, context);
+            } else {
+                if (getTextSearchInfo().getSearchAnalyzer() != null) {
+                    value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer());
+                } else {
+                    value = indexedValueForSearch(value).utf8ToString();
+                }
+                return new StringScriptFieldWildcardQuery(
+                    new Script(""),
+                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    name(),
+                    value,
+                    false
+                );
+            }
+        }
+
+        @Override
+        public Query regexpQuery(
+            String value,
+            int syntaxFlags,
+            int matchFlags,
+            int maxDeterminizedStates,
+            MultiTermQuery.RewriteMethod method,
+            SearchExecutionContext context
+        ) {
+            failIfNotIndexedNorDocValuesFallback(context);
+            if (isIndexed()) {
+                return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context);
+            } else {
+                if (matchFlags != 0) {
+                    throw new IllegalArgumentException("Match flags not yet implemented [" + matchFlags + "]");
+                }
+                return new StringScriptFieldRegexpQuery(
+                    new Script(""),
+                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    name(),
+                    indexedValueForSearch(value).utf8ToString(),
+                    syntaxFlags,
+                    matchFlags,
+                    maxDeterminizedStates
+                );
+            }
         }
 
         @Override

diff --git a/server/src/main/java/org/elasticsearch/script/field/SortedSetDocValuesStringFieldScript.java b/server/src/main/java/org/elasticsearch/script/field/SortedSetDocValuesStringFieldScript.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.script.field;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.script.StringFieldScript;
+import org.elasticsearch.search.lookup.SearchLookup;
+
+import java.io.IOException;
+import java.util.Map;
+
+public class SortedSetDocValuesStringFieldScript extends StringFieldScript {
+    private final SortedSetDocValues sortedSetDocValues;
+
+    boolean hasValue = false;
+
+    public SortedSetDocValuesStringFieldScript(String fieldName, SearchLookup searchLookup, LeafReaderContext ctx) {
+        super(fieldName, Map.of(), searchLookup, ctx);
+        try {
+            sortedSetDocValues = DocValues.getSortedSet(ctx.reader(), fieldName);
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values", e);
+        }
+    }
+
+    @Override
+    public void setDocument(int docID) {
+        try {
+            hasValue = sortedSetDocValues.advanceExact(docID);
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values", e);
+        }
+    }
+
+    @Override
+    public void execute() {
+        try {
+            if (hasValue) {
+                long ord;
+                while ((ord = sortedSetDocValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+                    BytesRef bytesRef = sortedSetDocValues.lookupOrd(ord);
+                    emit(bytesRef.utf8ToString());
+                }
+            }
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values", e);
+        }
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java
@@ -167,12 +167,12 @@ public void testRegexpQuery() {
         MappedFieldType ft = new KeywordFieldType("field");
         assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT));
 
-        MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap());
+        MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap());
         IllegalArgumentException e = expectThrows(
             IllegalArgumentException.class,
             () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT)
         );
-        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
+        assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage());
 
         ElasticsearchException ee = expectThrows(
             ElasticsearchException.class,
@@ -188,12 +188,12 @@ public void testFuzzyQuery() {
             ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT)
         );
 
-        MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap());
+        MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap());
         IllegalArgumentException e = expectThrows(
             IllegalArgumentException.class,
             () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT)
         );
-        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
+        assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage());
 
         ElasticsearchException ee = expectThrows(
             ElasticsearchException.class,