Refuse to load fields from _source when using the fields option and support wildcards. #15017

Merged (1 commit) on Nov 30, 2015
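For context, a hedged sketch (not part of this PR) of the behaviour the title describes: with this change the fields option only returns stored fields, and requested field names may be given as wildcard patterns. The index and field names below are hypothetical, and the snippet assumes the 2.x-era Java Client API that the tests further down also use.

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;

// Hypothetical usage sketch: request stored fields by exact name and by wildcard.
// "field1" is only returned if it is mapped with store=true; "user.*" matches any
// stored field whose name starts with "user.".
SearchResponse response = client.prepareSearch("my_index")   // client: an existing Client instance
        .addField("field1")
        .addField("user.*")
        .get();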
This file was deleted.

@@ -19,30 +19,47 @@
package org.elasticsearch.index.fieldvisitor;

import org.apache.lucene.index.FieldInfo;
import org.elasticsearch.common.regex.Regex;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Set;

/**
* A field visitor that allows to load a selection of the stored fields.
* A field visitor that allows to load a selection of the stored fields by exact name or by pattern.
* Supported pattern styles: "xxx*", "*xxx", "*xxx*" and "xxx*yyy".
* The Uid field is always loaded.
* The class is optimized for source loading as it is a common use case.
*/
public class CustomFieldsVisitor extends FieldsVisitor {

private final Set<String> fields;
private final List<String> patterns;

public CustomFieldsVisitor(Set<String> fields, boolean loadSource) {
public CustomFieldsVisitor(Set<String> fields, List<String> patterns, boolean loadSource) {
super(loadSource);
this.fields = fields;
this.patterns = patterns;
}

public CustomFieldsVisitor(Set<String> fields, boolean loadSource) {
this(fields, Collections.emptyList(), loadSource);
}

@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
if (super.needsField(fieldInfo) == Status.YES) {
return Status.YES;
}

return fields.contains(fieldInfo.name) ? Status.YES : Status.NO;
if (fields.contains(fieldInfo.name)) {
return Status.YES;
}
for (String pattern : patterns) {
if (Regex.simpleMatch(pattern, fieldInfo.name)) {
return Status.YES;
}
}
return Status.NO;
}
}
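For illustration, a small sketch (not part of the diff) of the matching that needsField relies on; Regex.simpleMatch supports exactly the four pattern styles listed in the Javadoc above. The field name "user.name" is hypothetical.

import org.elasticsearch.common.regex.Regex;

// Each assertion checks one of the supported pattern styles against a stored-field name.
assert Regex.simpleMatch("user.*", "user.name");     // "xxx*"
assert Regex.simpleMatch("*name", "user.name");      // "*xxx"
assert Regex.simpleMatch("*er.na*", "user.name");    // "*xxx*"
assert Regex.simpleMatch("user*name", "user.name");  // "xxx*yyy"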
120 changes: 29 additions & 91 deletions core/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java
@@ -30,12 +30,12 @@
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.text.StringAndBytesText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.fieldvisitor.AllFieldsVisitor;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
import org.elasticsearch.index.mapper.DocumentMapper;
@@ -55,13 +55,7 @@
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;

import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.common.xcontent.XContentFactory.contentBuilder;
@@ -98,9 +92,7 @@ public void preProcess(SearchContext context) {
public void execute(SearchContext context) {
FieldsVisitor fieldsVisitor;
Set<String> fieldNames = null;
List<String> extractFieldNames = null;

boolean loadAllStored = false;
List<String> fieldNamePatterns = null;
if (!context.hasFieldNames()) {
// no fields specified, default to return source if no explicit indication
if (!context.hasScriptFields() && !context.hasFetchSourceContext()) {
@@ -111,10 +103,6 @@ public void execute(SearchContext context) {
fieldsVisitor = new FieldsVisitor(context.sourceRequested());
} else {
for (String fieldName : context.fieldNames()) {
if (fieldName.equals("*")) {
loadAllStored = true;
continue;
}
if (fieldName.equals(SourceFieldMapper.NAME)) {
if (context.hasFetchSourceContext()) {
context.fetchSourceContext().fetchSource(true);
@@ -123,32 +111,28 @@ public void execute(SearchContext context) {
}
continue;
}
MappedFieldType fieldType = context.smartNameFieldType(fieldName);
if (fieldType == null) {
// Only fail if we know it is a object field, missing paths / fields shouldn't fail.
if (context.getObjectMapper(fieldName) != null) {
throw new IllegalArgumentException("field [" + fieldName + "] isn't a leaf field");
if (Regex.isSimpleMatchPattern(fieldName)) {
if (fieldNamePatterns == null) {
fieldNamePatterns = new ArrayList<>();
}
fieldNamePatterns.add(fieldName);
} else {
MappedFieldType fieldType = context.smartNameFieldType(fieldName);
if (fieldType == null) {
// Only fail if we know it is a object field, missing paths / fields shouldn't fail.
if (context.getObjectMapper(fieldName) != null) {
throw new IllegalArgumentException("field [" + fieldName + "] isn't a leaf field");
}
}
} else if (fieldType.stored()) {
if (fieldNames == null) {
fieldNames = new HashSet<>();
}
fieldNames.add(fieldType.names().indexName());
} else {
if (extractFieldNames == null) {
extractFieldNames = new ArrayList<>();
}
extractFieldNames.add(fieldName);
fieldNames.add(fieldName);
}
}
if (loadAllStored) {
fieldsVisitor = new AllFieldsVisitor(); // load everything, including _source
} else if (fieldNames != null) {
boolean loadSource = extractFieldNames != null || context.sourceRequested();
fieldsVisitor = new CustomFieldsVisitor(fieldNames, loadSource);
} else {
fieldsVisitor = new FieldsVisitor(extractFieldNames != null || context.sourceRequested());
}
boolean loadSource = context.sourceRequested();
fieldsVisitor = new CustomFieldsVisitor(fieldNames == null ? Collections.emptySet() : fieldNames,
fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, loadSource);
}

InternalSearchHit[] hits = new InternalSearchHit[context.docIdsToLoadSize()];
@@ -163,9 +147,9 @@ public void execute(SearchContext context) {
try {
int rootDocId = findRootDocumentIfNested(context, subReaderContext, subDocId);
if (rootDocId != -1) {
searchHit = createNestedSearchHit(context, docId, subDocId, rootDocId, extractFieldNames, loadAllStored, fieldNames, subReaderContext);
searchHit = createNestedSearchHit(context, docId, subDocId, rootDocId, fieldNames, fieldNamePatterns, subReaderContext);
} else {
searchHit = createSearchHit(context, fieldsVisitor, docId, subDocId, extractFieldNames, subReaderContext);
searchHit = createSearchHit(context, fieldsVisitor, docId, subDocId, subReaderContext);
}
} catch (IOException e) {
throw ExceptionsHelper.convertToElastic(e);
@@ -199,7 +183,7 @@ private int findRootDocumentIfNested(SearchContext context, LeafReaderContext su
return -1;
}

private InternalSearchHit createSearchHit(SearchContext context, FieldsVisitor fieldsVisitor, int docId, int subDocId, List<String> extractFieldNames, LeafReaderContext subReaderContext) {
private InternalSearchHit createSearchHit(SearchContext context, FieldsVisitor fieldsVisitor, int docId, int subDocId, LeafReaderContext subReaderContext) {
loadStoredFields(context, subReaderContext, fieldsVisitor, subDocId);
fieldsVisitor.postProcess(context.mapperService());

@@ -219,45 +203,24 @@ private InternalSearchHit createSearchHit(SearchContext context, FieldsVisitor f
typeText = documentMapper.typeText();
}
InternalSearchHit searchHit = new InternalSearchHit(docId, fieldsVisitor.uid().id(), typeText, searchFields);

// go over and extract fields that are not mapped / stored
// Set _source if requested.
SourceLookup sourceLookup = context.lookup().source();
sourceLookup.setSegmentAndDocument(subReaderContext, subDocId);
if (fieldsVisitor.source() != null) {
sourceLookup.setSource(fieldsVisitor.source());
}
if (extractFieldNames != null) {
for (String extractFieldName : extractFieldNames) {
List<Object> values = context.lookup().source().extractRawValues(extractFieldName);
if (!values.isEmpty()) {
if (searchHit.fieldsOrNull() == null) {
searchHit.fields(new HashMap<String, SearchHitField>(2));
}

SearchHitField hitField = searchHit.fields().get(extractFieldName);
if (hitField == null) {
hitField = new InternalSearchHitField(extractFieldName, new ArrayList<>(2));
searchHit.fields().put(extractFieldName, hitField);
}
for (Object value : values) {
hitField.values().add(value);
}
}
}
}

return searchHit;
}

private InternalSearchHit createNestedSearchHit(SearchContext context, int nestedTopDocId, int nestedSubDocId, int rootSubDocId, List<String> extractFieldNames, boolean loadAllStored, Set<String> fieldNames, LeafReaderContext subReaderContext) throws IOException {
private InternalSearchHit createNestedSearchHit(SearchContext context, int nestedTopDocId, int nestedSubDocId, int rootSubDocId, Set<String> fieldNames, List<String> fieldNamePatterns, LeafReaderContext subReaderContext) throws IOException {
// Also if highlighting is requested on nested documents we need to fetch the _source from the root document,
// otherwise highlighting will attempt to fetch the _source from the nested doc, which will fail,
// because the entire _source is only stored with the root document.
final FieldsVisitor rootFieldsVisitor = new FieldsVisitor(context.sourceRequested() || extractFieldNames != null || context.highlight() != null);
final FieldsVisitor rootFieldsVisitor = new FieldsVisitor(context.sourceRequested() || context.highlight() != null);
loadStoredFields(context, subReaderContext, rootFieldsVisitor, rootSubDocId);
rootFieldsVisitor.postProcess(context.mapperService());

Map<String, SearchHitField> searchFields = getSearchFields(context, nestedSubDocId, loadAllStored, fieldNames, subReaderContext);
Map<String, SearchHitField> searchFields = getSearchFields(context, nestedSubDocId, fieldNames, fieldNamePatterns, subReaderContext);
DocumentMapper documentMapper = context.mapperService().documentMapper(rootFieldsVisitor.uid().type());
SourceLookup sourceLookup = context.lookup().source();
sourceLookup.setSegmentAndDocument(subReaderContext, nestedSubDocId);
@@ -299,39 +262,14 @@ private InternalSearchHit createNestedSearchHit(SearchContext context, int neste
}

InternalSearchHit searchHit = new InternalSearchHit(nestedTopDocId, rootFieldsVisitor.uid().id(), documentMapper.typeText(), nestedIdentity, searchFields);
if (extractFieldNames != null) {
for (String extractFieldName : extractFieldNames) {
List<Object> values = context.lookup().source().extractRawValues(extractFieldName);
if (!values.isEmpty()) {
if (searchHit.fieldsOrNull() == null) {
searchHit.fields(new HashMap<String, SearchHitField>(2));
}

SearchHitField hitField = searchHit.fields().get(extractFieldName);
if (hitField == null) {
hitField = new InternalSearchHitField(extractFieldName, new ArrayList<>(2));
searchHit.fields().put(extractFieldName, hitField);
}
for (Object value : values) {
hitField.values().add(value);
}
}
}
}

return searchHit;
}

private Map<String, SearchHitField> getSearchFields(SearchContext context, int nestedSubDocId, boolean loadAllStored, Set<String> fieldNames, LeafReaderContext subReaderContext) {
private Map<String, SearchHitField> getSearchFields(SearchContext context, int nestedSubDocId, Set<String> fieldNames, List<String> fieldNamePatterns, LeafReaderContext subReaderContext) {
Map<String, SearchHitField> searchFields = null;
if (context.hasFieldNames() && !context.fieldNames().isEmpty()) {
FieldsVisitor nestedFieldsVisitor = null;
if (loadAllStored) {
nestedFieldsVisitor = new AllFieldsVisitor();
} else if (fieldNames != null) {
nestedFieldsVisitor = new CustomFieldsVisitor(fieldNames, false);
}

FieldsVisitor nestedFieldsVisitor = new CustomFieldsVisitor(fieldNames == null ? Collections.emptySet() : fieldNames,
fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, false);
if (nestedFieldsVisitor != null) {
loadStoredFields(context, subReaderContext, nestedFieldsVisitor, nestedSubDocId);
nestedFieldsVisitor.postProcess(context.mapperService());
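To summarize the new collection logic in execute() above: each requested field name is routed either into the exact-name set or, if it contains a wildcard, into the pattern list, and both are handed to a single CustomFieldsVisitor. Below is a condensed, hedged restatement of that logic with a hardcoded example input ("field1" and "user.*" stand in for context.fieldNames()).

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;

// Partition the requested names into exact names and simple-match patterns.
Set<String> fieldNames = new HashSet<>();
List<String> fieldNamePatterns = new ArrayList<>();
for (String fieldName : new String[] { "field1", "user.*" }) {
    if (Regex.isSimpleMatchPattern(fieldName)) {     // true when the name contains '*'
        fieldNamePatterns.add(fieldName);
    } else {
        fieldNames.add(fieldName);
    }
}
// One visitor handles both exact names and patterns; _source is not loaded here.
CustomFieldsVisitor visitor = new CustomFieldsVisitor(fieldNames, fieldNamePatterns, /* loadSource */ false);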
@@ -118,7 +118,8 @@ public void testSimpleIndexTemplateTests() throws Exception {

assertHitCount(searchResponse, 1);
assertThat(searchResponse.getHits().getAt(0).field("field1").value().toString(), equalTo("value1"));
assertThat(searchResponse.getHits().getAt(0).field("field2").value().toString(), equalTo("value 2")); // this will still be loaded because of the source feature
// field2 is not stored.
assertThat(searchResponse.getHits().getAt(0).field("field2"), nullValue());

client().prepareIndex("text_index", "type1", "1").setSource("field1", "value1", "field2", "value 2").setRefresh(true).execute().actionGet();

@@ -154,7 +154,7 @@ public void setupSuiteScopeCluster() throws Exception {
.endObject()));
}
assertAcked(prepareCreate(HIGH_CARD_IDX_NAME).setSettings(Settings.builder().put("number_of_shards", 2))
.addMapping("type", SINGLE_VALUED_FIELD_NAME, "type=geo_point", MULTI_VALUED_FIELD_NAME, "type=geo_point", NUMBER_FIELD_NAME, "type=long", "tag", "type=string,index=not_analyzed"));
.addMapping("type", SINGLE_VALUED_FIELD_NAME, "type=geo_point", MULTI_VALUED_FIELD_NAME, "type=geo_point", NUMBER_FIELD_NAME, "type=long,store=true", "tag", "type=string,index=not_analyzed"));

for (int i = 0; i < 2000; i++) {
singleVal = singleValues[i % numUniqueGeoPoints];
@@ -196,8 +196,8 @@ public void setupSuiteScopeCluster() throws Exception {
SearchHitField hitField = searchHit.field(NUMBER_FIELD_NAME);

assertThat("Hit " + i + " has wrong number of values", hitField.getValues().size(), equalTo(1));
Integer value = hitField.getValue();
assertThat("Hit " + i + " has wrong value", value, equalTo(i));
Long value = hitField.getValue();
assertThat("Hit " + i + " has wrong value", value.intValue(), equalTo(i));
}
assertThat(totalHits, equalTo(2000l));
}
@@ -532,8 +532,8 @@ public void testFetchFeatures() {
topHits("hits").setSize(1)
.highlighter(new HighlightBuilder().field("text"))
.setExplain(true)
.addFieldDataField("field1")
.addField("text")
.addFieldDataField("field1")
.addScriptField("script", new Script("5", ScriptService.ScriptType.INLINE, MockScriptEngine.NAME, Collections.emptyMap()))
.setFetchSource("text", null)
.setVersion(true)
@@ -569,8 +569,7 @@ public void testFetchFeatures() {
SearchHitField field = hit.field("field1");
assertThat(field.getValue().toString(), equalTo("5"));

field = hit.field("text");
assertThat(field.getValue().toString(), equalTo("some text to entertain"));
assertThat(hit.getSource().get("text").toString(), equalTo("some text to entertain"));

field = hit.field("script");
assertThat(field.getValue().toString(), equalTo("5"));
4 changes: 4 additions & 0 deletions docs/reference/migration/migrate_3_0.asciidoc
@@ -468,3 +468,7 @@
Finally, the API for org.elasticsearch.monitor.os.OsStats has changed. The `getLoadAverage` method has been removed. The
value for this can now be obtained from `OsStats.Cpu#getLoadAverage`. Additionally, the recent CPU usage can be obtained
from `OsStats.Cpu#getPercent`.

=== Fields option
Only stored fields are retrievable with this option.
The fields option can no longer be used to load non-stored fields from `_source`.
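A hedged before/after sketch of the migration impact (index and field names are hypothetical, and an existing Client instance is assumed as in the first sketch): a field that is not mapped with store=true is no longer returned through the fields option and has to be read from _source instead.

import org.elasticsearch.action.search.SearchResponse;

// "plain_field" is not stored, so it is no longer extracted from _source for the fields option.
SearchResponse response = client.prepareSearch("my_index")
        .addField("plain_field")
        .get();
assert response.getHits().getAt(0).field("plain_field") == null;

// To read the value, fetch it from _source via source filtering instead:
response = client.prepareSearch("my_index")
        .setFetchSource("plain_field", null)
        .get();
Object value = response.getHits().getAt(0).getSource().get("plain_field");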