Skip to content

Commit

Permalink
Enable validating user-supplied missing values on unmapped fields (#4…
Browse files Browse the repository at this point in the history
…3718) (#43940)

Provides a hook for aggregations to introspect the `ValuesSourceType` for a user supplied Missing value on an unmapped field, when the type would otherwise be `ANY`.  Mapped field behavior is unchanged, and still applies the `ValuesSourceType` of the field.  This PR just provides the hook for doing this, no existing aggregations have their behavior changed.
  • Loading branch information
not-napoleon authored Jul 8, 2019
1 parent 4390d4a commit 299a52c
Show file tree
Hide file tree
Showing 8 changed files with 319 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
}

throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field()
+ "]. It can only be applied to numeric or string fields.");
+ "]. It can only be applied to numeric or string fields.");
}

// return the SubAggCollectionMode that this aggregation should use based on the expected size
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,28 @@ public ValuesSourceAggregatorFactory(String name, ValuesSourceConfig<VS> config,
@Override
public Aggregator createInternal(Aggregator parent, boolean collectsFromSingleBucket,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
VS vs = config.toValuesSource(context.getQueryShardContext());
VS vs = config.toValuesSource(context.getQueryShardContext(), this::resolveMissingAny);
if (vs == null) {
return createUnmapped(parent, pipelineAggregators, metaData);
}
return doCreateInternal(vs, parent, collectsFromSingleBucket, pipelineAggregators, metaData);
}

/**
* This method provides a hook for aggregations that need finer grained control over the ValuesSource selected when the user supplies a
* missing value and there is no mapped field to infer the type from. This will only be called for aggregations that specify the
* ValuesSourceType.ANY in their constructors (On the builder class). The user supplied object is passed as a parameter, so its type
* may be inspected as needed.
*
* Generally, only the type of the returned ValuesSource is used, so returning the EMPTY instance of the chosen type is recommended.
*
* @param missing The user supplied missing value
* @return A ValuesSource instance compatible with the supplied parameter
*/
protected ValuesSource resolveMissingAny(Object missing) {
return ValuesSource.Bytes.WithOrdinals.EMPTY;
}

protected abstract Aggregator createUnmapped(Aggregator parent,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.function.Function;

/**
* A configuration that tells aggregations how to retrieve data from the index
Expand Down Expand Up @@ -223,10 +224,15 @@ public DocValueFormat format() {
return format;
}

@Nullable
public VS toValuesSource(QueryShardContext context) {
return toValuesSource(context, value -> ValuesSource.Bytes.WithOrdinals.EMPTY);
}

/** Get a value source given its configuration. A return value of null indicates that
* no value source could be built. */
@Nullable
public VS toValuesSource(QueryShardContext context) {
public VS toValuesSource(QueryShardContext context, Function<Object, ValuesSource> resolveMissingAny) {
if (!valid()) {
throw new IllegalStateException(
"value source config is invalid; must have either a field context or a script or marked as unwrapped");
Expand All @@ -241,8 +247,10 @@ public VS toValuesSource(QueryShardContext context) {
vs = (VS) ValuesSource.Numeric.EMPTY;
} else if (valueSourceType() == ValuesSourceType.GEOPOINT) {
vs = (VS) ValuesSource.GeoPoint.EMPTY;
} else if (valueSourceType() == ValuesSourceType.ANY || valueSourceType() == ValuesSourceType.BYTES) {
} else if (valueSourceType() == ValuesSourceType.BYTES) {
vs = (VS) ValuesSource.Bytes.WithOrdinals.EMPTY;
} else if (valueSourceType() == ValuesSourceType.ANY) {
vs = (VS) resolveMissingAny.apply(missing());
} else {
throw new IllegalArgumentException("Can't deal with unmapped ValuesSource type " + valueSourceType());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,22 @@

import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;

import static org.hamcrest.Matchers.containsString;

public class HistogramAggregatorTests extends AggregatorTestCase {

public void testLongs() throws Exception {
Expand Down Expand Up @@ -188,6 +193,83 @@ public void testMissing() throws Exception {
}
}

public void testMissingUnmappedField() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
for (int i = 0; i < 7; i ++) {
Document doc = new Document();
w.addDocument(doc);
}

HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field("field")
.interval(5)
.missing(2d);
MappedFieldType type = null;
try (IndexReader reader = w.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);
InternalHistogram histogram = search(searcher, new MatchAllDocsQuery(), aggBuilder, type);

assertEquals(1, histogram.getBuckets().size());

assertEquals(0d, histogram.getBuckets().get(0).getKey());
assertEquals(7, histogram.getBuckets().get(0).getDocCount());

assertTrue(AggregationInspectionHelper.hasValue(histogram));
}
}
}

public void testMissingUnmappedFieldBadType() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
for (int i = 0; i < 7; i ++) {
w.addDocument(new Document());
}

String missingValue = "🍌🍌🍌";
HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field("field")
.interval(5)
.missing(missingValue);
MappedFieldType type = null;
try (IndexReader reader = w.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);
Throwable t = expectThrows(IllegalArgumentException.class, () -> {
search(searcher, new MatchAllDocsQuery(), aggBuilder, type);
});
// This throws a number format exception (which is a subclass of IllegalArgumentException) and might be ok?
assertThat(t.getMessage(), containsString(missingValue));
}
}
}

public void testIncorrectFieldType() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
for (String value : new String[] {"foo", "bar", "baz", "quux"}) {
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field", new BytesRef(value)));
w.addDocument(doc);
}

HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
.field("field")
.interval(5);
MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
fieldType.setName("field");
fieldType.setHasDocValues(true);
try (IndexReader reader = w.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);

expectThrows(IllegalArgumentException.class, () -> {
search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
});
}
}

}

public void testOffset() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,45 @@ public void testRanges() throws Exception {
}
}
}

public void testMissingUnmapped() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
for (int i = 0; i < 7; i++) {
Document doc = new Document();
w.addDocument(doc);
}

IpRangeAggregationBuilder builder = new IpRangeAggregationBuilder("test_agg")
.field("field")
.addRange(new IpRangeAggregationBuilder.Range("foo", "192.168.100.0", "192.168.100.255"))
.missing("192.168.100.42"); // Apparently we expect a string here
try (IndexReader reader = w.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);
InternalBinaryRange range = search(searcher, new MatchAllDocsQuery(), builder, (MappedFieldType) null);
assertEquals(1, range.getBuckets().size());
}
}
}

public void testMissingUnmappedBadType() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
for (int i = 0; i < 7; i++) {
Document doc = new Document();
w.addDocument(doc);
}

IpRangeAggregationBuilder builder = new IpRangeAggregationBuilder("test_agg")
.field("field")
.addRange(new IpRangeAggregationBuilder.Range("foo", "192.168.100.0", "192.168.100.255"))
.missing(1234);
try (IndexReader reader = w.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);
expectThrows(IllegalArgumentException.class, () -> {
search(searcher, new MatchAllDocsQuery(), builder, (MappedFieldType) null);
});
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
Expand All @@ -37,10 +38,12 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.MockBigArrays;
import org.elasticsearch.common.util.MockPageCacheRecycler;
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.IpFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
Expand Down Expand Up @@ -77,6 +80,7 @@
import org.elasticsearch.search.aggregations.support.ValueType;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.ScoreSortBuilder;
import org.elasticsearch.test.geo.RandomGeoGenerator;

import java.io.IOException;
import java.net.InetAddress;
Expand Down Expand Up @@ -884,6 +888,60 @@ public void testUnmappedWithMissing() throws Exception {
}
}

public void testGeoPointField() throws Exception {
try (Directory directory = newDirectory()) {
GeoPoint point = RandomGeoGenerator.randomPoint(random());
final String field = "field";
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
Document document = new Document();
document.add(new LatLonDocValuesField(field, point.getLat(), point.getLon()));
indexWriter.addDocument(document);
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
MappedFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType();
fieldType.setHasDocValues(true);
fieldType.setName("field");

IndexSearcher indexSearcher = newIndexSearcher(indexReader);
TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", null) .field(field);
// Note - other places we throw IllegalArgumentException
expectThrows(AggregationExecutionException.class, () -> {
createAggregator(aggregationBuilder, indexSearcher, fieldType);
});
}
}
}
}

public void testIpField() throws Exception {
try (Directory directory = newDirectory()) {
final String field = "field";
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
Document document = new Document();
document.add(new SortedSetDocValuesField("field",
new BytesRef(InetAddressPoint.encode(InetAddresses.forString("192.168.100.42")))));
indexWriter.addDocument(document);
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
MappedFieldType fieldType = new IpFieldMapper.IpFieldType();
fieldType.setHasDocValues(true);
fieldType.setName("field");

IndexSearcher indexSearcher = newIndexSearcher(indexReader);
TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", null) .field(field);
// Note - other places we throw IllegalArgumentException
Aggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
Terms result = (Terms) aggregator.buildAggregation(0L);
assertEquals("_name", result.getName());
assertEquals(1, result.getBuckets().size());
assertEquals("192.168.100.42", result.getBuckets().get(0).getKey());
assertEquals(1, result.getBuckets().get(0).getDocCount());
}
}
}
}

public void testNestedTermsAgg() throws Exception {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
Expand Down
Loading

0 comments on commit 299a52c

Please sign in to comment.