Skip to content

Commit

Permalink
[Star tree] Doc count field support in star tree (#15282)
Browse files Browse the repository at this point in the history
---------
Signed-off-by: Bharathwaj G <bharath78910@gmail.com>
  • Loading branch information
bharath-techie committed Aug 27, 2024
1 parent 771949d commit 091ab6f
Show file tree
Hide file tree
Showing 14 changed files with 644 additions and 205 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -264,11 +264,16 @@ public void testValidCompositeIndex() {
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals(2, starTreeFieldType.getMetrics().size());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

// Assert default metrics
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());

assertEquals("_doc_count", starTreeFieldType.getMetrics().get(1).getField());
assertEquals(List.of(MetricStat.DOC_COUNT), starTreeFieldType.getMetrics().get(1).getMetrics());

assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs());
assertEquals(
StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedNumericDocValues;
import org.opensearch.common.annotation.ExperimentalApi;
Expand All @@ -25,6 +26,7 @@
import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
import org.opensearch.index.compositeindex.datacube.startree.builder.StarTreesBuilder;
import org.opensearch.index.mapper.CompositeMappedFieldType;
import org.opensearch.index.mapper.DocCountFieldMapper;
import org.opensearch.index.mapper.MapperService;

import java.io.IOException;
Expand Down Expand Up @@ -63,21 +65,29 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState
this.compositeMappedFieldTypes = mapperService.getCompositeFieldTypes();
compositeFieldSet = new HashSet<>();
segmentFieldSet = new HashSet<>();
// TODO : add integ test for this
for (FieldInfo fi : segmentWriteState.fieldInfos) {
if (DocValuesType.SORTED_NUMERIC.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (fi.name.equals(DocCountFieldMapper.NAME)) {
segmentFieldSet.add(fi.name);
}
}
for (CompositeMappedFieldType type : compositeMappedFieldTypes) {
compositeFieldSet.addAll(type.fields());
}
// check if there are any composite fields which are part of the segment
// TODO : add integ test where there are no composite fields in a segment, test both flush and merge cases
segmentHasCompositeFields = Collections.disjoint(segmentFieldSet, compositeFieldSet) == false;
}

@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
delegate.addNumericField(field, valuesProducer);
// Perform this only during flush flow
if (mergeState.get() == null && segmentHasCompositeFields) {
createCompositeIndicesIfPossible(valuesProducer, field);
}
}

@Override
Expand Down Expand Up @@ -119,13 +129,7 @@ private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer,
if (segmentFieldSet.isEmpty()) {
Set<String> compositeFieldSetCopy = new HashSet<>(compositeFieldSet);
for (String compositeField : compositeFieldSetCopy) {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
return DocValues.emptySortedNumeric();
}
});
compositeFieldSet.remove(compositeField);
addDocValuesForEmptyField(compositeField);
}
}
// we have all the required fields to build composite fields
Expand All @@ -138,7 +142,28 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
}
}
}
}

/**
* Add empty doc values for fields not present in segment
*/
private void addDocValuesForEmptyField(String compositeField) {
if (compositeField.equals(DocCountFieldMapper.NAME)) {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public NumericDocValues getNumeric(FieldInfo field) {
return DocValues.emptyNumeric();
}
});
} else {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
return DocValues.emptySortedNumeric();
}
});
}
compositeFieldSet.remove(compositeField);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,26 @@ public enum MetricStat {
SUM("sum"),
MIN("min"),
MAX("max"),
AVG("avg", VALUE_COUNT, SUM);
AVG("avg", VALUE_COUNT, SUM),
DOC_COUNT("doc_count", true);

private final String typeName;
private final MetricStat[] baseMetrics;

// System field stats cannot be used as input for user metric types
private final boolean isSystemFieldStat;

MetricStat(String typeName) {
this(typeName, false);
}

MetricStat(String typeName, MetricStat... baseMetrics) {
this(typeName, false, baseMetrics);
}

MetricStat(String typeName, boolean isSystemFieldStat, MetricStat... baseMetrics) {
this.typeName = typeName;
this.isSystemFieldStat = isSystemFieldStat;
this.baseMetrics = baseMetrics;
}

Expand All @@ -56,7 +69,8 @@ public boolean isDerivedMetric() {

public static MetricStat fromTypeName(String typeName) {
for (MetricStat metric : MetricStat.values()) {
if (metric.getTypeName().equalsIgnoreCase(typeName)) {
// prevent system fields to be entered as user input
if (metric.getTypeName().equalsIgnoreCase(typeName) && metric.isSystemFieldStat == false) {
return metric;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.opensearch.index.compositeindex.datacube.Dimension;
import org.opensearch.index.compositeindex.datacube.Metric;
import org.opensearch.index.mapper.CompositeMappedFieldType;
import org.opensearch.index.mapper.DocCountFieldMapper;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.StarTreeMapper;
Expand Down Expand Up @@ -78,7 +79,7 @@ public static void validate(MapperService mapperService, CompositeIndexSettings
String.format(Locale.ROOT, "unknown metric field [%s] as part of star tree field", metric.getField())
);
}
if (ft.isAggregatable() == false) {
if (ft.isAggregatable() == false && ft instanceof DocCountFieldMapper.DocCountFieldType == false) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,9 @@
class CountValueAggregator implements ValueAggregator<Long> {

public static final long DEFAULT_INITIAL_VALUE = 1L;
private final StarTreeNumericType starTreeNumericType;
private static final StarTreeNumericType VALUE_AGGREGATOR_TYPE = StarTreeNumericType.LONG;

public CountValueAggregator(StarTreeNumericType starTreeNumericType) {
this.starTreeNumericType = starTreeNumericType;
}
public CountValueAggregator() {}

@Override
public StarTreeNumericType getAggregatedValueType() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.compositeindex.datacube.startree.aggregators;

import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType;

/**
* Aggregator to handle '_doc_count' field
*
* @opensearch.experimental
*/
public class DocCountAggregator implements ValueAggregator<Long> {

private static final StarTreeNumericType VALUE_AGGREGATOR_TYPE = StarTreeNumericType.LONG;

public DocCountAggregator() {}

@Override
public StarTreeNumericType getAggregatedValueType() {
return VALUE_AGGREGATOR_TYPE;
}

/**
* If _doc_count field for a doc is missing, we increment the _doc_count by '1' for the associated doc
* otherwise take the actual value present in the field
*/
@Override
public Long getInitialAggregatedValueForSegmentDocValue(Long segmentDocValue) {
if (segmentDocValue == null) {
return getIdentityMetricValue();
}
return segmentDocValue;
}

@Override
public Long mergeAggregatedValueAndSegmentValue(Long value, Long segmentDocValue) {
assert value != null;
return mergeAggregatedValues(value, segmentDocValue);
}

@Override
public Long mergeAggregatedValues(Long value, Long aggregatedValue) {
if (value == null) {
value = getIdentityMetricValue();
}
if (aggregatedValue == null) {
aggregatedValue = getIdentityMetricValue();
}
return value + aggregatedValue;
}

@Override
public Long toAggregatedValueType(Long rawValue) {
return rawValue;
}

/**
* If _doc_count field for a doc is missing, we increment the _doc_count by '1' for the associated doc
*/
@Override
public Long getIdentityMetricValue() {
return 1L;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ public static ValueAggregator getValueAggregator(MetricStat aggregationType, Sta
case SUM:
return new SumValueAggregator(starTreeNumericType);
case VALUE_COUNT:
return new CountValueAggregator(starTreeNumericType);
return new CountValueAggregator();
case MIN:
return new MinValueAggregator(starTreeNumericType);
case MAX:
return new MaxValueAggregator(starTreeNumericType);
case DOC_COUNT:
return new DocCountAggregator();
default:
throw new IllegalStateException("Unsupported aggregation type: " + aggregationType);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
Expand All @@ -28,6 +29,7 @@
import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator;
import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode;
import org.opensearch.index.fielddata.IndexNumericFieldData;
import org.opensearch.index.mapper.DocCountFieldMapper;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.NumberFieldMapper;
Expand Down Expand Up @@ -117,6 +119,16 @@ protected BaseStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState sta
public List<MetricAggregatorInfo> generateMetricAggregatorInfos(MapperService mapperService) {
List<MetricAggregatorInfo> metricAggregatorInfos = new ArrayList<>();
for (Metric metric : this.starTreeField.getMetrics()) {
if (metric.getField().equals(DocCountFieldMapper.NAME)) {
MetricAggregatorInfo metricAggregatorInfo = new MetricAggregatorInfo(
MetricStat.DOC_COUNT,
metric.getField(),
starTreeField.getName(),
IndexNumericFieldData.NumericType.LONG
);
metricAggregatorInfos.add(metricAggregatorInfo);
continue;
}
for (MetricStat metricStat : metric.getMetrics()) {
if (metricStat.isDerivedMetric()) {
continue;
Expand Down Expand Up @@ -429,7 +441,7 @@ public void build(Map<String, DocValuesProducer> fieldProducerMap) throws IOExce
String dimension = dimensionsSplitOrder.get(i).getField();
FieldInfo dimensionFieldInfo = state.fieldInfos.fieldInfo(dimension);
if (dimensionFieldInfo == null) {
dimensionFieldInfo = getFieldInfo(dimension);
dimensionFieldInfo = getFieldInfo(dimension, DocValuesType.SORTED_NUMERIC);
}
dimensionReaders[i] = new SequentialDocValuesIterator(
fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo)
Expand All @@ -441,15 +453,15 @@ public void build(Map<String, DocValuesProducer> fieldProducerMap) throws IOExce
logger.debug("Finished Building star-tree in ms : {}", (System.currentTimeMillis() - startTime));
}

private static FieldInfo getFieldInfo(String field) {
private static FieldInfo getFieldInfo(String field, DocValuesType docValuesType) {
return new FieldInfo(
field,
1,
1, // This is filled as part of doc values creation and is not used otherwise
false,
false,
false,
IndexOptions.NONE,
DocValuesType.SORTED_NUMERIC,
docValuesType,
-1,
Collections.emptyMap(),
0,
Expand All @@ -473,20 +485,44 @@ public List<SequentialDocValuesIterator> getMetricReaders(SegmentWriteState stat
List<SequentialDocValuesIterator> metricReaders = new ArrayList<>();
for (Metric metric : this.starTreeField.getMetrics()) {
for (MetricStat metricStat : metric.getMetrics()) {
SequentialDocValuesIterator metricReader = null;
FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField());
if (metricFieldInfo == null) {
metricFieldInfo = getFieldInfo(metric.getField());
if (metricStat.equals(MetricStat.DOC_COUNT)) {
// _doc_count is numeric field , so we convert to sortedNumericDocValues and get iterator
metricReader = getIteratorForNumericField(fieldProducerMap, metricFieldInfo, DocCountFieldMapper.NAME);
} else {
if (metricFieldInfo == null) {
metricFieldInfo = getFieldInfo(metric.getField(), DocValuesType.SORTED_NUMERIC);
}
metricReader = new SequentialDocValuesIterator(
fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo)
);
}

SequentialDocValuesIterator metricReader = new SequentialDocValuesIterator(
fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo)
);
metricReaders.add(metricReader);
}
}
return metricReaders;
}

/**
* Converts numericDocValues to sortedNumericDocValues and returns SequentialDocValuesIterator
*/
private SequentialDocValuesIterator getIteratorForNumericField(
Map<String, DocValuesProducer> fieldProducerMap,
FieldInfo fieldInfo,
String name
) throws IOException {
if (fieldInfo == null) {
fieldInfo = getFieldInfo(name, DocValuesType.NUMERIC);
}
SequentialDocValuesIterator sequentialDocValuesIterator;
assert fieldProducerMap.containsKey(fieldInfo.name);
sequentialDocValuesIterator = new SequentialDocValuesIterator(
DocValues.singleton(fieldProducerMap.get(fieldInfo.name).getNumeric(fieldInfo))
);
return sequentialDocValuesIterator;
}

/**
* Builds the star tree using Star-Tree Document
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,10 @@ private List<Metric> buildMetrics(String fieldName, Map<String, Object> map, Map
for (Object metric : metricsList) {
Map<String, Object> metricMap = (Map<String, Object>) metric;
String name = (String) XContentMapValues.extractValue(CompositeDataCubeFieldType.NAME, metricMap);
// Handle _doc_count metric separately at the end
if (name.equals(DocCountFieldMapper.NAME)) {
continue;
}
metricMap.remove(CompositeDataCubeFieldType.NAME);
if (objbuilder == null || objbuilder.mappersBuilders == null) {
metrics.add(getMetric(name, metricMap, context));
Expand All @@ -250,7 +254,8 @@ private List<Metric> buildMetrics(String fieldName, Map<String, Object> map, Map
} else {
throw new MapperParsingException(String.format(Locale.ROOT, "unable to parse metrics for star tree field [%s]", this.name));
}

Metric docCountMetric = new Metric(DocCountFieldMapper.NAME, List.of(MetricStat.DOC_COUNT));
metrics.add(docCountMetric);
return metrics;
}

Expand Down
Loading

0 comments on commit 091ab6f

Please sign in to comment.