Skip to content

Commit 02bf0c5

Browse files
authored
[7.x] Histogram field type support for Sum aggregation (#55916)
Implements Sum aggregation over Histogram fields by summing the value of each bucket multiplied by their count as requested in #53285 Backports #55681 to 7.x
1 parent f679880 commit 02bf0c5

File tree

9 files changed

+415
-16
lines changed

9 files changed

+415
-16
lines changed

docs/reference/aggregations/metrics/sum-aggregation.asciidoc

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
[[search-aggregations-metrics-sum-aggregation]]
22
=== Sum Aggregation
33

4-
A `single-value` metrics aggregation that sums up numeric values that are extracted from the aggregated documents. These values can be extracted either from specific numeric fields in the documents, or be generated by a provided script.
4+
A `single-value` metrics aggregation that sums up numeric values that are extracted from the aggregated documents.
5+
These values can be extracted either from specific numeric or <<histogram,histogram>> fields in the documents,
6+
or be generated by a provided script.
57

68
Assuming the data consists of documents representing sales records we can sum
79
the sale price of all hats with:
@@ -30,9 +32,9 @@ Resulting in:
3032
--------------------------------------------------
3133
{
3234
...
33-
"aggregations": {
34-
"hat_prices": {
35-
"value": 450.0
35+
"aggregations" : {
36+
"hat_prices" : {
37+
"value" : 450.0
3638
}
3739
}
3840
}
@@ -157,3 +159,55 @@ POST /sales/_search?size=0
157159
}
158160
--------------------------------------------------
159161
// TEST[setup:sales]
162+
163+
[[search-aggregations-metrics-sum-aggregation-histogram-fields]]
164+
==== Histogram fields
165+
166+
When the sums are computed on <<histogram,histogram fields>>, the result of the aggregation is the sum of all elements in the `values`
167+
array multiplied by the number in the same position in the `counts` array.
168+
169+
For example, if we have the following index that stores pre-aggregated histograms with latency metrics for different networks:
170+
171+
[source,console]
172+
--------------------------------------------------
173+
PUT metrics_index/_doc/1
174+
{
175+
"network.name" : "net-1",
176+
"latency_histo" : {
177+
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
178+
"counts" : [3, 7, 23, 12, 6] <2>
179+
}
180+
}
181+
182+
PUT metrics_index/_doc/2
183+
{
184+
"network.name" : "net-2",
185+
"latency_histo" : {
186+
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
187+
"counts" : [8, 17, 8, 7, 6] <2>
188+
}
189+
}
190+
191+
POST /metrics_index/_search?size=0
192+
{
193+
"aggs" : {
194+
"total_latency" : { "sum" : { "field" : "latency_histo" } }
195+
}
196+
}
197+
--------------------------------------------------
198+
199+
For each histogram field the sum aggregation will multiply each number in the `values` array <1> multiplied with its associated count
200+
in the `counts` array <2>. Eventually, it will add all values for all histograms and return the following result:
201+
202+
[source,console-result]
203+
--------------------------------------------------
204+
{
205+
...
206+
"aggregations" : {
207+
"total_latency" : {
208+
"value" : 28.8
209+
}
210+
}
211+
}
212+
--------------------------------------------------
213+
// TESTRESPONSE[skip:test not setup]

docs/reference/mapping/types/histogram.asciidoc

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ binary <<doc-values,doc values>> and not indexed. Its size in bytes is at most
3535
Because the data is not indexed, you only can use `histogram` fields for the
3636
following aggregations and queries:
3737

38+
* <<search-aggregations-metrics-sum-aggregation-histogram-fields,sum>> aggregation
3839
* <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation
3940
* <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation
4041
* <<search-aggregations-metrics-boxplot-aggregation,boxplot>> aggregation
@@ -73,9 +74,9 @@ The following <<indices-create-index, create index>> API request creates a new i
7374
--------------------------------------------------
7475
PUT my_index
7576
{
76-
"mappings": {
77-
"properties": {
78-
"my_histogram": {
77+
"mappings" : {
78+
"properties" : {
79+
"my_histogram" : {
7980
"type" : "histogram"
8081
},
8182
"my_text" : {
@@ -114,6 +115,3 @@ increasing order. For <<search-aggregations-metrics-percentile-aggregation-appro
114115
histograms this value represents the mean value. In case of HDR histograms this represents the value iterated to.
115116
<2> Count for each bucket. Values in the arrays are treated as integers and must be positive or zero.
116117
Negative values will be rejected. The relation between a bucket and a count is given by the position in the array.
117-
118-
119-

server/src/main/java/org/elasticsearch/search/aggregations/metrics/InternalSum.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
public class InternalSum extends InternalNumericMetricsAggregation.SingleValue implements Sum {
3333
private final double sum;
3434

35-
InternalSum(String name, double sum, DocValueFormat formatter, Map<String, Object> metadata) {
35+
public InternalSum(String name, double sum, DocValueFormat formatter, Map<String, Object> metadata) {
3636
super(name, metadata);
3737
this.sum = sum;
3838
this.format = formatter;

server/src/main/java/org/elasticsearch/search/aggregations/metrics/SumAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
import java.io.IOException;
3636
import java.util.Map;
3737

38-
class SumAggregator extends NumericMetricsAggregator.SingleValue {
38+
public class SumAggregator extends NumericMetricsAggregator.SingleValue {
3939

4040
private final ValuesSource.Numeric valuesSource;
4141
private final DocValueFormat format;

x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import org.elasticsearch.threadpool.ThreadPool;
3232
import org.elasticsearch.watcher.ResourceWatcherService;
3333
import org.elasticsearch.xpack.analytics.action.TransportAnalyticsStatsAction;
34-
import org.elasticsearch.xpack.analytics.aggregations.metrics.AnalyticsPercentilesAggregatorFactory;
34+
import org.elasticsearch.xpack.analytics.aggregations.metrics.AnalyticsAggregatorFactory;
3535
import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder;
3636
import org.elasticsearch.xpack.analytics.boxplot.InternalBoxplot;
3737
import org.elasticsearch.xpack.analytics.cumulativecardinality.CumulativeCardinalityPipelineAggregationBuilder;
@@ -142,8 +142,11 @@ public Map<String, Mapper.TypeParser> getMappers() {
142142

143143
@Override
144144
public List<Consumer<ValuesSourceRegistry.Builder>> getAggregationExtentions() {
145-
return Arrays.asList(AnalyticsPercentilesAggregatorFactory::registerPercentilesAggregator,
146-
AnalyticsPercentilesAggregatorFactory::registerPercentileRanksAggregator);
145+
return Arrays.asList(
146+
AnalyticsAggregatorFactory::registerPercentilesAggregator,
147+
AnalyticsAggregatorFactory::registerPercentileRanksAggregator,
148+
AnalyticsAggregatorFactory::registerHistoBackedSumAggregator
149+
);
147150
}
148151

149152
@Override
Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,18 @@
66

77
package org.elasticsearch.xpack.analytics.aggregations.metrics;
88

9+
import org.elasticsearch.search.aggregations.metrics.MetricAggregatorSupplier;
910
import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder;
1011
import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder;
1112
import org.elasticsearch.search.aggregations.metrics.PercentilesAggregatorSupplier;
1213
import org.elasticsearch.search.aggregations.metrics.PercentilesConfig;
1314
import org.elasticsearch.search.aggregations.metrics.PercentilesMethod;
15+
import org.elasticsearch.search.aggregations.metrics.SumAggregationBuilder;
1416
import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry;
1517
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
1618

17-
public class AnalyticsPercentilesAggregatorFactory {
19+
public class AnalyticsAggregatorFactory {
20+
1821
public static void registerPercentilesAggregator(ValuesSourceRegistry.Builder builder) {
1922
builder.register(PercentilesAggregationBuilder.NAME,
2023
AnalyticsValuesSourceType.HISTOGRAM,
@@ -58,4 +61,10 @@ public static void registerPercentileRanksAggregator(ValuesSourceRegistry.Builde
5861
"is not compatible with Histogram field");
5962
});
6063
}
64+
65+
public static void registerHistoBackedSumAggregator(ValuesSourceRegistry.Builder builder) {
66+
builder.register(SumAggregationBuilder.NAME,
67+
AnalyticsValuesSourceType.HISTOGRAM,
68+
(MetricAggregatorSupplier) HistoBackedSumAggregator::new);
69+
}
6170
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.analytics.aggregations.metrics;
7+
8+
import org.apache.lucene.index.LeafReaderContext;
9+
import org.apache.lucene.search.ScoreMode;
10+
import org.elasticsearch.common.lease.Releasables;
11+
import org.elasticsearch.common.util.BigArrays;
12+
import org.elasticsearch.common.util.DoubleArray;
13+
import org.elasticsearch.index.fielddata.HistogramValue;
14+
import org.elasticsearch.index.fielddata.HistogramValues;
15+
import org.elasticsearch.search.DocValueFormat;
16+
import org.elasticsearch.search.aggregations.Aggregator;
17+
import org.elasticsearch.search.aggregations.InternalAggregation;
18+
import org.elasticsearch.search.aggregations.LeafBucketCollector;
19+
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
20+
import org.elasticsearch.search.aggregations.metrics.CompensatedSum;
21+
import org.elasticsearch.search.aggregations.metrics.InternalSum;
22+
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
23+
import org.elasticsearch.search.aggregations.support.ValuesSource;
24+
import org.elasticsearch.search.internal.SearchContext;
25+
import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource;
26+
27+
import java.io.IOException;
28+
import java.util.Map;
29+
30+
/**
31+
* Sum aggregator operating over histogram datatypes {@link HistogramValuesSource}
32+
*/
33+
class HistoBackedSumAggregator extends NumericMetricsAggregator.SingleValue {
34+
35+
private final ValuesSource valuesSource;
36+
private final DocValueFormat format;
37+
38+
private DoubleArray sums;
39+
private DoubleArray compensations;
40+
41+
HistoBackedSumAggregator(String name, ValuesSource valuesSource, DocValueFormat formatter, SearchContext context,
42+
Aggregator parent, Map<String, Object> metadata) throws IOException {
43+
super(name, context, parent, metadata);
44+
this.valuesSource = valuesSource;
45+
this.format = formatter;
46+
if (valuesSource != null) {
47+
sums = context.bigArrays().newDoubleArray(1, true);
48+
compensations = context.bigArrays().newDoubleArray(1, true);
49+
}
50+
}
51+
52+
@Override
53+
public ScoreMode scoreMode() {
54+
return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
55+
}
56+
57+
@Override
58+
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
59+
final LeafBucketCollector sub) throws IOException {
60+
if (valuesSource == null) {
61+
return LeafBucketCollector.NO_OP_COLLECTOR;
62+
}
63+
final BigArrays bigArrays = context.bigArrays();
64+
final HistogramValues values = ((HistogramValuesSource.Histogram) valuesSource).getHistogramValues(ctx);
65+
66+
final CompensatedSum kahanSummation = new CompensatedSum(0, 0);
67+
return new LeafBucketCollectorBase(sub, values) {
68+
@Override
69+
public void collect(int doc, long bucket) throws IOException {
70+
sums = bigArrays.grow(sums, bucket + 1);
71+
compensations = bigArrays.grow(compensations, bucket + 1);
72+
73+
if (values.advanceExact(doc)) {
74+
final HistogramValue sketch = values.histogram();
75+
final double sum = sums.get(bucket);
76+
final double compensation = compensations.get(bucket);
77+
kahanSummation.reset(sum, compensation);
78+
while (sketch.next()) {
79+
double d = sketch.value() * sketch.count();
80+
kahanSummation.add(d);
81+
}
82+
83+
compensations.set(bucket, kahanSummation.delta());
84+
sums.set(bucket, kahanSummation.value());
85+
}
86+
}
87+
};
88+
}
89+
90+
@Override
91+
public double metric(long owningBucketOrd) {
92+
if (valuesSource == null || owningBucketOrd >= sums.size()) {
93+
return 0.0;
94+
}
95+
return sums.get(owningBucketOrd);
96+
}
97+
98+
@Override
99+
public InternalAggregation buildAggregation(long bucket) {
100+
if (valuesSource == null || bucket >= sums.size()) {
101+
return buildEmptyAggregation();
102+
}
103+
return new InternalSum(name, sums.get(bucket), format, metadata());
104+
}
105+
106+
@Override
107+
public InternalAggregation buildEmptyAggregation() {
108+
return new InternalSum(name, 0.0, format, metadata());
109+
}
110+
111+
@Override
112+
public void doClose() {
113+
Releasables.close(sums, compensations);
114+
}
115+
}

0 commit comments

Comments
 (0)