Skip to content

Commit 8f03a9d

Browse files
committed
Aggregations Refactor: Refactor Geohash Grid Aggregation
1 parent 41f9e8b commit 8f03a9d

File tree

3 files changed

+206
-62
lines changed

3 files changed

+206
-62
lines changed

core/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParser.java

Lines changed: 128 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -21,108 +21,118 @@
2121
import org.apache.lucene.index.LeafReaderContext;
2222
import org.apache.lucene.index.SortedNumericDocValues;
2323
import org.apache.lucene.util.GeoHashUtils;
24+
import org.elasticsearch.common.ParseField;
25+
import org.elasticsearch.common.ParseFieldMatcher;
2426
import org.elasticsearch.common.geo.GeoPoint;
27+
import org.elasticsearch.common.io.stream.StreamInput;
28+
import org.elasticsearch.common.io.stream.StreamOutput;
29+
import org.elasticsearch.common.xcontent.XContentBuilder;
2530
import org.elasticsearch.common.xcontent.XContentParser;
31+
import org.elasticsearch.common.xcontent.XContentParser.Token;
2632
import org.elasticsearch.index.fielddata.MultiGeoPointValues;
2733
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
2834
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
2935
import org.elasticsearch.index.fielddata.SortingNumericDocValues;
3036
import org.elasticsearch.index.query.GeoBoundingBoxQueryBuilder;
31-
import org.elasticsearch.search.SearchParseException;
3237
import org.elasticsearch.search.aggregations.Aggregator;
3338
import org.elasticsearch.search.aggregations.AggregatorFactory;
3439
import org.elasticsearch.search.aggregations.InternalAggregation;
3540
import org.elasticsearch.search.aggregations.NonCollectingAggregator;
3641
import org.elasticsearch.search.aggregations.bucket.BucketUtils;
3742
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
43+
import org.elasticsearch.search.aggregations.support.AbstractValuesSourceParser.GeoPointValuesSourceParser;
3844
import org.elasticsearch.search.aggregations.support.AggregationContext;
45+
import org.elasticsearch.search.aggregations.support.ValueType;
3946
import org.elasticsearch.search.aggregations.support.ValuesSource;
4047
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
41-
import org.elasticsearch.search.aggregations.support.ValuesSourceParser;
42-
import org.elasticsearch.search.internal.SearchContext;
48+
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
4349

4450
import java.io.IOException;
4551
import java.util.Collections;
4652
import java.util.List;
4753
import java.util.Map;
54+
import java.util.Objects;
4855

4956
/**
5057
* Aggregates Geo information into cells determined by geohashes of a given precision.
5158
* WARNING - for high-precision geohashes it may prove necessary to use a {@link GeoBoundingBoxQueryBuilder}
5259
* aggregation to focus in on a smaller area to avoid generating too many buckets and using too much RAM
5360
*/
54-
public class GeoHashGridParser implements Aggregator.Parser {
61+
public class GeoHashGridParser extends GeoPointValuesSourceParser {
62+
63+
public static final int DEFAULT_PRECISION = 5;
64+
public static final int DEFAULT_MAX_NUM_CELLS = 10000;
65+
66+
public GeoHashGridParser() {
67+
super(false, false);
68+
}
5569

5670
@Override
5771
public String type() {
5872
return InternalGeoHashGrid.TYPE.name();
5973
}
74+
@Override
75+
public AggregatorFactory getFactoryPrototype() {
76+
return new GeoGridFactory(null);
77+
}
6078

6179
@Override
62-
public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {
63-
64-
ValuesSourceParser<ValuesSource.GeoPoint> vsParser = ValuesSourceParser
65-
.geoPoint(aggregationName, InternalGeoHashGrid.TYPE, context).build();
66-
67-
int precision = GeoHashGridParams.DEFAULT_PRECISION;
68-
int requiredSize = GeoHashGridParams.DEFAULT_MAX_NUM_CELLS;
69-
int shardSize = -1;
70-
71-
XContentParser.Token token;
72-
String currentFieldName = null;
73-
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
74-
if (token == XContentParser.Token.FIELD_NAME) {
75-
currentFieldName = parser.currentName();
76-
} else if (vsParser.token(currentFieldName, token, parser)) {
77-
continue;
78-
} else if (token == XContentParser.Token.VALUE_NUMBER ||
79-
token == XContentParser.Token.VALUE_STRING) { //Be lenient and also allow numbers enclosed in quotes
80-
if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_PRECISION)) {
81-
precision = GeoHashGridParams.checkPrecision(parser.intValue());
82-
} else if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_SIZE)) {
83-
requiredSize = parser.intValue();
84-
} else if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_SHARD_SIZE)) {
85-
shardSize = parser.intValue();
80+
protected ValuesSourceAggregatorFactory<org.elasticsearch.search.aggregations.support.ValuesSource.GeoPoint> createFactory(
81+
String aggregationName, ValuesSourceType valuesSourceType,
82+
ValueType targetValueType, Map<ParseField, Object> otherOptions) {
83+
GeoGridFactory factory = new GeoGridFactory(aggregationName);
84+
Integer precision = (Integer) otherOptions.get(GeoHashGridParams.FIELD_PRECISION);
85+
if (precision != null) {
86+
factory.precision(precision);
8687
}
87-
} else if (token != XContentParser.Token.START_OBJECT) {
88-
throw new SearchParseException(context, "Unexpected token " + token + " in [" + aggregationName + "].",
89-
parser.getTokenLocation());
90-
}
88+
Integer size = (Integer) otherOptions.get(GeoHashGridParams.FIELD_SIZE);
89+
if (size != null) {
90+
factory.size(size);
9191
}
92-
93-
if (shardSize == 0) {
94-
shardSize = Integer.MAX_VALUE;
92+
Integer shardSize = (Integer) otherOptions.get(GeoHashGridParams.FIELD_SHARD_SIZE);
93+
if (shardSize != null) {
94+
factory.shardSize(shardSize);
9595
}
96+
return factory;
97+
}
9698

97-
if (requiredSize == 0) {
98-
requiredSize = Integer.MAX_VALUE;
99+
@Override
100+
protected boolean token(String aggregationName, String currentFieldName, Token token, XContentParser parser,
101+
ParseFieldMatcher parseFieldMatcher, Map<ParseField, Object> otherOptions) throws IOException {
102+
if (token == XContentParser.Token.VALUE_NUMBER || token == XContentParser.Token.VALUE_STRING) {
103+
if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_PRECISION)) {
104+
otherOptions.put(GeoHashGridParams.FIELD_PRECISION, parser.intValue());
105+
return true;
106+
} else if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_SIZE)) {
107+
otherOptions.put(GeoHashGridParams.FIELD_SIZE, parser.intValue());
108+
return true;
109+
} else if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_SHARD_SIZE)) {
110+
otherOptions.put(GeoHashGridParams.FIELD_SHARD_SIZE, parser.intValue());
111+
return true;
99112
}
100-
101-
if (shardSize < 0) {
102-
//Use default heuristic to avoid any wrong-ranking caused by distributed counting
103-
shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, context.numberOfShards());
104113
}
105-
106-
if (shardSize < requiredSize) {
107-
shardSize = requiredSize;
114+
return false;
108115
}
109116

110-
return new GeoGridFactory(aggregationName, vsParser.input(), precision, requiredSize, shardSize);
117+
public static class GeoGridFactory extends ValuesSourceAggregatorFactory<ValuesSource.GeoPoint> {
111118

112-
}
119+
private int precision = DEFAULT_PRECISION;
120+
private int requiredSize = DEFAULT_MAX_NUM_CELLS;
121+
private int shardSize = -1;
113122

123+
public GeoGridFactory(String name) {
124+
super(name, InternalGeoHashGrid.TYPE.name(), ValuesSourceType.GEOPOINT, ValueType.GEOPOINT);
125+
}
114126

115-
static class GeoGridFactory extends ValuesSourceAggregatorFactory<ValuesSource.GeoPoint> {
127+
public void precision(int precision) {
128+
this.precision = GeoHashGridParams.checkPrecision(precision);
129+
}
116130

117-
private final int precision;
118-
private final int requiredSize;
119-
private final int shardSize;
131+
public void size(int size) {
132+
this.requiredSize = size;
133+
}
120134

121-
public GeoGridFactory(String name, ValuesSourceParser.Input<ValuesSource.GeoPoint> input, int precision, int requiredSize,
122-
int shardSize) {
123-
super(name, InternalGeoHashGrid.TYPE.name(), input);
124-
this.precision = precision;
125-
this.requiredSize = requiredSize;
135+
public void shardSize(int shardSize) {
126136
this.shardSize = shardSize;
127137
}
128138

@@ -143,6 +153,23 @@ public InternalAggregation buildEmptyAggregation() {
143153
protected Aggregator doCreateInternal(final ValuesSource.GeoPoint valuesSource, AggregationContext aggregationContext,
144154
Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
145155
throws IOException {
156+
if (shardSize == 0) {
157+
shardSize = Integer.MAX_VALUE;
158+
}
159+
160+
if (requiredSize == 0) {
161+
requiredSize = Integer.MAX_VALUE;
162+
}
163+
164+
if (shardSize < 0) {
165+
// Use default heuristic to avoid any wrong-ranking caused by
166+
// distributed counting
167+
shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, aggregationContext.searchContext().numberOfShards());
168+
}
169+
170+
if (shardSize < requiredSize) {
171+
shardSize = requiredSize;
172+
}
146173
if (collectsFromSingleBucket == false) {
147174
return asMultiBucketAggregator(this, aggregationContext, parent);
148175
}
@@ -152,6 +179,52 @@ protected Aggregator doCreateInternal(final ValuesSource.GeoPoint valuesSource,
152179

153180
}
154181

182+
@Override
183+
protected ValuesSourceAggregatorFactory<org.elasticsearch.search.aggregations.support.ValuesSource.GeoPoint> innerReadFrom(
184+
String name, ValuesSourceType valuesSourceType,
185+
ValueType targetValueType, StreamInput in) throws IOException {
186+
GeoGridFactory factory = new GeoGridFactory(name);
187+
factory.precision = in.readVInt();
188+
factory.requiredSize = in.readVInt();
189+
factory.shardSize = in.readVInt();
190+
return factory;
191+
}
192+
193+
@Override
194+
protected void innerWriteTo(StreamOutput out) throws IOException {
195+
out.writeVInt(precision);
196+
out.writeVInt(requiredSize);
197+
out.writeVInt(shardSize);
198+
}
199+
200+
@Override
201+
protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
202+
builder.field(GeoHashGridParams.FIELD_PRECISION.getPreferredName(), precision);
203+
builder.field(GeoHashGridParams.FIELD_SIZE.getPreferredName(), requiredSize);
204+
builder.field(GeoHashGridParams.FIELD_SHARD_SIZE.getPreferredName(), shardSize);
205+
return builder;
206+
}
207+
208+
@Override
209+
protected boolean innerEquals(Object obj) {
210+
GeoGridFactory other = (GeoGridFactory) obj;
211+
if (precision != other.precision) {
212+
return false;
213+
}
214+
if (requiredSize != other.requiredSize) {
215+
return false;
216+
}
217+
if (shardSize != other.shardSize) {
218+
return false;
219+
}
220+
return true;
221+
}
222+
223+
@Override
224+
protected int innerHashCode() {
225+
return Objects.hash(precision, requiredSize, shardSize);
226+
}
227+
155228
private static class CellValues extends SortingNumericDocValues {
156229
private MultiGeoPointValues geoValues;
157230
private int precision;
@@ -209,10 +282,4 @@ public SortedBinaryDocValues bytesValues(LeafReaderContext ctx) {
209282

210283
}
211284
}
212-
// NORELEASE implement this method when refactoring this aggregation
213-
@Override
214-
public AggregatorFactory getFactoryPrototype() {
215-
return null;
216-
}
217-
218285
}

core/src/test/java/org/elasticsearch/search/aggregations/bucket/GeoHashGridTests.java

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.search.aggregations.bucket;
21+
22+
import org.elasticsearch.search.aggregations.BaseAggregationTestCase;
23+
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGridParser.GeoGridFactory;
24+
25+
public class GeoHashGridTests extends BaseAggregationTestCase<GeoGridFactory> {
26+
27+
@Override
28+
protected GeoGridFactory createTestAggregatorFactory() {
29+
String name = randomAsciiOfLengthBetween(3, 20);
30+
GeoGridFactory factory = new GeoGridFactory(name);
31+
if (randomBoolean()) {
32+
int precision = randomIntBetween(1, 12);
33+
factory.precision(precision);
34+
}
35+
if (randomBoolean()) {
36+
int size = randomInt(5);
37+
switch (size) {
38+
case 0:
39+
break;
40+
case 1:
41+
case 2:
42+
case 3:
43+
case 4:
44+
size = randomInt();
45+
break;
46+
}
47+
factory.size(size);
48+
49+
}
50+
if (randomBoolean()) {
51+
int shardSize = randomInt(5);
52+
switch (shardSize) {
53+
case 0:
54+
break;
55+
case 1:
56+
case 2:
57+
case 3:
58+
case 4:
59+
shardSize = randomInt();
60+
break;
61+
}
62+
factory.shardSize(shardSize);
63+
}
64+
return factory;
65+
}
66+
67+
}

core/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParserTests.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ public void testParseValidFromInts() throws Exception {
3131
int precision = randomIntBetween(1, 12);
3232
XContentParser stParser = JsonXContent.jsonXContent.createParser(
3333
"{\"field\":\"my_loc\", \"precision\":" + precision + ", \"size\": 500, \"shard_size\": 550}");
34+
XContentParser.Token token = stParser.nextToken();
35+
assertSame(XContentParser.Token.START_OBJECT, token);
3436
GeoHashGridParser parser = new GeoHashGridParser();
3537
// can create a factory
3638
assertNotNull(parser.parse("geohash_grid", stParser, searchContext));
@@ -41,6 +43,8 @@ public void testParseValidFromStrings() throws Exception {
4143
int precision = randomIntBetween(1, 12);
4244
XContentParser stParser = JsonXContent.jsonXContent.createParser(
4345
"{\"field\":\"my_loc\", \"precision\":\"" + precision + "\", \"size\": \"500\", \"shard_size\": \"550\"}");
46+
XContentParser.Token token = stParser.nextToken();
47+
assertSame(XContentParser.Token.START_OBJECT, token);
4448
GeoHashGridParser parser = new GeoHashGridParser();
4549
// can create a factory
4650
assertNotNull(parser.parse("geohash_grid", stParser, searchContext));
@@ -49,6 +53,8 @@ public void testParseValidFromStrings() throws Exception {
4953
public void testParseErrorOnNonIntPrecision() throws Exception {
5054
SearchContext searchContext = new TestSearchContext();
5155
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":\"2.0\"}");
56+
XContentParser.Token token = stParser.nextToken();
57+
assertSame(XContentParser.Token.START_OBJECT, token);
5258
GeoHashGridParser parser = new GeoHashGridParser();
5359
try {
5460
parser.parse("geohash_grid", stParser, searchContext);
@@ -61,18 +67,22 @@ public void testParseErrorOnNonIntPrecision() throws Exception {
6167
public void testParseErrorOnBooleanPrecision() throws Exception {
6268
SearchContext searchContext = new TestSearchContext();
6369
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":false}");
70+
XContentParser.Token token = stParser.nextToken();
71+
assertSame(XContentParser.Token.START_OBJECT, token);
6472
GeoHashGridParser parser = new GeoHashGridParser();
6573
try {
6674
parser.parse("geohash_grid", stParser, searchContext);
6775
fail();
6876
} catch (SearchParseException ex) {
69-
assertEquals("Unexpected token VALUE_BOOLEAN in [geohash_grid].", ex.getMessage());
77+
assertEquals("Unexpected token VALUE_BOOLEAN [precision] in [geohash_grid].", ex.getMessage());
7078
}
7179
}
7280

7381
public void testParseErrorOnPrecisionOutOfRange() throws Exception {
7482
SearchContext searchContext = new TestSearchContext();
7583
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":\"13\"}");
84+
XContentParser.Token token = stParser.nextToken();
85+
assertSame(XContentParser.Token.START_OBJECT, token);
7686
GeoHashGridParser parser = new GeoHashGridParser();
7787
try {
7888
parser.parse("geohash_grid", stParser, searchContext);

0 commit comments

Comments
 (0)