Skip to content

Implement quadkey geo grid aggregation #30240

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,19 @@
field3: value
- match: { hits.total: 1 }
- match: { hits.hits.0._id: q3 }


---
# Runs a geohash_grid aggregation (precision 1) over the `location` field of
# geo_agg_index and expects all 6 hits to fall into the first bucket, keyed "u".
# NOTE(review): geo_agg_index is presumably populated by an earlier old-cluster
# test -- confirm against the test suite ordering.
"Verify geo aggregations work during upgrade":
- do:
search:
index: geo_agg_index
body:
aggregations:
mygrid:
geohash_grid:
field : location
precision : 1
- match: { hits.total: 6 }
- match: { aggregations.mygrid.buckets.0.key: u }
- match: { aggregations.mygrid.buckets.0.doc_count: 6 }
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,46 @@
tasks.get:
wait_for_completion: true
task_id: $task

---
# Creates geo_agg_index (no replicas) with a geo_point field `location`,
# bulk-indexes six documents with refresh enabled, then runs a geohash_grid
# aggregation at precision 1 and expects a first bucket "u" holding all 6 docs.
"Create geo data records in the old cluster":
- do:
indices.create:
index: geo_agg_index
body:
settings:
index:
number_of_replicas: 0
mappings:
doc:
properties:
location:
type: geo_point
# Six "lat,lon" points; refresh: true makes them searchable for the search below.
- do:
bulk:
refresh: true
body:
- '{"index": {"_index": "geo_agg_index", "_type": "doc"}}'
- '{"location": "52.374081,4.912350", "name": "NEMO Science Museum"}'
- '{"index": {"_index": "geo_agg_index", "_type": "doc"}}'
- '{"location": "52.369219,4.901618", "name": "Museum Het Rembrandthuis"}'
- '{"index": {"_index": "geo_agg_index", "_type": "doc"}}'
- '{"location": "52.371667,4.914722", "name": "Nederlands Scheepvaartmuseum"}'
- '{"index": {"_index": "geo_agg_index", "_type": "doc"}}'
- '{"location": "51.222900,4.405200", "name": "Letterenhuis"}'
- '{"index": {"_index": "geo_agg_index", "_type": "doc"}}'
- '{"location": "48.861111,2.336389", "name": "Musée du Louvre"}'
- '{"index": {"_index": "geo_agg_index", "_type": "doc"}}'
- '{"location": "48.860000,2.327000", "name": "Musée Orsay"}'
# Sanity check: all six points are expected in the single precision-1 cell "u".
- do:
search:
index: geo_agg_index
body:
aggregations:
mygrid:
geohash_grid:
field : location
precision : 1
- match: { hits.total: 6 }
- match: { aggregations.mygrid.buckets.0.key: u }
- match: { aggregations.mygrid.buckets.0.doc_count: 6 }
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,19 @@
wait_for_completion: true
task_id: $task_id
- match: { task.headers.X-Opaque-Id: "Reindexing Again" }

---
# Same geohash_grid query on geo_agg_index as the other geo tests, but passing
# the `hash_type: geohash` parameter explicitly; the expected result is
# unchanged (6 hits, first bucket "u" with doc_count 6).
"Verify geo aggregations work after upgrade with new types":
- do:
search:
index: geo_agg_index
body:
aggregations:
mygrid:
geohash_grid:
hash_type: geohash
field : location
precision : 1
- match: { hits.total: 6 }
- match: { aggregations.mygrid.buckets.0.key: u }
- match: { aggregations.mygrid.buckets.0.doc_count: 6 }
36 changes: 19 additions & 17 deletions server/src/main/java/org/elasticsearch/common/geo/GeoUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.index.fielddata.GeoPointValues;
import org.elasticsearch.index.fielddata.MultiGeoPointValues;
Expand Down Expand Up @@ -548,23 +547,26 @@ private static GeoPoint parseGeoHash(GeoPoint point, String geohash, EffectivePo
* @return int representing precision
*/
public static int parsePrecision(XContentParser parser) throws IOException, ElasticsearchParseException {
XContentParser.Token token = parser.currentToken();
if (token.equals(XContentParser.Token.VALUE_NUMBER)) {
return XContentMapValues.nodeIntegerValue(parser.intValue());
} else {
String precision = parser.text();
return parser.currentToken() == Token.VALUE_NUMBER ? parser.intValue() : parsePrecisionString(parser.text());
}

/**
* Attempt to parse geohash precision string into an integer value
*/
public static int parsePrecisionString(String precision) {
try {
// we want to treat simple integer strings as precision levels, not distances
return checkPrecisionRange(Integer.parseInt(precision));
// checkPrecisionRange could also throw IllegalArgumentException, but let it through
// to keep errors somewhat consistent with how they were shown before this change
} catch (NumberFormatException e) {
// try to parse as a distance value
final int parsedPrecision = GeoUtils.geoHashLevelsForPrecision(precision);
try {
// we want to treat simple integer strings as precision levels, not distances
return XContentMapValues.nodeIntegerValue(precision);
} catch (NumberFormatException e) {
// try to parse as a distance value
final int parsedPrecision = GeoUtils.geoHashLevelsForPrecision(precision);
try {
return checkPrecisionRange(parsedPrecision);
} catch (IllegalArgumentException e2) {
// this happens when distance too small, so precision > 12. We'd like to see the original string
throw new IllegalArgumentException("precision too high [" + precision + "]", e2);
}
return checkPrecisionRange(parsedPrecision);
} catch (IllegalArgumentException e2) {
// this happens when distance too small, so precision > 12. We'd like to see the original string
throw new IllegalArgumentException("precision too high [" + precision + "]", e2);
}
}
}
Expand Down
158 changes: 158 additions & 0 deletions server/src/main/java/org/elasticsearch/common/geo/QuadKeyHash.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.geo;

import org.apache.lucene.util.BitUtil;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashTypeProvider;

import static org.elasticsearch.common.geo.GeoUtils.normalizeLat;
import static org.elasticsearch.common.geo.GeoUtils.normalizeLon;

/**
 * Implements quad key hashing, the same scheme used by web map tiles.
 * The string key is formatted as "zoom/x/y".
 * The hash value (long) contains all three of those values: the zoom level is stored
 * in the bits starting at {@code ZOOM_SHIFT}, and the x/y tile indices are bit-interleaved
 * into the bits below it.
 */
public class QuadKeyHash implements GeoHashTypeProvider {

    /**
     * Largest zoom level (precision) to use.
     * This value cannot be more than (64-5)/2 = 29, because at least 5 bits are needed for the zoom level itself.
     * If zoom were not stored inside the hash, it would be possible to use up to 32.
     * Another consideration is that the index optimizes lat/lng storage, losing some precision.
     * E.g. hash lng=140.74779717298918D lat=45.61884022447444D == "18/233561/93659", but shown as "18/233561/93658"
     */
    public static final int MAX_ZOOM = 29;

    /**
     * Bit position of the zoom value within the hash. Must be &gt;= 2*MAX_ZOOM.
     * Keeping it at a constant place allows MAX_ZOOM to be increased
     * without breaking serialization binary compatibility
     * (still, the newer version should not use a higher MAX_ZOOM in mixed-version cases).
     */
    private static final int ZOOM_SHIFT = 29 * 2;

    /**
     * Mask of all the bits used by the quadkey (interleaved x/y tiles) in a hash.
     */
    private static final long QUADKEY_MASK = (1L << ZOOM_SHIFT) - 1;

    /**
     * Latitude (in degrees) of the top edge of tile row 0, i.e. the largest absolute latitude
     * the tile grid covers. Derived from the same inverse projection as {@link #tile2lat} with y = 0.
     */
    private static final double MAX_MERCATOR_LAT = Math.toDegrees(Math.atan(Math.sinh(Math.PI)));

    /**
     * Checks that the zoom level is within {@code [0, MAX_ZOOM]}.
     *
     * @param precision zoom level to check
     * @return the same value, for call chaining
     * @throws IllegalArgumentException if the value is out of range
     */
    private static int validatePrecisionInt(int precision) {
        if (precision < 0 || precision > MAX_ZOOM) {
            throw new IllegalArgumentException("Invalid geohash quadkey aggregation precision of " +
                precision + ". Must be between 0 and " + MAX_ZOOM + ".");
        }
        return precision;
    }

    /**
     * Decodes a hash produced by {@link #calculateHash} into its components.
     *
     * @param hash the encoded value
     * @return a three-element array {@code {zoom, xtile, ytile}}
     * @throws IllegalArgumentException if the zoom is out of range or a tile index
     *                                  does not fit the grid for that zoom
     */
    private static int[] parseHash(final long hash) {
        final int zoom = validatePrecisionInt((int) (hash >>> ZOOM_SHIFT));
        final int tiles = 1 << zoom;

        // decode the quadkey bits as interleaved xtile (even bits) and ytile (odd bits)
        final long quadkey = hash & QUADKEY_MASK;
        final int xtile = (int) BitUtil.deinterleave(quadkey);
        final int ytile = (int) BitUtil.deinterleave(quadkey >>> 1);
        if (xtile < 0 || ytile < 0 || xtile >= tiles || ytile >= tiles) {
            throw new IllegalArgumentException("hash-tile");
        }

        return new int[]{zoom, xtile, ytile};
    }

    /**
     * Converts a (possibly fractional) tile column into a longitude in degrees.
     */
    private static double tile2lon(final double x, final double tiles) {
        return x / tiles * 360.0 - 180;
    }

    /**
     * Converts a (possibly fractional) tile row into a latitude in degrees.
     */
    private static double tile2lat(final double y, final double tiles) {
        final double n = Math.PI - (2.0 * Math.PI * y) / tiles;
        return Math.toDegrees(Math.atan(Math.sinh(n)));
    }

    @Override
    public int getDefaultPrecision() {
        return 5;
    }

    /**
     * Parses a precision given as a string: either a plain integer zoom level,
     * or a distance value that is converted into a zoom level.
     *
     * @throws IllegalArgumentException if the resulting zoom level is out of range
     */
    @Override
    public int parsePrecisionString(String precision) {
        try {
            // we want to treat simple integer strings as precision levels, not distances
            return validatePrecision(Integer.parseInt(precision));
            // Do not catch IllegalArgumentException here
        } catch (NumberFormatException e) {
            // try to parse as a distance value
            final int parsedPrecision = GeoUtils.quadTreeLevelsForPrecision(precision);
            try {
                return validatePrecision(parsedPrecision);
            } catch (IllegalArgumentException e2) {
                // this happens when the distance is too small, so precision > MAX_ZOOM.
                // We'd like to see the original string
                throw new IllegalArgumentException("precision too high [" + precision + "]", e2);
            }
        }
    }

    @Override
    public int validatePrecision(int precision) {
        return validatePrecisionInt(precision);
    }

    /**
     * Computes the quadkey hash of a point at the given zoom level.
     *
     * @param longitude longitude in degrees (normalized before use)
     * @param latitude  latitude in degrees (normalized, then clamped to the range covered by the tile grid)
     * @param precision zoom level, {@code 0..MAX_ZOOM}
     * @return interleaved tile indices with the zoom stored at {@code ZOOM_SHIFT}
     * @throws IllegalArgumentException if {@code precision} is out of range
     */
    @Override
    public long calculateHash(double longitude, double latitude, int precision) {
        // Adapted from https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Java

        // How many tiles in X and in Y
        final int tiles = 1 << validatePrecision(precision);
        final double lon = normalizeLon(longitude);
        // Clamp to the grid's latitude range before projecting. Close to -90 degrees tan() and 1/cos()
        // are huge values of opposite sign, so their sum can round to zero or even a negative number;
        // log() of a negative number is NaN, which casts to tile 0 - the wrong (northern) edge.
        final double lat = Math.max(-MAX_MERCATOR_LAT, Math.min(MAX_MERCATOR_LAT, normalizeLat(latitude)));
        final double latRad = Math.toRadians(lat);

        final int rawX = (int) Math.floor((lon + 180) / 360 * tiles);
        final int rawY = (int) Math.floor(
            (1 - Math.log(Math.tan(latRad) + 1 / Math.cos(latRad)) / Math.PI) / 2 * tiles);

        // Points exactly on the far edge (lon == 180, lat == -MAX_MERCATOR_LAT) and rounding noise
        // may produce an index just outside the grid; pull it back in.
        final int xtile = Math.max(0, Math.min(tiles - 1, rawX));
        final int ytile = Math.max(0, Math.min(tiles - 1, rawY));

        // Zoom value is placed in front of all the bits used for the quadkey
        // e.g. if max zoom is 26, the largest index would use 52 bits (51st..0th),
        // leaving 12 bits unused for zoom. See MAX_ZOOM comment above.
        return BitUtil.interleave(xtile, ytile) | ((long) precision << ZOOM_SHIFT);
    }

    /**
     * Formats a hash as {@code "zoom/x/y"}.
     *
     * @throws IllegalArgumentException if the hash does not decode to a valid tile
     */
    @Override
    public String hashAsString(long hash) {
        final int[] res = parseHash(hash);
        return res[0] + "/" + res[1] + "/" + res[2];
    }

    /**
     * Returns the center point of the tile encoded by the hash.
     *
     * @throws IllegalArgumentException if the hash does not decode to a valid tile
     */
    @Override
    public GeoPoint hashAsGeoPoint(long hash) {
        final int[] res = parseHash(hash);
        final double tiles = Math.pow(2.0, res[0]);
        // +0.5 moves from the tile's top-left corner to its center
        return new GeoPoint(tile2lat(res[2] + 0.5, tiles), tile2lon(res[1] + 0.5, tiles));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoGridAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGrid;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashType;
import org.elasticsearch.search.aggregations.bucket.global.Global;
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
Expand Down Expand Up @@ -237,7 +238,7 @@ public static HistogramAggregationBuilder histogram(String name) {
* Create a new {@link GeoHashGrid} aggregation with the given name.
*/
public static GeoGridAggregationBuilder geohashGrid(String name) {
return new GeoGridAggregationBuilder(name);
return new GeoGridAggregationBuilder(name, GeoHashType.DEFAULT);
}

/**
Expand Down
Loading