-
Notifications
You must be signed in to change notification settings - Fork 1.5k
support ST_Contains with H3 optimization #7252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,13 @@ | |
| */ | ||
| package org.apache.pinot.core.geospatial.transform.function; | ||
|
|
||
| import com.google.common.collect.ImmutableList; | ||
| import com.google.common.collect.ImmutableMap; | ||
| import com.uber.h3core.LengthUnit; | ||
| import com.uber.h3core.util.GeoCoord; | ||
| import java.util.Collections; | ||
| import java.util.List; | ||
| import java.util.stream.Collectors; | ||
| import org.apache.pinot.segment.local.utils.GeometrySerializer; | ||
| import org.apache.pinot.segment.local.utils.GeometryUtils; | ||
| import org.apache.pinot.segment.local.utils.H3Utils; | ||
|
|
@@ -33,6 +40,26 @@ | |
| */ | ||
| public class ScalarFunctions { | ||
|
|
||
| // from https://h3geo.org/docs/core-library/restable | ||
| private static final ImmutableMap<Double, Integer> RESOLUTIONS = ImmutableMap.<Double, Integer>builder() | ||
| .put(1107.712591000, 0) | ||
| .put(418.676005500, 1) | ||
| .put(158.244655800, 2) | ||
| .put(59.810857940, 3) | ||
| .put(22.606379400, 4) | ||
| .put(8.544408276, 5) | ||
| .put(3.229482772, 6) | ||
| .put(1.220629759, 7) | ||
| .put(0.461354684, 8) | ||
| .put(0.174375668, 9) | ||
| .put(0.065907807, 10) | ||
| .put(0.024910561, 11) | ||
| .put(0.009415526, 12) | ||
| .put(0.003559893, 13) | ||
| .put(0.001348575, 14) | ||
| .put(0.000509713, 15) | ||
| .build(); | ||
|
|
||
| /** | ||
| * Creates a point. | ||
| * | ||
|
|
@@ -111,4 +138,35 @@ public static byte[] toGeometry(byte[] bytes) { | |
| public static long geoToH3(double longitude, double latitude, int resolution) { | ||
| return H3Utils.H3_CORE.geoToH3(latitude, longitude, resolution); | ||
| } | ||
|
|
||
| public static List<Long> polygonToH3(List<Coordinate> region, int res) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. javadocs |
||
| return H3Utils.H3_CORE.polyfill( | ||
| region.stream() | ||
| .map(coord -> new GeoCoord(coord.x, coord.y)) | ||
| .collect(Collectors.toUnmodifiableList()), | ||
| ImmutableList.of(), | ||
| res); | ||
| } | ||
|
|
||
| public static int calcResFromMaxDist(double maxDist, int minHexagonEdges) { | ||
| return RESOLUTIONS.get(Collections.min(RESOLUTIONS.keySet().stream() | ||
| .filter(edgeLen -> edgeLen < maxDist / minHexagonEdges) | ||
| .collect(Collectors.toUnmodifiableSet()))); | ||
| } | ||
|
|
||
| public static double maxDist(List<Coordinate> points) { | ||
| int n = points.size(); | ||
| double max = 0; | ||
|
|
||
| for (int i = 0; i < n; i++) { | ||
| for (int j = i + 1; j < n; j++) { | ||
| max = Math.max(max, dist(points.get(i), points.get(j))); | ||
| } | ||
| } | ||
| return Math.sqrt(max); | ||
| } | ||
|
|
||
| public static double dist(Coordinate a, Coordinate b) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. either make the unit an argument or add the unit as part of the func name |
||
| return H3Utils.H3_CORE.pointDist(new GeoCoord(a.y, a.x), new GeoCoord(b.y, b.x), LengthUnit.km); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,6 @@ | |
| import org.apache.pinot.core.operator.transform.function.TransformFunction; | ||
| import org.apache.pinot.core.plan.DocIdSetPlanNode; | ||
| import org.apache.pinot.segment.local.utils.GeometrySerializer; | ||
| import org.apache.pinot.segment.local.utils.GeometryUtils; | ||
| import org.apache.pinot.segment.spi.datasource.DataSource; | ||
| import org.apache.pinot.spi.data.FieldSpec; | ||
| import org.locationtech.jts.geom.Geometry; | ||
|
|
@@ -83,9 +82,6 @@ public int[] transformToIntValuesSV(ProjectionBlock projectionBlock) { | |
| for (int i = 0; i < projectionBlock.getNumDocs(); i++) { | ||
| Geometry firstGeometry = GeometrySerializer.deserialize(firstValues[i]); | ||
| Geometry secondGeometry = GeometrySerializer.deserialize(secondValues[i]); | ||
| if (GeometryUtils.isGeography(firstGeometry) || GeometryUtils.isGeography(secondGeometry)) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why did you remove this? performance?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this check is needed, the current implementation does not handle geography correctly.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why does it not work for geography? |
||
| throw new RuntimeException(String.format("%s is available for Geometry objects only", FUNCTION_NAME)); | ||
| } | ||
| _results[i] = firstGeometry.contains(secondGeometry) ? 1 : 0; | ||
| } | ||
| return _results; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
| /** | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.pinot.core.operator.filter; | ||
|
|
||
| import java.util.*; | ||
| import org.apache.pinot.common.request.context.ExpressionContext; | ||
| import org.apache.pinot.common.request.context.predicate.Predicate; | ||
| import org.apache.pinot.core.geospatial.transform.function.ScalarFunctions; | ||
| import org.apache.pinot.core.operator.blocks.FilterBlock; | ||
| import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator; | ||
| import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; | ||
| import org.apache.pinot.segment.local.utils.GeometrySerializer; | ||
| import org.apache.pinot.segment.spi.IndexSegment; | ||
| import org.apache.pinot.segment.spi.index.reader.H3IndexReader; | ||
| import org.apache.pinot.spi.utils.BytesUtils; | ||
| import org.locationtech.jts.geom.Coordinate; | ||
| import org.roaringbitmap.buffer.MutableRoaringBitmap; | ||
|
|
||
| /** | ||
| * A filter operator that uses H3 index for geospatial data inclusion | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. plz add comments about the algorithm used in this operator |
||
| */ | ||
| public class H3InclusionIndexFilterOperator extends BaseFilterOperator { | ||
| private static final String OPERATOR_NAME = "H3IndexFilterOperator"; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make operator name consistent with the class name? |
||
|
|
||
| private final IndexSegment _segment; | ||
| private final Predicate _predicate; | ||
| private final int _numDocs; | ||
| private final H3IndexReader _h3IndexReader; | ||
| private final List<Long> _h3Ids; | ||
|
|
||
| public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate predicate, int numDocs) { | ||
| _segment = segment; | ||
| _predicate = predicate; | ||
| _numDocs = numDocs; | ||
|
|
||
| List<ExpressionContext> arguments = predicate.getLhs().getFunction().getArguments(); | ||
| Coordinate[] coordinates; | ||
| if (arguments.get(0).getType() == ExpressionContext.Type.IDENTIFIER) { | ||
| // look up arg0's h3 indices | ||
| _h3IndexReader = segment.getDataSource(arguments.get(0).getIdentifier()).getH3Index(); | ||
| // arg1 is the literal | ||
| coordinates = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(1).getLiteral())).getCoordinates(); | ||
| } else { | ||
| // look up arg1's h3 indices | ||
| _h3IndexReader = segment.getDataSource(arguments.get(1).getIdentifier()).getH3Index(); | ||
| // arg0 is the literal | ||
| coordinates = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(0).getLiteral())).getCoordinates(); | ||
| } | ||
| // must be some h3 index | ||
| assert _h3IndexReader != null; | ||
|
|
||
| // look up all hexagons for provided coordinates | ||
| List<Coordinate> coordinateList = Arrays.asList(coordinates); | ||
| _h3Ids = ScalarFunctions.polygonToH3(coordinateList, | ||
| ScalarFunctions.calcResFromMaxDist(ScalarFunctions.maxDist(coordinateList), 50)); | ||
| } | ||
|
|
||
| @Override | ||
| protected FilterBlock getNextBlock() { | ||
| // have list of h3 hashes for polygon provided | ||
| // return filtered num_docs | ||
| MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap(); | ||
| for (long docId : _h3Ids) { | ||
| fullMatchDocIds.or(_h3IndexReader.getDocIds(docId)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how do you determine full matches?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add unit tests to verify the algorithm |
||
| } | ||
| fullMatchDocIds.flip(0L, _numDocs); | ||
|
|
||
| // when h3 implements polyfill parameters for including partial matches, we can expand to include partial matches | ||
| return getFilterBlock(fullMatchDocIds, new MutableRoaringBitmap()); | ||
| } | ||
|
|
||
| /** | ||
| * Returns the filter block based on the given full match doc ids and the partial match doc ids. | ||
| */ | ||
| private FilterBlock getFilterBlock(MutableRoaringBitmap fullMatchDocIds, MutableRoaringBitmap partialMatchDocIds) { | ||
| ExpressionFilterOperator expressionFilterOperator = new ExpressionFilterOperator(_segment, _predicate, _numDocs); | ||
| ScanBasedDocIdIterator docIdIterator = | ||
| (ScanBasedDocIdIterator) expressionFilterOperator.getNextBlock().getBlockDocIdSet().iterator(); | ||
| MutableRoaringBitmap result = docIdIterator.applyAnd(partialMatchDocIds); | ||
| result.or(fullMatchDocIds); | ||
| return new FilterBlock(new BitmapDocIdSet(result, _numDocs) { | ||
| @Override | ||
| public long getNumEntriesScannedInFilter() { | ||
| return docIdIterator.getNumEntriesScanned(); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| @Override | ||
| public String getOperatorName() { | ||
| return OPERATOR_NAME; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
plz move this to geo util class
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also add more comments about the meaning of the value