Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
*/
package org.apache.pinot.core.geospatial.transform.function;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.uber.h3core.LengthUnit;
import com.uber.h3core.util.GeoCoord;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.pinot.segment.local.utils.GeometrySerializer;
import org.apache.pinot.segment.local.utils.GeometryUtils;
import org.apache.pinot.segment.local.utils.H3Utils;
Expand All @@ -33,6 +40,26 @@
*/
public class ScalarFunctions {

// from https://h3geo.org/docs/core-library/restable
private static final ImmutableMap<Double, Integer> RESOLUTIONS = ImmutableMap.<Double, Integer>builder()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plz move this to geo util class

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also add more comments about the meaning of the value

.put(1107.712591000, 0)
.put(418.676005500, 1)
.put(158.244655800, 2)
.put(59.810857940, 3)
.put(22.606379400, 4)
.put(8.544408276, 5)
.put(3.229482772, 6)
.put(1.220629759, 7)
.put(0.461354684, 8)
.put(0.174375668, 9)
.put(0.065907807, 10)
.put(0.024910561, 11)
.put(0.009415526, 12)
.put(0.003559893, 13)
.put(0.001348575, 14)
.put(0.000509713, 15)
.build();

/**
* Creates a point.
*
Expand Down Expand Up @@ -111,4 +138,35 @@ public static byte[] toGeometry(byte[] bytes) {
public static long geoToH3(double longitude, double latitude, int resolution) {
return H3Utils.H3_CORE.geoToH3(latitude, longitude, resolution);
}

public static List<Long> polygonToH3(List<Coordinate> region, int res) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

javadocs

return H3Utils.H3_CORE.polyfill(
region.stream()
.map(coord -> new GeoCoord(coord.x, coord.y))
.collect(Collectors.toUnmodifiableList()),
ImmutableList.of(),
res);
}

public static int calcResFromMaxDist(double maxDist, int minHexagonEdges) {
return RESOLUTIONS.get(Collections.min(RESOLUTIONS.keySet().stream()
.filter(edgeLen -> edgeLen < maxDist / minHexagonEdges)
.collect(Collectors.toUnmodifiableSet())));
}

public static double maxDist(List<Coordinate> points) {
int n = points.size();
double max = 0;

for (int i = 0; i < n; i++) {
for (int j = i + 1; j < n; j++) {
max = Math.max(max, dist(points.get(i), points.get(j)));
}
}
return Math.sqrt(max);
}

public static double dist(Coordinate a, Coordinate b) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

either make the unit an argument or add the unit as part of the func name

return H3Utils.H3_CORE.pointDist(new GeoCoord(a.y, a.x), new GeoCoord(b.y, b.x), LengthUnit.km);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import org.apache.pinot.core.operator.transform.function.TransformFunction;
import org.apache.pinot.core.plan.DocIdSetPlanNode;
import org.apache.pinot.segment.local.utils.GeometrySerializer;
import org.apache.pinot.segment.local.utils.GeometryUtils;
import org.apache.pinot.segment.spi.datasource.DataSource;
import org.apache.pinot.spi.data.FieldSpec;
import org.locationtech.jts.geom.Geometry;
Expand Down Expand Up @@ -83,9 +82,6 @@ public int[] transformToIntValuesSV(ProjectionBlock projectionBlock) {
for (int i = 0; i < projectionBlock.getNumDocs(); i++) {
Geometry firstGeometry = GeometrySerializer.deserialize(firstValues[i]);
Geometry secondGeometry = GeometrySerializer.deserialize(secondValues[i]);
if (GeometryUtils.isGeography(firstGeometry) || GeometryUtils.isGeography(secondGeometry)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why did you remove this? performance?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this check is needed, the current implementation does not handle geography correctly.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why does it not work for geography?

throw new RuntimeException(String.format("%s is available for Geometry objects only", FUNCTION_NAME));
}
_results[i] = firstGeometry.contains(secondGeometry) ? 1 : 0;
}
return _results;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.core.operator.filter;

import java.util.*;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.request.context.predicate.Predicate;
import org.apache.pinot.core.geospatial.transform.function.ScalarFunctions;
import org.apache.pinot.core.operator.blocks.FilterBlock;
import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator;
import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
import org.apache.pinot.segment.local.utils.GeometrySerializer;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.segment.spi.index.reader.H3IndexReader;
import org.apache.pinot.spi.utils.BytesUtils;
import org.locationtech.jts.geom.Coordinate;
import org.roaringbitmap.buffer.MutableRoaringBitmap;

/**
* A filter operator that uses H3 index for geospatial data inclusion
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plz add comments about the algorithm used in this operator

*/
public class H3InclusionIndexFilterOperator extends BaseFilterOperator {
private static final String OPERATOR_NAME = "H3IndexFilterOperator";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make operator name consistent with the class name?


private final IndexSegment _segment;
private final Predicate _predicate;
private final int _numDocs;
private final H3IndexReader _h3IndexReader;
private final List<Long> _h3Ids;

public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate predicate, int numDocs) {
_segment = segment;
_predicate = predicate;
_numDocs = numDocs;

List<ExpressionContext> arguments = predicate.getLhs().getFunction().getArguments();
Coordinate[] coordinates;
if (arguments.get(0).getType() == ExpressionContext.Type.IDENTIFIER) {
// look up arg0's h3 indices
_h3IndexReader = segment.getDataSource(arguments.get(0).getIdentifier()).getH3Index();
// arg1 is the literal
coordinates = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(1).getLiteral())).getCoordinates();
} else {
// look up arg1's h3 indices
_h3IndexReader = segment.getDataSource(arguments.get(1).getIdentifier()).getH3Index();
// arg0 is the literal
coordinates = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(0).getLiteral())).getCoordinates();
}
// must be some h3 index
assert _h3IndexReader != null;

// look up all hexagons for provided coordinates
List<Coordinate> coordinateList = Arrays.asList(coordinates);
_h3Ids = ScalarFunctions.polygonToH3(coordinateList,
ScalarFunctions.calcResFromMaxDist(ScalarFunctions.maxDist(coordinateList), 50));
}

@Override
protected FilterBlock getNextBlock() {
// have list of h3 hashes for polygon provided
// return filtered num_docs
MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
for (long docId : _h3Ids) {
fullMatchDocIds.or(_h3IndexReader.getDocIds(docId));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how do you determine full matches?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add unit tests to verify the algorithm

}
fullMatchDocIds.flip(0L, _numDocs);

// when h3 implements polyfill parameters for including partial matches, we can expand to include partial matches
return getFilterBlock(fullMatchDocIds, new MutableRoaringBitmap());
}

/**
* Returns the filter block based on the given full match doc ids and the partial match doc ids.
*/
private FilterBlock getFilterBlock(MutableRoaringBitmap fullMatchDocIds, MutableRoaringBitmap partialMatchDocIds) {
ExpressionFilterOperator expressionFilterOperator = new ExpressionFilterOperator(_segment, _predicate, _numDocs);
ScanBasedDocIdIterator docIdIterator =
(ScanBasedDocIdIterator) expressionFilterOperator.getNextBlock().getBlockDocIdSet().iterator();
MutableRoaringBitmap result = docIdIterator.applyAnd(partialMatchDocIds);
result.or(fullMatchDocIds);
return new FilterBlock(new BitmapDocIdSet(result, _numDocs) {
@Override
public long getNumEntriesScannedInFilter() {
return docIdIterator.getNumEntriesScanned();
}
});
}

@Override
public String getOperatorName() {
return OPERATOR_NAME;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,9 @@
import org.apache.pinot.common.request.context.predicate.Predicate;
import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate;
import org.apache.pinot.common.request.context.predicate.TextMatchPredicate;
import org.apache.pinot.core.geospatial.transform.function.StContainsFunction;
import org.apache.pinot.core.geospatial.transform.function.StDistanceFunction;
import org.apache.pinot.core.operator.filter.BaseFilterOperator;
import org.apache.pinot.core.operator.filter.BitmapBasedFilterOperator;
import org.apache.pinot.core.operator.filter.EmptyFilterOperator;
import org.apache.pinot.core.operator.filter.ExpressionFilterOperator;
import org.apache.pinot.core.operator.filter.FilterOperatorUtils;
import org.apache.pinot.core.operator.filter.H3IndexFilterOperator;
import org.apache.pinot.core.operator.filter.JsonMatchFilterOperator;
import org.apache.pinot.core.operator.filter.MatchAllFilterOperator;
import org.apache.pinot.core.operator.filter.TextMatchFilterOperator;
import org.apache.pinot.core.operator.filter.*;
import org.apache.pinot.core.operator.filter.predicate.FSTBasedRegexpPredicateEvaluatorFactory;
import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluatorProvider;
Expand Down Expand Up @@ -106,12 +99,13 @@ private boolean canApplyH3Index(Predicate predicate, FunctionContext function) {
if (predicate.getType() != Predicate.Type.RANGE) {
return false;
}
if (!function.getFunctionName().equalsIgnoreCase(StDistanceFunction.FUNCTION_NAME)) {
if (!(function.getFunctionName().equalsIgnoreCase(StDistanceFunction.FUNCTION_NAME) ||
function.getFunctionName().equalsIgnoreCase(StContainsFunction.FUNCTION_NAME))) {
return false;
}
List<ExpressionContext> arguments = function.getArguments();
if (arguments.size() != 2) {
throw new BadQueryRequestException("Expect 2 arguments for function: " + StDistanceFunction.FUNCTION_NAME);
throw new BadQueryRequestException("Expect 2 arguments for function: " + function.getFunctionName());
}
// TODO: handle nested geography/geometry conversion functions
String columnName = null;
Expand Down Expand Up @@ -165,7 +159,11 @@ private BaseFilterOperator constructPhysicalOperator(FilterContext filter,
ExpressionContext lhs = predicate.getLhs();
if (lhs.getType() == ExpressionContext.Type.FUNCTION) {
if (canApplyH3Index(predicate, lhs.getFunction())) {
return new H3IndexFilterOperator(_indexSegment, predicate, _numDocs);
if (lhs.getFunction().getFunctionName().equalsIgnoreCase(StDistanceFunction.FUNCTION_NAME)) {
return new H3IndexFilterOperator(_indexSegment, predicate, _numDocs);
} else if (lhs.getFunction().getFunctionName().equalsIgnoreCase(StContainsFunction.FUNCTION_NAME)) {
return new H3InclusionIndexFilterOperator(_indexSegment, predicate, _numDocs);
}
}
// TODO: ExpressionFilterOperator does not support predicate types without PredicateEvaluator (IS_NULL,
// IS_NOT_NULL, TEXT_MATCH)
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@
<netty.version>4.1.54.Final</netty.version>
<reactivestreams.version>1.0.3</reactivestreams.version>
<jts.version>1.16.1</jts.version>
<h3.version>3.0.3</h3.version>
<h3.version>3.7.0</h3.version>
<jmh.version>1.26</jmh.version>
<audienceannotations.version>0.13.0</audienceannotations.version>

Expand Down