Skip to content

Commit

Permalink
[SEDONA-300] Add ST_HausdorffDistance (#868)
Browse files Browse the repository at this point in the history
  • Loading branch information
iGN5117 authored Jun 24, 2023
1 parent f8800d6 commit c28f016
Show file tree
Hide file tree
Showing 16 changed files with 279 additions and 1 deletion.
8 changes: 8 additions & 0 deletions common/src/main/java/org/apache/sedona/common/Functions.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.locationtech.jts.operation.valid.IsValidOp;
import org.locationtech.jts.precision.GeometryPrecisionReducer;
import org.locationtech.jts.simplify.TopologyPreservingSimplifier;
import org.locationtech.jts.algorithm.distance.DiscreteHausdorffDistance;
import org.opengis.referencing.FactoryException;
import org.opengis.referencing.NoSuchAuthorityCodeException;
import org.opengis.referencing.crs.CoordinateReferenceSystem;
Expand Down Expand Up @@ -965,4 +966,11 @@ public static Geometry boundingDiagonal(Geometry geometry) {
}
}

public static Double hausdorffDistance(Geometry g1, Geometry g2, double densityFrac) throws Exception {
return GeomUtils.getHausdorffDistance(g1, g2, densityFrac);
}

public static Double hausdorffDistance(Geometry g1, Geometry g2) throws Exception{
return GeomUtils.getHausdorffDistance(g1, g2, -1);
}
}
10 changes: 10 additions & 0 deletions common/src/main/java/org/apache/sedona/common/utils/GeomUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.locationtech.jts.io.WKTWriter;
import org.locationtech.jts.operation.polygonize.Polygonizer;
import org.locationtech.jts.operation.union.UnaryUnionOp;
import org.locationtech.jts.algorithm.distance.DiscreteHausdorffDistance;

import java.awt.*;
import java.nio.ByteOrder;
Expand Down Expand Up @@ -461,4 +462,13 @@ public static void translateGeom(Geometry geometry, double deltaX, double deltaY
geometry.geometryChanged();
}
}

public static Double getHausdorffDistance(Geometry g1, Geometry g2, double densityFrac) throws Exception {
if (g1.isEmpty() || g2.isEmpty()) return 0.0;
DiscreteHausdorffDistance hausdorffDistanceObj = new DiscreteHausdorffDistance(g1, g2);
if (densityFrac != -1) {
hausdorffDistanceObj.setDensifyFraction(densityFrac);
}
return hausdorffDistanceObj.distance();
}
}
71 changes: 71 additions & 0 deletions common/src/test/java/org/apache/sedona/common/FunctionsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import com.google.common.geometry.S2CellId;
import com.google.common.math.DoubleMath;
import com.sun.org.apache.xpath.internal.operations.Mult;
import org.apache.sedona.common.sphere.Haversine;
import org.apache.sedona.common.sphere.Spheroid;
import org.apache.sedona.common.utils.GeomUtils;
Expand Down Expand Up @@ -915,4 +916,74 @@ public void boundingDiagonalSingleVertex() {
String actual = Functions.boundingDiagonal(point).toText();
assertEquals(expected, actual);
}

@Test
public void hausdorffDistanceDefaultGeom2D() throws Exception {
Polygon polygon1 = GEOMETRY_FACTORY.createPolygon(coordArray3d(1, 0, 1, 1, 1, 2, 2, 1, 5, 2, 0, 1, 1, 0, 1));
Polygon polygon2 = GEOMETRY_FACTORY.createPolygon(coordArray3d(4, 0, 4, 6, 1, 4, 6, 4, 9, 6, 1, 3, 4, 0, 4));
Double expected = 5.0;
Double actual = Functions.hausdorffDistance(polygon1, polygon2);
assertEquals(expected, actual);
}

@Test
public void hausdorffDistanceGeom2D() throws Exception {
Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(10, 34));
LineString lineString = GEOMETRY_FACTORY.createLineString(coordArray(1, 2, 1, 5, 2, 6, 1, 2));
Double expected = 33.24154027718932;
Double actual = Functions.hausdorffDistance(point, lineString, 0.33);
assertEquals(expected, actual);
}

@Test
public void hausdorffDistanceInvalidDensityFrac() throws Exception {
Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(10, 34));
LineString lineString = GEOMETRY_FACTORY.createLineString(coordArray(1, 2, 1, 5, 2, 6, 1, 2));
Exception e = assertThrows(IllegalArgumentException.class, () -> Functions.hausdorffDistance(point, lineString, 3));
String expected = "Fraction is not in range (0.0 - 1.0]";
String actual = e.getMessage();
assertEquals(expected, actual);
}

@Test
public void hausdorffDistanceDefaultGeomCollection() throws Exception {
Polygon polygon = GEOMETRY_FACTORY.createPolygon(coordArray(1, 2, 2, 1, 2, 0, 4, 1, 1, 2));
Geometry point1 = GEOMETRY_FACTORY.createPoint(new Coordinate(1, 0));
Geometry point2 = GEOMETRY_FACTORY.createPoint(new Coordinate(40, 10));
Geometry point3 = GEOMETRY_FACTORY.createPoint(new Coordinate(-10, -40));
GeometryCollection multiPoint = GEOMETRY_FACTORY.createGeometryCollection(new Geometry[] {point1, point2, point3});
Double actual = Functions.hausdorffDistance(polygon, multiPoint);
Double expected = 41.7612260356422;
assertEquals(expected, actual);
}

@Test
public void hausdorffDistanceGeomCollection() throws Exception {
Polygon polygon = GEOMETRY_FACTORY.createPolygon(coordArray(1, 2, 2, 1, 2, 0, 4, 1, 1, 2));
LineString lineString1 = GEOMETRY_FACTORY.createLineString(coordArray(1, 1, 2, 1, 4, 4, 5, 5));
LineString lineString2 = GEOMETRY_FACTORY.createLineString(coordArray(10, 10, 11, 11, 12, 12, 14, 14));
LineString lineString3 = GEOMETRY_FACTORY.createLineString(coordArray(-11, -20, -11, -21, -15, -19));
MultiLineString multiLineString = GEOMETRY_FACTORY.createMultiLineString(new LineString[] {lineString1, lineString2, lineString3});
Double actual = Functions.hausdorffDistance(polygon, multiLineString, 0.0000001);
Double expected = 25.495097567963924;
assertEquals(expected, actual);
}

@Test
public void hausdorffDistanceEmptyGeom() throws Exception {
Polygon polygon = GEOMETRY_FACTORY.createPolygon(coordArray(1, 2, 2, 1, 2, 0, 4, 1, 1, 2));
LineString emptyLineString = GEOMETRY_FACTORY.createLineString();
Double expected = 0.0;
Double actual = Functions.hausdorffDistance(polygon, emptyLineString, 0.00001);
assertEquals(expected, actual);
}

@Test
public void hausdorffDistanceDefaultEmptyGeom() throws Exception {
Polygon polygon = GEOMETRY_FACTORY.createPolygon(coordArray(1, 2, 2, 1, 2, 0, 4, 1, 1, 2));
LineString emptyLineString = GEOMETRY_FACTORY.createLineString();
Double expected = 0.0;
Double actual = Functions.hausdorffDistance(polygon, emptyLineString);
assertEquals(expected, actual);
}
}
40 changes: 40 additions & 0 deletions docs/api/flink/Function.md
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,46 @@ SELECT ST_GeometryN(ST_GeomFromText('MULTIPOINT((1 2), (3 4), (5 6), (8 9))'), 1

Output: `POINT (3 4)`

## ST_HausdorffDistance

Introduction: Returns a discretized (and hence approximate) [Hausdorff distance](https://en.wikipedia.org/wiki/Hausdorff_distance) between the given 2 geometries.
Optionally, a densityFraction parameter can be specified, which gives more accurate results by densifying segments before computing hausdorff distance between them.
Each segment is broken down into equal-length subsegments whose ratio with segment length is closest to the given density fraction.

Hence, the lower the densityFrac value, the more accurate is the computed hausdorff distance, and the more time it takes to compute it.

If any of the geometry is empty, 0.0 is returned.


!!!Note
Accepted range of densityFrac is (0.0, 1.0], if any other value is provided, ST_HausdorffDistance throws an IllegalArgumentException


!!!Note
Even though the function accepts 3D geometry, the z ordinate is ignored and the computed hausdorff distance is equivalent to the geometries not having the z ordinate.

Format: `ST_HausdorffDistance(g1: geometry, g2: geometry, densityFrac)`

Since: `v1.5.0`

Example:
```sql
SELECT ST_HausdorffDistance(g1, g2, 0.1)
```

Input: `g1: POINT (0.0 1.0), g2: LINESTRING (0 0, 1 0, 2 0, 3 0, 4 0, 5 0)`

Output: `5.0990195135927845`

```sql
SELECT ST_HausdorffDistance(ST_GeomFromText(), ST_GeomFromText())
```

Input: `g1: POLYGON Z((1 0 1, 1 1 2, 2 1 5, 2 0 1, 1 0 1)), g2: POLYGON Z((4 0 4, 6 1 4, 6 4 9, 6 1 3, 4 0 4))`

Output: `5.0`


## ST_InteriorRingN

Introduction: Returns the Nth interior linestring ring of the polygon geometry. Returns NULL if the geometry is not a polygon or the given N is out of range
Expand Down
38 changes: 38 additions & 0 deletions docs/api/sql/Function.md
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,44 @@ SELECT ST_GeometryType(polygondf.countyshape)
FROM polygondf
```

## ST_HausdorffDistance

Introduction: Returns a discretized (and hence approximate) [Hausdorff distance](https://en.wikipedia.org/wiki/Hausdorff_distance) between the given 2 geometries.
Optionally, a densityFraction parameter can be specified, which gives more accurate results by densifying segments before computing hausdorff distance between them.
Each segment is broken down into equal-length subsegments whose ratio with segment length is closest to the given density fraction.

Hence, the lower the densityFrac value, the more accurate is the computed hausdorff distance, and the more time it takes to compute it.

If any of the geometry is empty, 0.0 is returned.

!!!Note
Accepted range of densityFrac is (0.0, 1.0], if any other value is provided, ST_HausdorffDistance throws an IllegalArgumentException


!!!Note
Even though the function accepts 3D geometry, the z ordinate is ignored and the computed hausdorff distance is equivalent to the geometries not having the z ordinate.

Format: `ST_HausdorffDistance(g1: geometry, g2: geometry, densityFrac)`

Since: `v1.5.0`

Example:
```sql
SELECT ST_HausdorffDistance(g1, g2, 0.1)
```

Input: `g1: POINT (0.0 1.0), g2: LINESTRING (0 0, 1 0, 2 0, 3 0, 4 0, 5 0)`

Output: `5.0990195135927845`

```sql
SELECT ST_HausdorffDistance(ST_GeomFromText(), ST_GeomFromText())
```

Input: `g1: POLYGON Z((1 0 1, 1 1 2, 2 1 5, 2 0 1, 1 0 1)), g2: POLYGON Z((4 0 4, 6 1 4, 6 4 9, 6 1 3, 4 0 4))`

Output: `5.0`

## ST_InteriorRingN

Introduction: Returns the Nth interior linestring ring of the polygon geometry. Returns NULL if the geometry is not a polygon or the given N is out of range
Expand Down
1 change: 1 addition & 0 deletions flink/src/main/java/org/apache/sedona/flink/Catalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ public static UserDefinedFunction[] getFuncs() {
new Functions.ST_NRings(),
new Functions.ST_Translate(),
new Functions.ST_BoundingDiagonal(),
new Functions.ST_HausdorffDistance(),
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -624,12 +624,29 @@ public Geometry eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.j
}

public static class ST_BoundingDiagonal extends ScalarFunction {

@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class)
public Geometry eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object o) {
Geometry geometry = (Geometry) o;
return org.apache.sedona.common.Functions.boundingDiagonal(geometry);
}
}

public static class ST_HausdorffDistance extends ScalarFunction {
@DataTypeHint("Double")
public Double eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object g1,
@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object g2,
@DataTypeHint("Double") Double densityFrac) throws Exception {
Geometry geom1 = (Geometry) g1;
Geometry geom2 = (Geometry) g2;
return org.apache.sedona.common.Functions.hausdorffDistance(geom1, geom2, densityFrac);
}

@DataTypeHint("Double")
public Double eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object g1,
@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object g2) throws Exception {
Geometry geom1 = (Geometry) g1;
Geometry geom2 = (Geometry) g2;
return org.apache.sedona.common.Functions.hausdorffDistance(geom1, geom2);
}
}
}
12 changes: 12 additions & 0 deletions flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -745,4 +745,16 @@ public void testBoundingDiagonal() {
assertEquals(expected, actual);
}

@Test
public void testHausdorffDistance() {
Table polyTable = tableEnv.sqlQuery("SELECT ST_GeomFromWKT('POINT (0.0 1.0)') AS g1, ST_GeomFromWKT('LINESTRING (0 0, 1 0, 2 0, 3 0, 4 0, 5 0)') AS g2");
Table actualTable = polyTable.select(call(Functions.ST_HausdorffDistance.class.getSimpleName(), $("g1"), $("g2"), 0.4));
Table actualTableDefault = polyTable.select(call(Functions.ST_HausdorffDistance.class.getSimpleName(), $("g1"), $("g2")));
Double expected = 5.0990195135927845;
Double expectedDefault = 5.0990195135927845;
Double actual = (Double) first(actualTable).getField(0);
Double actualDefault = (Double) first(actualTableDefault).getField(0);
assertEquals(expected, actual);
assertEquals(expectedDefault, actualDefault);
}
}
13 changes: 13 additions & 0 deletions python/sedona/sql/st_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1288,3 +1288,16 @@ def ST_BoundingDiagonal(geometry: ColumnOrName) -> Column:
"""

return _call_st_function("ST_BoundingDiagonal", geometry)

@validate_argument_types
def ST_HausdorffDistance(g1: ColumnOrName, g2: ColumnOrName, densityFrac: Optional[Union[ColumnOrName, float]] = -1) -> Column:
"""
Returns discretized (and hence approximate) hausdorff distance between two given geometries.
Optionally, a distance fraction can also be provided which decreases the gap between actual and discretized hausforff distance
:param g1:
:param g2:
:param densityFrac: Optional
:return:
"""
args = (g1, g2, densityFrac)
return _call_st_function("ST_HausdorffDistance", args)
3 changes: 3 additions & 0 deletions python/tests/sql/test_dataframe_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
(stf.ST_GeometricMedian, ("multipoint",), "multipoint_geom", "", "POINT (22.500002656424286 21.250001168173426)"),
(stf.ST_GeometryN, ("geom", 0), "multipoint", "", "POINT (0 0)"),
(stf.ST_GeometryType, ("point",), "point_geom", "", "ST_Point"),
(stf.ST_HausdorffDistance, ("point", "line",), "point_and_line", "", 5.0990195135927845),
(stf.ST_InteriorRingN, ("geom", 0), "geom_with_hole", "", "LINESTRING (1 1, 2 2, 2 1, 1 1)"),
(stf.ST_Intersection, ("a", "b"), "overlapping_polys", "", "POLYGON ((2 0, 1 0, 1 1, 2 1, 2 0))"),
(stf.ST_IsClosed, ("geom",), "closed_linestring_geom", "", True),
Expand Down Expand Up @@ -391,6 +392,8 @@ def base_df(self, request):
return TestDataFrameAPI.spark.sql("SELECT ST_GeomFromWKT('LINESTRING (0 0, 2 1)') AS line, ST_GeomFromWKT('POLYGON ((1 0, 2 0, 2 2, 1 2, 1 0))') AS poly")
elif request.param == "square_geom":
return TestDataFrameAPI.spark.sql("SELECT ST_GeomFromWKT('POLYGON ((1 0, 1 1, 2 1, 2 0, 1 0))') AS geom")
elif request.param == "point_and_line":
return TestDataFrameAPI.spark.sql("SELECT ST_GeomFromWKT('POINT (0.0 1.0)') AS point, ST_GeomFromWKT('LINESTRING (0 0, 1 0, 2 0, 3 0, 4 0, 5 0)') AS line")
raise ValueError(f"Invalid base_df name passed: {request.param}")

def _id_test_configuration(val):
Expand Down
14 changes: 14 additions & 0 deletions python/tests/sql/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,3 +1104,17 @@ def test_boundingDiagonal(self):
"1 0))')) AS geom")
actual = actual_df.selectExpr("ST_AsText(geom)").take(1)[0][0]
assert expected == actual

def test_hausdorffDistance(self):
expected = 5.0
actual_df = self.spark.sql("SELECT ST_HausdorffDistance(ST_GeomFromText('POLYGON ((1 0 1, 1 1 2, 2 1 5, "
"2 0 1, 1 0 1))'), ST_GeomFromText('POLYGON ((4 0 4, 6 1 4, 6 4 9, 6 1 3, "
"4 0 4))'), 0.5)")
actual_df_default = self.spark.sql("SELECT ST_HausdorffDistance(ST_GeomFromText('POLYGON ((1 0 1, 1 1 2, "
"2 1 5, "
"2 0 1, 1 0 1))'), ST_GeomFromText('POLYGON ((4 0 4, 6 1 4, 6 4 9, 6 1 3, "
"4 0 4))'))")
actual = actual_df.take(1)[0][0]
actual_default = actual_df_default.take(1)[0][0]
assert expected == actual
assert expected == actual_default
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ object Catalog {
function[ST_NRings](),
function[ST_Translate](0.0),
function[ST_BoundingDiagonal](),
function[ST_HausdorffDistance](-1),
// Expression for rasters
function[RS_NormalizedDifference](),
function[RS_Mean](),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1016,3 +1016,10 @@ case class ST_BoundingDiagonal(inputExpressions: Seq[Expression])
copy(inputExpressions = newChildren)
}
}

case class ST_HausdorffDistance(inputExpressions: Seq[Expression])
extends InferredTernaryExpression(Functions.hausdorffDistance) with FoldableExpression {
protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
copy(inputExpressions = newChildren)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -333,4 +333,12 @@ object st_functions extends DataFrameAPI {
def ST_BoundingDiagonal(geometry: String) =
wrapExpression[ST_BoundingDiagonal](geometry)

def ST_HausdorffDistance(g1: Column, g2: Column) = wrapExpression[ST_HausdorffDistance](g1, g2, -1)

def ST_HausdorffDistance(g1: String, g2: String) = wrapExpression[ST_HausdorffDistance](g1, g2, -1);

def ST_HausdorffDistance(g1: Column, g2: Column, densityFrac: Column) = wrapExpression[ST_HausdorffDistance](g1, g2, densityFrac);

def ST_HausdorffDistance(g1: String, g2: String, densityFrac: Double) = wrapExpression[ST_HausdorffDistance](g1, g2, densityFrac);

}
Original file line number Diff line number Diff line change
Expand Up @@ -1003,5 +1003,18 @@ class dataFrameAPITestScala extends TestBaseScala {
val actual = wKTWriter.write(df.take(1)(0).get(0).asInstanceOf[Geometry])
assertEquals(expected, actual)
}

it("Passed ST_HausdorffDistance") {
val polyDf = sparkSession.sql("SELECT ST_GeomFromWKT('POLYGON ((1 2, 2 1, 2 0, 4 1, 1 2))') AS g1, " +
"ST_GeomFromWKT('MULTILINESTRING ((1 1, 2 1, 4 4, 5 5), (10 10, 11 11, 12 12, 14 14), (-11 -20, -11 -21, -15 -19))') AS g2")
val df = polyDf.select(ST_HausdorffDistance("g1", "g2", 0.05))
val dfDefaultValue = polyDf.select(ST_HausdorffDistance("g1", "g2"))
val expected = 25.495097567963924
val actual = df.take(1)(0).get(0).asInstanceOf[Double]
val actualDefaultValue = dfDefaultValue.take(1)(0).get(0).asInstanceOf[Double]
assert(expected == actual)
assert(expected == actualDefaultValue)
}

}
}
Loading

0 comments on commit c28f016

Please sign in to comment.