From 236bd743be986e0afb9e2acca27d8f2d5466c7c0 Mon Sep 17 00:00:00 2001 From: Ram Sriharsha Date: Mon, 18 Sep 2017 15:14:15 +0200 Subject: [PATCH] Refactor polyline (#166) * Add tests for polyline functions * Refactor Polyline * cleanup --- src/main/scala/magellan/PolyLine.scala | 166 ++++++++++-------- src/main/scala/magellan/Shape.scala | 8 + src/main/scala/magellan/geometry/Curve.scala | 46 +++++ src/main/scala/magellan/geometry/Loop.scala | 17 +- src/main/scala/magellan/geometry/R2Loop.scala | 5 +- .../apache/spark/sql/types/PolyLineUDT.scala | 22 +-- src/test/scala/magellan/GeoJSONSuite.scala | 8 +- src/test/scala/magellan/OsmSuite.scala | 15 +- src/test/scala/magellan/PolyLineSuite.scala | 59 +++++++ src/test/scala/magellan/TestingUtils.scala | 27 ++- src/test/scala/magellan/WKTParserSuite.scala | 6 +- .../magellan/catalyst/ExpressionSuite.scala | 47 ++++- 12 files changed, 301 insertions(+), 125 deletions(-) create mode 100644 src/main/scala/magellan/geometry/Curve.scala create mode 100644 src/test/scala/magellan/PolyLineSuite.scala diff --git a/src/main/scala/magellan/PolyLine.scala b/src/main/scala/magellan/PolyLine.scala index 8c4d3e9..9ab4b5d 100644 --- a/src/main/scala/magellan/PolyLine.scala +++ b/src/main/scala/magellan/PolyLine.scala @@ -17,9 +17,12 @@ package magellan import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty} +import magellan.geometry.{Curve, R2Loop} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.types._ -import scala.util.control.Breaks._ +import scala.collection.mutable.ArrayBuffer /** * A PolyLine is an ordered set of vertices that consists of one or more parts. 
@@ -28,80 +31,105 @@ import scala.util.control.Breaks._ * Parts may or may not intersect one another */ @SQLUserDefinedType(udt = classOf[PolyLineUDT]) -class PolyLine( - val indices: Array[Int], - val xcoordinates: Array[Double], - val ycoordinates: Array[Double], - override val boundingBox: BoundingBox) extends Shape { +class PolyLine extends Shape { + + private var indices: Array[Int] = _ + private var xcoordinates: Array[Double] = _ + private var ycoordinates: Array[Double] = _ + + @transient var curves = new ArrayBuffer[Curve]() + + @JsonIgnore private var _boundingBox: BoundingBox = _ + + private[magellan] def init( + indices: Array[Int], + xcoordinates: Array[Double], + ycoordinates: Array[Double], + boundingBox: BoundingBox): Unit = { + + this.indices = indices + this.xcoordinates = xcoordinates + this.ycoordinates = ycoordinates + this._boundingBox = boundingBox + // initialize the loops + val offsets = indices.zip(indices.drop(1) ++ Array(xcoordinates.length)) + for ((start, end) <- offsets) { + curves += ({ + val curves = new R2Loop() + curves.init(xcoordinates, ycoordinates, start, end - 1) + curves + }) + } + } + override def getType(): Int = 3 - def this() {this(Array(0), Array(), Array(), BoundingBox(0,0,0,0))} + def init(row: InternalRow): Unit = { + init(row.getArray(5).toIntArray(), + row.getArray(6).toDoubleArray(), + row.getArray(7).toDoubleArray(), + BoundingBox(row.getDouble(1), row.getDouble(2), row.getDouble(3), row.getDouble(4))) + } - override def getType(): Int = 3 + def serialize(): InternalRow = { + val row = new GenericInternalRow(8) + val BoundingBox(xmin, ymin, xmax, ymax) = boundingBox + row.update(0, getType()) + row.update(1, xmin) + row.update(2, ymin) + row.update(3, xmax) + row.update(4, ymax) + row.update(5, new IntegerArrayData(indices)) + row.update(6, new DoubleArrayData(xcoordinates)) + row.update(7, new DoubleArrayData(ycoordinates)) + row + } @JsonProperty - private [magellan] def getXCoordinates(): Array[Double] = 
xcoordinates + private def getXCoordinates(): Array[Double] = xcoordinates @JsonProperty - private [magellan] def getYCoordinates(): Array[Double] = ycoordinates - - private [magellan] def contains(point:Point): Boolean = { - var startIndex = 0 - var endIndex = 1 - var contains = false - val length = xcoordinates.size - - if(!exceedsBounds(point)) - breakable { - while(endIndex < length) { - val startX = xcoordinates(startIndex) - val startY = ycoordinates(startIndex) - val endX = xcoordinates(endIndex) - val endY = ycoordinates(endIndex) - val slope = (endY - startY)/(endX - startX) - val pointSlope = (endY - point.getY())/(endX - point.getX()) - if(slope == pointSlope) { - contains = true - break - } - startIndex += 1 - endIndex += 1 - } + private def getYCoordinates(): Array[Double] = ycoordinates + + @JsonProperty + override def boundingBox = _boundingBox + + private[magellan] def contains(point: Point): Boolean = { + val numLoops = curves.size + var touches = false + var i = 0 + while (i < numLoops && !touches) { + touches |= curves(i).touches(point) + i += 1 } - contains + touches } - def exceedsBounds(point:Point):Boolean = { - val BoundingBox(pt_xmin, pt_ymin, pt_xmax, pt_ymax) = point.boundingBox - val BoundingBox(xmin, ymin, xmax, ymax) = boundingBox - pt_xmin < xmin && pt_ymin < ymin || - pt_xmax > xmax && pt_ymax > ymax + /** + * A polyline intersects a line iff it is a proper intersection, + * or if either vertex of the line touches the polyline. 
+ * + * @param line + * @return + */ + private [magellan] def intersects(line: Line): Boolean = { + curves exists (_.intersects(line)) } - def intersects(line:Line):Boolean = { - var startIndex = 0 - var endIndex = 1 - var intersects = false - val length = xcoordinates.size - - breakable { - - while(endIndex < length) { - val startX = xcoordinates(startIndex) - val startY = ycoordinates(startIndex) - val endX = xcoordinates(endIndex) - val endY = ycoordinates(endIndex) - // check if any segment intersects incoming line - if(line.intersects(Line(Point(startX, startY), Point(endX, endY)))) { - intersects = true - break - } - startIndex += 1 - endIndex += 1 - } - } - intersects - } + @JsonIgnore + override def isEmpty(): Boolean = xcoordinates.length == 0 + + def length(): Int = xcoordinates.length + + def getVertex(index: Int) = Point(xcoordinates(index), ycoordinates(index)) + + @JsonProperty + def getRings(): Array[Int] = indices + + @JsonIgnore + def getNumRings(): Int = indices.length + + def getRing(index: Int): Int = indices(index) def canEqual(other: Any): Boolean = other.isInstanceOf[PolyLine] @@ -133,9 +161,6 @@ class PolyLine( ??? 
} - @JsonIgnore - override def isEmpty(): Boolean = xcoordinates.length == 0 - /*override def jsonValue: JValue = ("type" -> "udt") ~ ("class" -> this.getClass.getName) ~ @@ -171,11 +196,12 @@ object PolyLine { } i += 1 } - new PolyLine( + val polyline = new PolyLine() + polyline.init( indices, points.map(_.getX()), points.map(_.getY()), - BoundingBox(xmin, ymin, xmax, ymax) - ) + BoundingBox(xmin, ymin, xmax, ymax)) + polyline } -} \ No newline at end of file +} diff --git a/src/main/scala/magellan/Shape.scala b/src/main/scala/magellan/Shape.scala index 6a4f476..9d6837a 100644 --- a/src/main/scala/magellan/Shape.scala +++ b/src/main/scala/magellan/Shape.scala @@ -112,12 +112,20 @@ trait Shape extends DataType with Serializable { if (boundingBox.contains(other.boundingBox)) { (this, other) match { + case (p: Point, q: Point) => p.equals(q) + case (p: Point, q: Line) => false case (p: Point, q: Polygon) => false + case (p: Point, q: PolyLine) => false + case (p: Polygon, q: Point) => p.contains(q) case (p: Polygon, q: Line) => p.contains(q) + case (p: Line, q: Point) => p.contains(q) case (p: Line, q: Line) => p.contains(q) + + case (p: PolyLine, q: Point) => p.contains(q) + case _ => ??? } } else { diff --git a/src/main/scala/magellan/geometry/Curve.scala b/src/main/scala/magellan/geometry/Curve.scala new file mode 100644 index 0000000..c6fe5fe --- /dev/null +++ b/src/main/scala/magellan/geometry/Curve.scala @@ -0,0 +1,46 @@ +/** + * Copyright 2015 Ram Sriharsha + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package magellan.geometry + +import magellan.{Line, Point, Relate} + +/** + * A curve consists of a single chain of vertices that represents an open curve on the plane. + * + * Curves are not allowed to have any duplicate vertices (whether adjacent or + * not), and non-adjacent edges are not allowed to intersect. Curves must have at + * least 2 vertices. Although these restrictions are not enforced in optimized + * code, you may get unexpected results if they are violated. + */ +trait Curve extends Serializable { + + /** + * Returns true if the curve touches the point, false otherwise. + * + * @param point + * @return + */ + def touches(point: Point): Boolean + + /** + * Returns true if the line intersects (properly or vertex touching) the curve, false otherwise. + * + * @param line + * @return + */ + def intersects(line: Line): Boolean + +} diff --git a/src/main/scala/magellan/geometry/Loop.scala b/src/main/scala/magellan/geometry/Loop.scala index 3a9926d..5588191 100644 --- a/src/main/scala/magellan/geometry/Loop.scala +++ b/src/main/scala/magellan/geometry/Loop.scala @@ -15,11 +15,12 @@ */ package magellan.geometry +import magellan.Relate.Touches import magellan.{Line, Point, Relate} /** * - * A Loop represents a simple polygon. It consists of a single + * A Loop represents a closed curve. It consists of a single * chain of vertices where the first vertex is explicitly connected to the last. * * Loops are not allowed to have any duplicate vertices (whether adjacent or @@ -32,7 +33,11 @@ import magellan.{Line, Point, Relate} * that loops do not necessarily contain all (or any) of their vertices. 
* */ -trait Loop extends Serializable { +trait Loop extends Serializable with Curve { + + override def touches(point: Point) = { + containsOrCrosses(point) == Touches + } /** * A loop contains the given point iff the point is properly contained within the @@ -54,14 +59,6 @@ trait Loop extends Serializable { */ def containsOrCrosses(point: Point): Relate - /** - * Returns true if the line intersects (properly or vertex touching) loop, false otherwise. - * - * @param line - * @return - */ - def intersects(line: Line): Boolean - /** * Returns true if the two loops intersect (properly or vertex touching), false otherwise. * @param loop diff --git a/src/main/scala/magellan/geometry/R2Loop.scala b/src/main/scala/magellan/geometry/R2Loop.scala index 2eb8e21..42de845 100644 --- a/src/main/scala/magellan/geometry/R2Loop.scala +++ b/src/main/scala/magellan/geometry/R2Loop.scala @@ -72,7 +72,10 @@ class R2Loop extends Loop { override def iterator() = new LoopIterator() - override def toString = s"R2Loop($xcoordinates, $ycoordinates, $startIndex, $endIndex)" + override def toString = s"R2Loop(${xcoordinates.mkString(",")}," + + s" ${ycoordinates.mkString(",")}," + + s" $startIndex," + + s" $endIndex)" @inline private def intersects(point: Point, line: Line): Boolean = { val (start, end) = (line.getStart(), line.getEnd()) diff --git a/src/main/scala/org/apache/spark/sql/types/PolyLineUDT.scala b/src/main/scala/org/apache/spark/sql/types/PolyLineUDT.scala index cec6ce1..83bb25b 100644 --- a/src/main/scala/org/apache/spark/sql/types/PolyLineUDT.scala +++ b/src/main/scala/org/apache/spark/sql/types/PolyLineUDT.scala @@ -19,17 +19,7 @@ class PolyLineUDT extends UserDefinedType[PolyLine] with GeometricUDT { )) override def serialize(polyLine: PolyLine): InternalRow = { - val row = new GenericInternalRow(8) - val BoundingBox(xmin, ymin, xmax, ymax) = polyLine.boundingBox - row.update(0, polyLine.getType()) - row.update(1, xmin) - row.update(2, ymin) - row.update(3, xmax) - 
row.update(4, ymax) - row.update(5, new IntegerArrayData(polyLine.indices)) - row.update(6, new DoubleArrayData(polyLine.xcoordinates)) - row.update(7, new DoubleArrayData(polyLine.ycoordinates)) - row + polyLine.serialize() } override def serialize(shape: Shape) = serialize(shape.asInstanceOf[PolyLine]) @@ -38,13 +28,9 @@ class PolyLineUDT extends UserDefinedType[PolyLine] with GeometricUDT { override def deserialize(datum: Any): PolyLine = { val row = datum.asInstanceOf[InternalRow] - val polyLine = new PolyLine( - row.getArray(5).toIntArray(), - row.getArray(6).toDoubleArray(), - row.getArray(7).toDoubleArray(), - BoundingBox(row.getDouble(1), row.getDouble(2), row.getDouble(3), row.getDouble(4)) - ) - polyLine + val polyline = new PolyLine() + polyline.init(row) + polyline } override def pyUDT: String = "magellan.types.PolyLineUDT" diff --git a/src/test/scala/magellan/GeoJSONSuite.scala b/src/test/scala/magellan/GeoJSONSuite.scala index 5e218de..32c439d 100644 --- a/src/test/scala/magellan/GeoJSONSuite.scala +++ b/src/test/scala/magellan/GeoJSONSuite.scala @@ -84,11 +84,9 @@ class GeoJSONSuite extends FunSuite with TestSparkContext { import sqlCtx.implicits._ val p = df.select($"polyline").first()(0).asInstanceOf[PolyLine] // [ -122.04864044239585, 37.408617050391001 ], [ -122.047741818556602, 37.408915362324983 ] - assert(p.indices.size === 2) - assert(p.xcoordinates.head == -122.04864044239585) - assert(p.ycoordinates.head == 37.408617050391001) - assert(p.xcoordinates.last == -122.047741818556602) - assert(p.ycoordinates.last == 37.408915362324983) + assert(p.getNumRings() === 2) + assert(p.getVertex(0) == Point(-122.04864044239585, 37.408617050391001)) + assert(p.getVertex(1) == Point(-122.047741818556602, 37.408915362324983)) } test("Read Polygon") { diff --git a/src/test/scala/magellan/OsmSuite.scala b/src/test/scala/magellan/OsmSuite.scala index 529d056..2456eae 100644 --- a/src/test/scala/magellan/OsmSuite.scala +++ 
b/src/test/scala/magellan/OsmSuite.scala @@ -109,16 +109,11 @@ class OsmSuite extends FunSuite with TestSparkContext { .select("polyline") .first()(0).asInstanceOf[PolyLine] - assert(p.xcoordinates.size == 4) - assert(p.ycoordinates.size == 4) - assert(p.xcoordinates(0) == -75.6362879) - assert(p.xcoordinates(1) == -75.6378443) - assert(p.xcoordinates(2) == -75.6382141) - assert(p.xcoordinates(3) == -75.6390858) - assert(p.ycoordinates(0) == 45.4188896) - assert(p.ycoordinates(1) == 45.4191178) - assert(p.ycoordinates(2) == 45.4191290) - assert(p.ycoordinates(3) == 45.4190782) + assert(p.length() == 4) + assert(p.getVertex(0) == Point(-75.6362879, 45.4188896)) + assert(p.getVertex(1) == Point(-75.6378443, 45.4191178)) + assert(p.getVertex(2) == Point(-75.6382141, 45.4191290)) + assert(p.getVertex(3) == Point(-75.6390858, 45.4190782)) } test("read polygon") { diff --git a/src/test/scala/magellan/PolyLineSuite.scala b/src/test/scala/magellan/PolyLineSuite.scala new file mode 100644 index 0000000..6bf30c0 --- /dev/null +++ b/src/test/scala/magellan/PolyLineSuite.scala @@ -0,0 +1,59 @@ +/** + * Copyright 2015 Ram Sriharsha + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package magellan + +import magellan.TestingUtils._ +import org.scalatest.FunSuite + +class PolyLineSuite extends FunSuite { + + test("bounding box") { + val line = Array(Point(1.0, 1.0), Point(1.0, -1.0), + Point(-1.0, -1.0), Point(-1.0, 1.0)) + val polyline = Polygon(Array(0), line) + val BoundingBox(xmin, ymin, xmax, ymax) = polyline.boundingBox + assert(xmin === -1.0) + assert(ymin === -1.0) + assert(xmax === 1.0) + assert(ymax === 1.0) + } + + test("point touches polyline") { + val line = Array(Point(1.0, 1.0), Point(1.0, -1.0), + Point(-1.0, -1.0), Point(-1.0, 1.0)) + val polyline = PolyLine(Array(0), line) + assert(polyline.contains(Point(1.0, 1.0))) + assert(polyline.contains(Point(1.0, 0.0))) + } + + test("line intersects polyline") { + val line = Array(Point(1.0, 1.0), Point(1.0, -1.0), + Point(-1.0, -1.0), Point(-1.0, 1.0)) + val polyline = PolyLine(Array(0), line) + + val candidates = Seq( + (true, "0:0,2:0"), + (true, "1:1,2:2"), + (false, "1.1:1.1,2:2"), + (false, "0:0,0:1") + ) map { + case (cond, str) => + (cond, makeLine(str)) + } + + candidates foreach { case (cond, line) => assert(cond === polyline.intersects(line))} + } +} diff --git a/src/test/scala/magellan/TestingUtils.scala b/src/test/scala/magellan/TestingUtils.scala index 72483f5..a7d3b1b 100644 --- a/src/test/scala/magellan/TestingUtils.scala +++ b/src/test/scala/magellan/TestingUtils.scala @@ -202,20 +202,35 @@ object TestingUtils { val xcoordinates = Array.fill(size)(0.0) val ycoordinates = Array.fill(size)(0.0) var index = 0 - for (token <- tokens) { - val colon = token.indexOf(':') - if (colon == -1) throw new IllegalArgumentException( - "Illegal string:" + token + ". 
Should look like '35:20'") - val x = token.substring(0, colon).toDouble - val y = token.substring(colon + 1).toDouble + makePoints(str) foreach { case (x: Double, y: Double) => xcoordinates(index) = x ycoordinates(index) = y index += 1 } + val r2Loop = new R2Loop() r2Loop.init(xcoordinates, ycoordinates, 0, size - 1) r2Loop } + + def makeLine(str: String): Line = { + val iter = makePoints(str) map { + case (x: Double, y: Double) => Point(x, y) + } + Line(iter.next(), iter.next()) + } + + private def makePoints(str: String): Iterator[(Double, Double)] = { + val tokens = Splitter.on(',').split(str) + tokens.iterator().map { s => + val colon = s.indexOf(':') + if (colon == -1) throw new IllegalArgumentException( + "Illegal string:" + s + ". Should look like '35:20'") + val x = s.substring(0, colon).toDouble + val y = s.substring(colon + 1).toDouble + (x, y) + } + } } case class MockPointExpr(point: Point) extends LeafExpression with CodegenFallback { diff --git a/src/test/scala/magellan/WKTParserSuite.scala b/src/test/scala/magellan/WKTParserSuite.scala index 202320a..3a179d0 100644 --- a/src/test/scala/magellan/WKTParserSuite.scala +++ b/src/test/scala/magellan/WKTParserSuite.scala @@ -48,14 +48,14 @@ class WKTParserSuite extends FunSuite { test("parse linestring") { var parsed = WKTParser.linestring.parse("LINESTRING (30 10, 10 30, 40 40)") var p: PolyLine = parsed.get.value - assert(p.indices.length === 1) - assert(p.xcoordinates.length === 3) + assert(p.getNumRings() === 1) + assert(p.length === 3) parsed = WKTParser.linestring.parse( "LINESTRING (-79.470579 35.442827,-79.469465 35.444889,-79.468907 35.445829,-79.468294 35.446608,-79.46687 35.447893)") p = parsed.get.value - assert(p.xcoordinates.length === 5) + assert(p.length === 5) } diff --git a/src/test/scala/magellan/catalyst/ExpressionSuite.scala b/src/test/scala/magellan/catalyst/ExpressionSuite.scala index 534c974..a0fac48 100644 --- a/src/test/scala/magellan/catalyst/ExpressionSuite.scala +++ 
b/src/test/scala/magellan/catalyst/ExpressionSuite.scala @@ -142,6 +142,29 @@ class ExpressionSuite extends FunSuite with TestSparkContext { assert(polyline1.intersects(line) === true) assert(polyline2.intersects(line) === true) assert(polyline3.intersects(line) === false) + + val sqlCtx = this.sqlContext + import sqlCtx.implicits._ + + val lines = sc.parallelize(Seq(("1", line))).toDF("id", "line") + + val polylines = sc.parallelize(Seq( + (true, polyline1), + (true, polyline2), + (false, polyline3))).toDF("cond", "polyline") + + val joined = polylines.join(lines, $"polyline" intersects $"line", "leftOuter"). + select("cond", "line"). + collect(). + map { + case Row(cond: Boolean, line: Line) => + (cond, Some(line)) + + case Row(cond: Boolean, null) => + (cond, None) + } + + joined foreach { case (cond, line) => assert(cond || line.isEmpty)} } test("PolyLine contains Point") { @@ -149,8 +172,28 @@ class ExpressionSuite extends FunSuite with TestSparkContext { val polyline = PolyLine(new Array[Int](3), Array( Point(0.0, 0.0), Point(3.0, 3.0), Point(-2.0, -2.0) )) - assert(polyline.contains(Point(1.0, 1.0)) === true) - assert(polyline.contains(Point(2.0, 1.0)) === false) + + val sqlCtx = this.sqlContext + import sqlCtx.implicits._ + + val polylines = sc.parallelize(Seq(("1", polyline))).toDF("id", "polyline") + + val points = sc.parallelize(Seq( + (true, Point(1.0, 1.0)), + (false, Point(2.0, 1.0)))).toDF("cond", "point") + + val joined = points.join(polylines, $"point" within $"polyline", "leftOuter"). + select("cond", "polyline"). + collect(). + map { + case Row(cond: Boolean, polyline: PolyLine) => + (cond, Some(polyline)) + + case Row(cond: Boolean, null) => + (cond, None) + } + + joined foreach { case (cond, polyline) => assert(cond || polyline.isEmpty)} } test("Point within Range") {