Skip to content

Commit

Permalink
Refactors filtering.py, indexing.py to Accept GeoSeries (#938)
Browse files Browse the repository at this point in the history
closes #936, closes #937

This PR changes `quadtree_on_points` and `points_in_spatial_window` to accept a Geoseries of points.

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - H. Thomson Comer (https://github.com/thomcom)

URL: #938
  • Loading branch information
isVoid authored Feb 21, 2023
1 parent c28a4ac commit 8299bdc
Show file tree
Hide file tree
Showing 6 changed files with 295 additions and 230 deletions.
27 changes: 19 additions & 8 deletions python/cuspatial/benchmarks/api/bench_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,28 +121,35 @@ def bench_points_in_spatial_window(benchmark, gpu_dataframe):
geometry = gpu_dataframe["geometry"]
mean_x, std_x = (geometry.polygons.x.mean(), geometry.polygons.x.std())
mean_y, std_y = (geometry.polygons.y.mean(), geometry.polygons.y.std())
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame(
{"x": geometry.polygons.x, "y": geometry.polygons.y}
).interleave_columns()
)
benchmark(
cuspatial.points_in_spatial_window,
points,
mean_x - std_x,
mean_x + std_x,
mean_y - std_y,
mean_y + std_y,
geometry.polygons.x,
geometry.polygons.y,
)


def bench_quadtree_on_points(benchmark, gpu_dataframe):
polygons = gpu_dataframe["geometry"].polygons
x_points = (cupy.random.random(10000000) - 0.5) * 360
y_points = (cupy.random.random(10000000) - 0.5) * 180
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame({"x": x_points, "y": y_points}).interleave_columns()
)

scale = 5
max_depth = 7
min_size = 125
benchmark(
cuspatial.quadtree_on_points,
x_points,
y_points,
points,
polygons.x.min(),
polygons.x.max(),
polygons.y.min(),
Expand All @@ -160,9 +167,11 @@ def bench_quadtree_point_in_polygon(benchmark, polygons):
scale = 5
max_depth = 7
min_size = 125
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame({"x": x_points, "y": y_points}).interleave_columns()
)
point_indices, quadtree = cuspatial.quadtree_on_points(
x_points,
y_points,
points,
polygons.x.min(),
polygons.x.max(),
polygons.y.min(),
Expand Down Expand Up @@ -215,9 +224,11 @@ def bench_quadtree_point_to_nearest_linestring(benchmark):
polygons = gpu_countries["geometry"].polygons
points_x = gpu_cities["geometry"].points.x
points_y = gpu_cities["geometry"].points.y
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame({"x": points_x, "y": points_y}).interleave_columns()
)
point_indices, quadtree = cuspatial.quadtree_on_points(
points_x,
points_y,
points,
polygons.x.min(),
polygons.x.max(),
polygons.y.min(),
Expand Down
41 changes: 25 additions & 16 deletions python/cuspatial/cuspatial/core/spatial/filtering.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

from cudf import DataFrame
from cudf.core.column import as_column

from cuspatial._lib import spatial_window
from cuspatial.utils.column_utils import normalize_point_columns
from cuspatial.core.geoseries import GeoSeries
from cuspatial.utils.column_utils import contains_only_points


def points_in_spatial_window(min_x, max_x, min_y, max_y, xs, ys):
def points_in_spatial_window(points: GeoSeries, min_x, max_x, min_y, max_y):
"""Return only the subset of coordinates that fall within a
rectangular window.
Expand All @@ -19,32 +20,40 @@ def points_in_spatial_window(min_x, max_x, min_y, max_y, xs, ys):
Parameters
----------
min_x
points: GeoSeries
A geoseries of points
min_x: float
lower x-coordinate of the query window
max_x
max_x: float
upper x-coordinate of the query window
min_y
min_y: float
lower y-coordinate of the query window
max_y
max_y: float
upper y-coordinate of the query window
xs
column of x-coordinates that may fall within the window
ys
column of y-coordinates that may fall within the window
Returns
-------
result : cudf.DataFrame
subset of `(x, y)` pairs above that fall within the window
result : GeoSeries
subset of `points` above that fall within the window
Notes
-----
* Swaps ``min_x`` and ``max_x`` if ``min_x > max_x``
* Swaps ``min_y`` and ``max_y`` if ``min_y > max_y``
"""
xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
return DataFrame._from_data(

if len(points) == 0:
return GeoSeries([])

if not contains_only_points(points):
raise ValueError("GeoSeries must contain only points.")

xs = as_column(points.points.x)
ys = as_column(points.points.y)

res_xy = DataFrame._from_data(
*spatial_window.points_in_spatial_window(
min_x, max_x, min_y, max_y, xs, ys
)
)
).interleave_columns()
return GeoSeries.from_points_xy(res_xy)
20 changes: 12 additions & 8 deletions python/cuspatial/cuspatial/core/spatial/indexing.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,28 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

import warnings

from cudf import DataFrame, Series
from cudf.core.column import as_column

from cuspatial import GeoSeries
from cuspatial._lib.quadtree import (
quadtree_on_points as cpp_quadtree_on_points,
)
from cuspatial.utils.column_utils import normalize_point_columns
from cuspatial.utils.column_utils import contains_only_points


def quadtree_on_points(
xs, ys, x_min, x_max, y_min, y_max, scale, max_depth, max_size
points: GeoSeries, x_min, x_max, y_min, y_max, scale, max_depth, max_size
):
"""
Construct a quadtree from a set of points for a given area-of-interest
bounding box.
Parameters
----------
xs
Column of x-coordinates for each point.
ys
Column of y-coordinates for each point.
points
Series of points.
x_min
The lower-left x-coordinate of the area of interest bounding box.
x_max
Expand Down Expand Up @@ -157,7 +156,12 @@ def quadtree_on_points(
Length: 120, dtype: int32
"""

xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
if not len(points) == 0 and not contains_only_points(points):
raise ValueError("GeoSeries must contain only points.")

xs = as_column(points.points.x)
ys = as_column(points.points.y)

x_min, x_max, y_min, y_max = (
min(x_min, x_max),
max(x_min, x_max),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2023, NVIDIA CORPORATION.

import geopandas as gpd
import pytest

import cudf
from geopandas.testing import assert_geoseries_equal
from shapely.geometry import Point

import cuspatial


def test_zeros():
result = cuspatial.points_in_spatial_window( # noqa: F841
0, 0, 0, 0, cudf.Series([0.0]), cudf.Series([0.0])
cuspatial.GeoSeries([Point(0, 0)]), 0, 0, 0, 0
)
assert result.empty


def test_centered():
result = cuspatial.points_in_spatial_window(
-1, 1, -1, 1, cudf.Series([0.0]), cudf.Series([0.0])
)
cudf.testing.assert_frame_equal(
result, cudf.DataFrame({"x": [0.0], "y": [0.0]})
)
s = cuspatial.GeoSeries([Point(0, 0)])
result = cuspatial.points_in_spatial_window(s, -1, 1, -1, 1)

assert_geoseries_equal(result.to_geopandas(), gpd.GeoSeries([Point(0, 0)]))


@pytest.mark.parametrize(
Expand All @@ -29,38 +28,48 @@ def test_centered():
def test_corners(coords):
x, y = coords
result = cuspatial.points_in_spatial_window(
-1.1, 1.1, -1.1, 1.1, cudf.Series([x]), cudf.Series([y])
)
cudf.testing.assert_frame_equal(
result, cudf.DataFrame({"x": [x], "y": [y]})
cuspatial.GeoSeries([Point(x, y)]), -1.1, 1.1, -1.1, 1.1
)
assert_geoseries_equal(result.to_geopandas(), gpd.GeoSeries([Point(x, y)]))


def test_pair():
result = cuspatial.points_in_spatial_window(
-1.1, 1.1, -1.1, 1.1, cudf.Series([0.0, 1.0]), cudf.Series([1.0, 0.0])
cuspatial.GeoSeries([Point(0, 1), Point(1, 0)]), -1.1, 1.1, -1.1, 1.1
)
cudf.testing.assert_frame_equal(
result, cudf.DataFrame({"x": [0.0, 1.0], "y": [1.0, 0.0]})
assert_geoseries_equal(
result.to_geopandas(), gpd.GeoSeries([Point(0, 1), Point(1, 0)])
)


def test_oob():
result = cuspatial.points_in_spatial_window(
-1, 1, -1, 1, cudf.Series([-2.0, 2.0]), cudf.Series([2.0, -2.0])
cuspatial.GeoSeries([Point(-2.0, 2.0), Point(2.0, -2.0)]),
-1,
1,
-1,
1,
)
cudf.testing.assert_frame_equal(result, cudf.DataFrame({"x": [], "y": []}))
assert_geoseries_equal(result.to_geopandas(), gpd.GeoSeries([]))


def test_half():
result = cuspatial.points_in_spatial_window(
cuspatial.GeoSeries(
[
Point(-1.0, 1.0),
Point(1.0, -1.0),
Point(3.0, 3.0),
Point(-3.0, -3.0),
]
),
-2,
2,
-2,
2,
cudf.Series([-1.0, 1.0, 3.0, -3.0]),
cudf.Series([1.0, -1.0, 3.0, -3.0]),
)
cudf.testing.assert_frame_equal(
result, cudf.DataFrame({"x": [-1.0, 1.0], "y": [1.0, -1.0]})

assert_geoseries_equal(
result.to_geopandas(),
gpd.GeoSeries([Point(-1.0, 1.0), Point(1.0, -1.0)]),
)
Loading

0 comments on commit 8299bdc

Please sign in to comment.