Skip to content

Commit

Permalink
Refactors API to accept GeoSeries input for projection and trajectory…
Browse files Browse the repository at this point in the history
… functions (#955)

This PR refactors the API to accept GeoSeries input for the projection and trajectory functions.

closes #949, closes #950, closes #951, closes #952

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - H. Thomson Comer (https://github.com/thomcom)

URL: #955
  • Loading branch information
isVoid authored Feb 23, 2023
1 parent 8e22478 commit ef368f0
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 130 deletions.
36 changes: 29 additions & 7 deletions python/cuspatial/benchmarks/api/bench_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,46 @@ def bench_derive_trajectories(benchmark, sorted_trajectories):
timestamps = cupy.random.random(10000) * 10000
x = cupy.random.random(10000)
y = cupy.random.random(10000)
benchmark(cuspatial.derive_trajectories, ids, x, y, timestamps)
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame({"x": x, "y": y}).interleave_columns()
)
benchmark(cuspatial.derive_trajectories, ids, points, timestamps)


def bench_trajectory_distances_and_speeds(benchmark, sorted_trajectories):
length = len(cudf.Series(sorted_trajectories[1]).unique())
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame(
{
"x": sorted_trajectories[0]["x"],
"y": sorted_trajectories[0]["y"],
}
).interleave_columns()
)
benchmark(
cuspatial.trajectory_distances_and_speeds,
length,
sorted_trajectories[0]["object_id"],
sorted_trajectories[0]["x"],
sorted_trajectories[0]["y"],
points,
sorted_trajectories[0]["timestamp"],
)


def bench_trajectory_bounding_boxes(benchmark, sorted_trajectories):
length = len(cudf.Series(sorted_trajectories[1]).unique())
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame(
{
"x": sorted_trajectories[0]["x"],
"y": sorted_trajectories[0]["y"],
}
).interleave_columns()
)
benchmark(
cuspatial.trajectory_bounding_boxes,
length,
sorted_trajectories[0]["object_id"],
sorted_trajectories[0]["x"],
sorted_trajectories[0]["y"],
points,
)


Expand All @@ -69,12 +86,17 @@ def bench_sinusoidal_projection(benchmark, gpu_dataframe):
afghanistan = gpu_dataframe["geometry"][
gpu_dataframe["name"] == "Afghanistan"
]
lonlat = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame(
{"lon": afghanistan.polygons.y, "lat": afghanistan.polygons.x}
).interleave_columns()
)

benchmark(
cuspatial.sinusoidal_projection,
afghanistan.polygons.y.mean(),
afghanistan.polygons.x.mean(),
afghanistan.polygons.y,
afghanistan.polygons.x,
lonlat,
)


Expand Down
6 changes: 5 additions & 1 deletion python/cuspatial/benchmarks/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
Polygon,
)

import cudf

import cuspatial


Expand Down Expand Up @@ -142,7 +144,9 @@ def sorted_trajectories():
timestamps = cp.random.random(10000) * 10000
x = cp.random.random(10000)
y = cp.random.random(10000)
return cuspatial.derive_trajectories(ids, x, y, timestamps)
xy = cudf.DataFrame({"x": x, "y": y}).interleave_columns()
points = cuspatial.GeoSeries.from_points_xy(xy)
return cuspatial.derive_trajectories(ids, points, timestamps)


@pytest_cases.fixture()
Expand Down
40 changes: 26 additions & 14 deletions python/cuspatial/cuspatial/core/spatial/projection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,15 @@
from cuspatial._lib.spatial import (
sinusoidal_projection as cpp_sinusoidal_projection,
)
from cuspatial.core.geoseries import GeoSeries
from cuspatial.utils.column_utils import (
contain_single_type_geometry,
contains_only_multipoints,
contains_only_points,
)


def sinusoidal_projection(origin_lon, origin_lat, input_lon, input_lat):
def sinusoidal_projection(origin_lon, origin_lat, lonlat: GeoSeries):
"""
Sinusoidal projection of longitude/latitude relative to origin to
Cartesian (x/y) coordinates in km.
Expand All @@ -26,23 +32,29 @@ def sinusoidal_projection(origin_lon, origin_lat, input_lon, input_lat):
origin_lat : ``number``
latitude offset (this is subtracted from each input before
converting to x,y)
input_lon : ``Series`` or ``list``
longitude coordinates to convert to x
input_lat : ``Series`` or ``list``
latitude coordinates to convert to y
lonlat: GeoSeries
A GeoSeries of Points that contains the longitude and latitude
to transform
Returns
-------
result : cudf.DataFrame
x : cudf.Series
x-coordinate of the input relative to the size of the Earth in
kilometers.
y : cudf.Series
y-coordinate of the input relative to the size of the Earth in
kilometers.
result : GeoSeries
A GeoSeries that contains the transformed coordinates.
"""

if contain_single_type_geometry(lonlat):
if not contains_only_points(lonlat) or contains_only_multipoints(
lonlat
):
raise ValueError("`lonlat` must contain only POINTS geometry.")

result = cpp_sinusoidal_projection(
origin_lon, origin_lat, input_lon._column, input_lat._column
origin_lon,
origin_lat,
lonlat.points.x._column,
lonlat.points.y._column,
)
return DataFrame({"x": result[0], "y": result[1]})
lonlat_transformed = DataFrame(
{"x": result[0], "y": result[1]}
).interleave_columns()
return GeoSeries.from_points_xy(lonlat_transformed)
45 changes: 26 additions & 19 deletions python/cuspatial/cuspatial/core/trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,28 @@
from cudf import DataFrame, Series
from cudf.core.column import as_column

from cuspatial import GeoSeries
from cuspatial._lib.trajectory import (
derive_trajectories as cpp_derive_trajectories,
trajectory_bounding_boxes as cpp_trajectory_bounding_boxes,
trajectory_distances_and_speeds as cpp_trajectory_distances_and_speeds,
)
from cuspatial.utils.column_utils import (
normalize_point_columns,
contains_only_points,
normalize_timestamp_column,
)


def derive_trajectories(object_ids, xs, ys, timestamps):
def derive_trajectories(object_ids, points: GeoSeries, timestamps):
"""
Derive trajectories from object ids, points, and timestamps.
Parameters
----------
object_ids
column of object (e.g., vehicle) ids
xs
column of x-coordinates (in kilometers)
ys
column of y-coordinates (in kilometers)
points : GeoSeries
The points of the trajectories
timestamps
column of timestamps in any resolution
Expand Down Expand Up @@ -62,16 +61,20 @@ def derive_trajectories(object_ids, xs, ys, timestamps):
3 1 1.0 1.0 1970-01-01 00:00:10
"""

if len(points) > 0 and not contains_only_points(points):
raise ValueError("`points` must only contain point geometries.")

object_ids = as_column(object_ids, dtype=np.int32)
xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
xs = as_column(points.points.x)
ys = as_column(points.points.y)
timestamps = normalize_timestamp_column(as_column(timestamps))
objects, traj_offsets = cpp_derive_trajectories(
object_ids, xs, ys, timestamps
)
return DataFrame._from_data(*objects), Series(data=traj_offsets)


def trajectory_bounding_boxes(num_trajectories, object_ids, xs, ys):
def trajectory_bounding_boxes(num_trajectories, object_ids, points: GeoSeries):
"""Compute the bounding boxes of sets of trajectories.
Parameters
Expand All @@ -80,10 +83,8 @@ def trajectory_bounding_boxes(num_trajectories, object_ids, xs, ys):
number of trajectories (unique object ids)
object_ids
column of object (e.g., vehicle) ids
xs
column of x-coordinates (in kilometers)
ys
column of y-coordinates (in kilometers)
points: GeoSeries
Series of trajectory points
Returns
-------
Expand Down Expand Up @@ -121,15 +122,19 @@ def trajectory_bounding_boxes(num_trajectories, object_ids, xs, ys):
1 1.0 1.0 3.0 3.0
"""

if len(points) > 0 and not contains_only_points(points):
raise ValueError("`points` must only contain point geometries.")

object_ids = as_column(object_ids, dtype=np.int32)
xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
xs = as_column(points.points.x)
ys = as_column(points.points.y)
return DataFrame._from_data(
*cpp_trajectory_bounding_boxes(num_trajectories, object_ids, xs, ys)
)


def trajectory_distances_and_speeds(
num_trajectories, object_ids, xs, ys, timestamps
num_trajectories, object_ids, points: GeoSeries, timestamps
):
"""
Compute the distance traveled and speed of sets of trajectories
Expand All @@ -140,10 +145,8 @@ def trajectory_distances_and_speeds(
number of trajectories (unique object ids)
object_ids
column of object (e.g., vehicle) ids
xs
column of x-coordinates (in kilometers)
ys
column of y-coordinates (in kilometers)
points: GeoSeries
A series of points
timestamps
column of timestamps in any resolution
Expand Down Expand Up @@ -174,8 +177,12 @@ def trajectory_distances_and_speeds(
1 1414.213562 141.421356
"""

if len(points) > 0 and not contains_only_points(points):
raise ValueError("`points` must only contain point geometries.")

object_ids = as_column(object_ids, dtype=np.int32)
xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
xs = as_column(points.points.x)
ys = as_column(points.points.y)
timestamps = normalize_timestamp_column(as_column(timestamps))
df = DataFrame._from_data(
*cpp_trajectory_distances_and_speeds(
Expand Down
Loading

0 comments on commit ef368f0

Please sign in to comment.