Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor spatial related functions under spatial package #656

Merged
merged 23 commits into from
Aug 23, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0700863
Move trajectory into sub-package
isVoid Aug 15, 2022
6a9b0dd
Organize test files in the same folder structure
isVoid Aug 15, 2022
eb35b2c
Group interpolate under `trajectory` category.
isVoid Aug 15, 2022
a007d57
reorganize spline tests
isVoid Aug 15, 2022
265874a
Create new hierarchy for spatial functions
isVoid Aug 16, 2022
bf01510
Organize tests in the same hierarchy as code
isVoid Aug 16, 2022
b2017fb
Refactor `CubicSpline` to be independent of `trajectory` component.
isVoid Aug 16, 2022
943c5a8
Match test files structure
isVoid Aug 16, 2022
8125efe
style
isVoid Aug 16, 2022
dd78588
Merge branch 'refactor/trajectory' into refactor/spatial
isVoid Aug 17, 2022
4b77ee2
Update documentation
isVoid Aug 17, 2022
e0845f2
Update top level module visibility
isVoid Aug 17, 2022
060c562
Merge branch 'branch-22.10' of https://github.com/rapidsai/cuspatial …
isVoid Aug 18, 2022
bed6c62
add sections to spatial api reference
isVoid Aug 18, 2022
b14806d
Revert "Refactor `CubicSpline` to be independent of `trajectory` comp…
isVoid Aug 19, 2022
5915ce9
Revert "Move trajectory into sub-package"
isVoid Aug 19, 2022
9d3ab98
Expose spatial package to top level API
isVoid Aug 19, 2022
d673faa
style, license date
isVoid Aug 19, 2022
30c3f87
Add project wide config file for black
isVoid Aug 19, 2022
7b7f7eb
black format
isVoid Aug 19, 2022
a812008
renaming the files to new suggestion
isVoid Aug 23, 2022
516157a
Update docs/source/api_docs/spatial.rst
isVoid Aug 23, 2022
2219db0
Merge branch 'branch-22.10' of https://github.com/rapidsai/cuspatial …
isVoid Aug 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refactor CubicSpline to be independent of trajectory component.
  • Loading branch information
isVoid committed Aug 16, 2022
commit b2017fb6db0ff3e977e260e3ee6967125711c4e4
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._version import get_versions
from .core.trajectory import interpolate
from .core import interpolate
from .core.gis import (
directed_hausdorff_distance,
haversine_distance,
Expand All @@ -10,7 +10,7 @@
pairwise_linestring_distance,
)
from .core.indexing import quadtree_on_points
from .core.trajectory import CubicSpline
from .core.interpolate import CubicSpline
from .core.spatial_join import (
join_quadtree_and_bounding_boxes,
quadtree_point_in_polygon,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
cubicspline_interpolate,
)


def _cubic_spline_coefficients(x, y, ids, prefix_sums):
x_c = x._column
y_c = y._column
Expand All @@ -36,87 +35,82 @@ class CubicSpline:
"""
Fits each column of the input Series `y` to a hermetic cubic spline.

``cuspatial.CubicSpline`` supports two usage patterns: The first is
identical to scipy.interpolate.CubicSpline::

curve = cuspatial.CubicSpline(t, y)
new_points = curve(np.linspace(t.min, t.max, 50))

This allows API parity with scipy. This isn't recommended, as scipy
host based interpolation performance is likely to exceed GPU performance
for a single curve.

However, cuSpatial significantly outperforms scipy when many splines are
``cuspatial.CubicSpline`` supports basic usage identical to
scipy.interpolate.CubicSpline::

curve = cuspatial.CubicSpline(x, y)
new_points = curve(np.linspace(x.min(), x.max(), 50))

Parameters
----------
x : cudf.Series
1-D array containing values of the independent variable.
Values must be real, finite and in strictly increasing order.
y : cudf.Series
Array containing values of the dependent variable.
ids (Optional) : cudf.Series
ids of each spline
size (Optional) : int
fixed size of each spline
offset (Optional) : cudf.Series
alternative to `size`, allows splines of varying
length. Not yet fully supported.

Returns
-------
CubicSpline : callable `o`
``o.c`` contains the coefficients that can be used to compute new
points along the spline fitting the original ``x`` data. ``o(n)``
interpolates the spline coordinates along new input values ``n``.

Note
----
cuSpatial will outperform scipy when many splines are
fit simultaneously. Data must be arranged in a SoA format, and the
exclusive `prefix_sum` of the separate curves must also be passed to the
function.::

NUM_SPLINES = 100000
SPLINE_LENGTH = 101
t = cudf.Series(
function. See example for detail.

Example
-------
>>> import cudf, cuspatial
>>> NUM_SPLINES = 100000
>>> SPLINE_LENGTH = 101
>>> x = cudf.Series(
np.hstack((np.arange(SPLINE_LENGTH),) * NUM_SPLINES)
).astype('float32')
y = cudf.Series(
>>> y = cudf.Series(
np.random.random(SPLINE_LENGTH*NUM_SPLINES)
).astype('float32')
prefix_sum = cudf.Series(
cp.arange(NUM_SPLINES + 1)*SPLINE_LENGTH
>>> prefix_sum = cudf.Series(
np.arange(NUM_SPLINES + 1)*SPLINE_LENGTH
).astype('int32')
curve = cuspatial.CubicSpline(t, y, prefixes=prefix_sum)
new_samples = cudf.Series(
>>> curve = cuspatial.CubicSpline(x, y, offset=prefix_sum)
>>> new_samples = cudf.Series(
np.hstack((np.linspace(
0, (SPLINE_LENGTH - 1), (SPLINE_LENGTH - 1) * 2 + 1
),) * NUM_SPLINES)
).astype('float32')
curve_ids = cudf.Series(np.repeat(
>>> curve_ids = cudf.Series(np.repeat(
np.arange(0, NUM_SPLINES), SPLINE_LENGTH * 2 - 1
), dtype="int32")
new_points = curve(new_samples, curve_ids)

>>> new_points = curve(new_samples, curve_ids)
"""

def __init__(self, t, y, ids=None, size=None, prefixes=None):
"""
Computes various error preconditions on the input data, then
uses CUDA to compute cubic splines for each set of input
coordinates on the GPU in parallel.

Parameters
----------
t : cudf.Series
time sample values. Must be monotonically increasing.
y : cudf.Series
columns to have curves fit to according to x
ids (Optional) : cudf.Series
ids of each spline
size (Optional) : cudf.Series
fixed size of each spline
prefixes (Optional) : cudf.Series
alternative to `size`, allows splines of varying
length. Not yet fully supported.

Returns
-------
CubicSpline : callable `o`
``o.c`` contains the coefficients that can be used to compute new
points along the spline fitting the original ``t`` data. ``o(n)``
interpolates the spline coordinates along new input values ``n``.
"""

def __init__(self, x, y, ids=None, size=None, offset=None):
# error protections:
if len(t) < 5:
if len(x) < 5:
raise ValueError(
"Use of GPU cubic spline requires splines of length > 4"
)
if not isinstance(t, Series):
if not isinstance(x, Series):
raise TypeError(
"Error: input independent vars must be cudf Series"
)
if not isinstance(y, (Series, DataFrame)):
raise TypeError(
"Error: input dependent vars must be cudf Series or DataFrame"
)
if not len(t) == len(y):
if not len(x) == len(y):
raise TypeError(
"Error: dependent and independent vars have different length"
)
Expand All @@ -128,33 +122,33 @@ def __init__(self, t, y, ids=None, size=None, prefixes=None):
if not ids.dtype == np.int32:
raise TypeError("Error: int32 only supported at this time.")
self.ids = ids
self.size = size if size is not None else len(t)
self.size = size if size is not None else len(x)
if not isinstance(self.size, int):
raise TypeError("Error: size must be an integer")
if not ((len(t) % self.size) == 0):
if not ((len(x) % self.size) == 0):
raise ValueError(
"Error: length of input is not a multiple of size"
)
if not isinstance(t, Series):
if not isinstance(x, Series):
raise TypeError("cuspatial.CubicSpline requires a cudf.Series")
if not t.dtype == np.float32:
if not x.dtype == np.float32:
raise TypeError("Error: float32 only supported at this time.")
if not isinstance(y, Series):
raise TypeError("cuspatial.CubicSpline requires a cudf.Series")
if not y.dtype == np.float32:
raise TypeError("Error: float32 only supported at this time.")
self.t = t
self.x = x
self.y = y
if prefixes is None:
self.prefix = Series(
cp.arange((len(t) / self.size) + 1) * self.size
if offset is None:
self.offset = Series(
cp.arange((len(x) / self.size) + 1) * self.size
).astype("int32")
else:
if not isinstance(prefixes, Series):
if not isinstance(offset, Series):
raise TypeError("cuspatial.CubicSpline requires a cudf.Series")
if not prefixes.dtype == np.int32:
if not offset.dtype == np.int32:
raise TypeError("Error: int32 only supported at this time.")
self.prefix = prefixes
self.offset = offset

self.c = self._compute_coefficients()

Expand All @@ -164,13 +158,13 @@ def _compute_coefficients(self):
"""
if isinstance(self.y, Series):
return _cubic_spline_coefficients(
self.t, self.y, self.ids, self.prefix
self.x, self.y, self.ids, self.offset
)
else:
c = {}
for col in self.y.columns:
c[col] = _cubic_spline_coefficients(
self.t, self.y, self.ids, self.prefix
self.x, self.y, self.ids, self.offset
)
return c

Expand All @@ -187,7 +181,7 @@ def __call__(self, coordinates, groups=None):
cp.repeat(cp.array(0), len(coordinates))
).astype("int32")
result = _cubic_spline_fit(
coordinates, self.groups, self.prefix, self.t, self.c
coordinates, self.groups, self.offset, self.x, self.c
)
return Series(result)
else:
Expand Down
14 changes: 0 additions & 14 deletions python/cuspatial/cuspatial/core/trajectory/__init__.py

This file was deleted.

10 changes: 5 additions & 5 deletions python/cuspatial/cuspatial/tests/trajectory/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def test_cusparse():
cudf.Series([3, 2, 3, 4, 3, 3, 2, 3, 4, 3, 3, 2, 3, 4, 3]).astype(
"float32"
),
prefixes=cudf.Series([0, 5, 10, 15]).astype("int32"),
offset=cudf.Series([0, 5, 10, 15]).astype("int32"),
)
cudf.testing.assert_frame_equal(
result.c,
Expand Down Expand Up @@ -156,7 +156,7 @@ def test_class_triple():
"float32"
)
prefixes = cudf.Series([0, 5, 10, 15]).astype("int32")
g = cuspatial.interpolate.CubicSpline(t, x, prefixes=prefixes)
g = cuspatial.interpolate.CubicSpline(t, x, offset=prefixes)
groups = cudf.Series(
np.ravel(np.array([np.repeat(0, 5), np.repeat(1, 5), np.repeat(2, 5)]))
)
Expand All @@ -171,7 +171,7 @@ def test_class_triple_six():
[3, 2, 3, 4, 3, 1, 3, 2, 3, 4, 3, 1, 3, 2, 3, 4, 3, 1]
).astype("float32")
prefixes = cudf.Series([0, 6, 12, 18]).astype("int32")
g = cuspatial.interpolate.CubicSpline(t, x, prefixes=prefixes)
g = cuspatial.interpolate.CubicSpline(t, x, offset=prefixes)
groups = cudf.Series(
np.ravel(np.array([np.repeat(0, 6), np.repeat(1, 6), np.repeat(2, 6)]))
)
Expand All @@ -186,7 +186,7 @@ def test_class_triple_six_splits():
[3, 2, 3, 4, 3, 1, 3, 2, 3, 4, 3, 1, 3, 2, 3, 4, 3, 1]
).astype("float32")
prefixes = cudf.Series([0, 6, 12, 18]).astype("int32")
g = cuspatial.interpolate.CubicSpline(t, x, prefixes=prefixes)
g = cuspatial.interpolate.CubicSpline(t, x, offset=prefixes)
groups = cudf.Series(
np.ravel(
np.array([np.repeat(0, 12), np.repeat(1, 12), np.repeat(2, 12)])
Expand Down Expand Up @@ -236,7 +236,7 @@ def test_class_new_interpolation():
new_samples = cudf.Series(np.hstack((np.linspace(0, 4, 9),) * 3)).astype(
"float32"
)
curve = cuspatial.CubicSpline(t, y, prefixes=prefix_sum)
curve = cuspatial.CubicSpline(t, y, offset=prefix_sum)
new_x = cudf.Series(np.repeat(np.arange(0, 3), 9)).astype("int32")
old_x = cudf.Series(np.repeat(np.arange(0, 3), 5)).astype("int32")
new_points = curve(new_samples, groups=new_x)
Expand Down