Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add find_duplicate_points Internal API #815

Merged
merged 8 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cuspatial/cuda_utils.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/uninitialized_fill.h>

namespace cuspatial {
namespace detail {

/**
* @internal
* @brief Kernel to compute duplicate points in each multipoint. Naive N^2 algorithm.
*/
template <typename MultiPointRange, typename OutputIt>
void __global__ combine_duplicate_points_kernel_simple(MultiPointRange multipoints,
OutputIt stencils)
{
for (auto idx = threadIdx.x + blockIdx.x * blockDim.x; idx < multipoints.num_points();
idx += gridDim.x * blockDim.x) {
auto multipoint = multipoints[idx];
auto global_offset = multipoints.offsets_begin()[idx];
for (auto i = 0; i < multipoint.size() && stencils[i] != 1; ++i)
for (auto j = i + 1; j < multipoint.size(); ++j) {
if (multipoint[i] == multipoint[j]) stencils[j + global_offset] = 1;
}
}
}

/**
* @internal
* @brief For each multipoint, computes duplicate points and stores as stencil.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean by "stores as stencil"? Do you mean an array of flags indicating which points are duplicates?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doc updated as a result of addressing: #815 (comment)

*/
template <typename MultiPointRange, typename OutputIt>
void combine_duplicate_points(MultiPointRange multipoints,
isVoid marked this conversation as resolved.
Show resolved Hide resolved
OutputIt output_stencils,
rmm::cuda_stream_view stream)
{
if (multipoints.size() == 0) return;

thrust::uninitialized_fill_n(
rmm::exec_policy(stream), output_stencils, multipoints.num_points(), 0);

auto [threads_per_block, num_blocks] = grid_1d(multipoints.size());
combine_duplicate_points_kernel_simple<<<num_blocks, threads_per_block, 0, stream.value()>>>(
multipoints, output_stencils);
}

} // namespace detail
} // namespace cuspatial
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#pragma once
#include <cuspatial/cuda_utils.hpp>

#include <thrust/distance.h>

namespace cuspatial {

template <typename VecIterator>
Expand All @@ -37,4 +39,17 @@ CUSPATIAL_HOST_DEVICE auto multipoint_ref<VecIterator>::point_end() const
return _points_end;
}

template <typename VecIterator>
CUSPATIAL_HOST_DEVICE auto multipoint_ref<VecIterator>::num_points() const
{
return thrust::distance(_points_begin, _points_end);
}

template <typename VecIterator>
template <typename IndexType>
CUSPATIAL_HOST_DEVICE auto multipoint_ref<VecIterator>::operator[](IndexType i)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just thinking about host / device here. Is this really intended to work on the host? What if it is called from host when the iterator points to device memory?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can work on host, if all vectors are host vectors. Although I don't really see a use case at the moment. Up till now I mostly rely on developer's prudence to not access device memory from host.

{
return point_begin()[i];
}

} // namespace cuspatial
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,17 @@ multipoint_range<GeometryIterator, VecIterator>::multipoint_range(GeometryIterat
}

template <typename GeometryIterator, typename VecIterator>
auto multipoint_range<GeometryIterator, VecIterator>::size()
CUSPATIAL_HOST_DEVICE auto multipoint_range<GeometryIterator, VecIterator>::num_multipoints()
{
return thrust::distance(_geometry_begin, _geometry_end) - 1;
}

template <typename GeometryIterator, typename VecIterator>
CUSPATIAL_HOST_DEVICE auto multipoint_range<GeometryIterator, VecIterator>::num_points()
{
return thrust::distance(_points_begin, _points_end);
}

template <typename GeometryIterator, typename VecIterator>
auto multipoint_range<GeometryIterator, VecIterator>::multipoint_begin()
{
Expand All @@ -85,17 +91,29 @@ auto multipoint_range<GeometryIterator, VecIterator>::multipoint_end()
}

template <typename GeometryIterator, typename VecIterator>
auto multipoint_range<GeometryIterator, VecIterator>::point_begin()
CUSPATIAL_HOST_DEVICE auto multipoint_range<GeometryIterator, VecIterator>::point_begin()
{
return _points_begin;
}

template <typename GeometryIterator, typename VecIterator>
auto multipoint_range<GeometryIterator, VecIterator>::point_end()
CUSPATIAL_HOST_DEVICE auto multipoint_range<GeometryIterator, VecIterator>::point_end()
{
return _points_end;
}

template <typename GeometryIterator, typename VecIterator>
CUSPATIAL_HOST_DEVICE auto multipoint_range<GeometryIterator, VecIterator>::offsets_begin()
{
return _geometry_begin;
}

template <typename GeometryIterator, typename VecIterator>
CUSPATIAL_HOST_DEVICE auto multipoint_range<GeometryIterator, VecIterator>::offsets_end()
{
return _geometry_end;
}

template <typename GeometryIterator, typename VecIterator>
template <typename IndexType>
CUSPATIAL_HOST_DEVICE auto multipoint_range<GeometryIterator, VecIterator>::operator[](
Expand All @@ -105,4 +123,14 @@ CUSPATIAL_HOST_DEVICE auto multipoint_range<GeometryIterator, VecIterator>::oper
_points_begin + _geometry_begin[idx + 1]};
}

template <typename GeometryIterator, typename VecIterator>
template <typename IndexType>
CUSPATIAL_HOST_DEVICE auto
multipoint_range<GeometryIterator, VecIterator>::geometry_idx_from_point_idx(IndexType idx) const
{
return thrust::distance(
_geometry_begin,
thrust::prev(thrust::upper_bound(thrust::seq, _geometry_begin, _geometry_end, idx)));
}

} // namespace cuspatial
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#pragma once
#include <cuspatial/cuda_utils.hpp>
#include <cuspatial/traits.hpp>

namespace cuspatial {

Expand All @@ -25,6 +26,8 @@ namespace cuspatial {
*/
template <typename VecIterator>
class multipoint_ref {
using point_t = iterator_value_type<VecIterator>;

public:
CUSPATIAL_HOST_DEVICE multipoint_ref(VecIterator begin, VecIterator end);

Expand All @@ -38,6 +41,14 @@ class multipoint_ref {
/// Return iterator the the one-past the last point of the multipoint.
CUSPATIAL_HOST_DEVICE auto end() const { return point_end(); }

/// Return the number of points in multipoint.
CUSPATIAL_HOST_DEVICE auto num_points() const;
/// Return the number of points in multipoint.
CUSPATIAL_HOST_DEVICE auto size() const { return num_points(); }

template <typename IndexType>
CUSPATIAL_HOST_DEVICE auto operator[](IndexType point_idx);

protected:
VecIterator _points_begin;
VecIterator _points_end;
Expand Down
32 changes: 29 additions & 3 deletions cpp/include/cuspatial/experimental/ranges/multipoint_range.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class multipoint_range {
public:
using geometry_it_t = GeometryIterator;
using point_it_t = VecIterator;
using index_t = iterator_value_type<geometry_it_t>;
using point_t = iterator_value_type<point_it_t>;
using element_t = iterator_vec_base_type<point_it_t>;

Expand All @@ -57,11 +58,20 @@ class multipoint_range {
GeometryIterator geometry_end,
VecIterator points_begin,
VecIterator points_end);
/**
* @brief Returns the number of multipoints in the array.
*/
CUSPATIAL_HOST_DEVICE auto num_multipoints();

/**
* @brief Returns the number of points in the array.
*/
CUSPATIAL_HOST_DEVICE auto num_points();

/**
* @brief Returns the number of multipoints in the array.
*/
auto size();
CUSPATIAL_HOST_DEVICE auto size() { return num_multipoints(); }

/**
* @brief Returns the iterator to the first multipoint in the multipoint array.
Expand All @@ -86,12 +96,28 @@ class multipoint_range {
/**
* @brief Returns the iterator to the start of the underlying point array.
*/
auto point_begin();
CUSPATIAL_HOST_DEVICE auto point_begin();

/**
* @brief Returns the iterator to the end of the underlying point array.
*/
auto point_end();
CUSPATIAL_HOST_DEVICE auto point_end();

/**
* @brief Returns the iterator to the start of the underlying offsets array.
*/
CUSPATIAL_HOST_DEVICE auto offsets_begin();

/**
* @brief Returns the iterator to the end of the underlying offsets array.
*/
CUSPATIAL_HOST_DEVICE auto offsets_end();

/**
* @brief Returns the geometry index of the given point index.
*/
template <typename IndexType>
CUSPATIAL_HOST_DEVICE auto geometry_idx_from_point_idx(IndexType point_idx) const;

/**
* @brief Returns the `idx`th multipoint in the array.
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cuspatial/vec_2d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class alignas(2 * sizeof(T)) vec_2d {
/**
* @brief Compare two 2D vectors for equality.
*/
friend bool operator==(vec_2d<T> const& lhs, vec_2d<T> const& rhs)
friend bool CUSPATIAL_HOST_DEVICE operator==(vec_2d<T> const& lhs, vec_2d<T> const& rhs)
{
return (lhs.x == rhs.x) && (lhs.y == rhs.y);
}
Expand Down
86 changes: 86 additions & 0 deletions cpp/include/cuspatial_test/vector_factories.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@
*/

#include <cuspatial/experimental/ranges/multilinestring_range.cuh>
#include <cuspatial/experimental/ranges/multipoint_range.cuh>
#include <cuspatial/traits.hpp>
#include <cuspatial/vec_2d.hpp>

#include <rmm/device_vector.hpp>

#include <initializer_list>
#include <numeric>
#include <vector>

namespace cuspatial {
Expand All @@ -31,6 +34,13 @@ auto make_device_vector(std::initializer_list<T> inl)
return rmm::device_vector<T>(inl.begin(), inl.end());
}

/**
* @brief Owning object of a multilinestring array following geoarrow layout.
*
* @tparam GeometryArray Array type of geometry offset array
* @tparam PartArray Array type of part offset array
* @tparam CoordinateArray Array type of coordinate array
*/
template <typename GeometryArray, typename PartArray, typename CoordinateArray>
class multilinestring_array {
public:
Expand All @@ -43,8 +53,10 @@ class multilinestring_array {
{
}

/// Return the number of multilinestrings
auto size() { return _geometry_offset_array.size() - 1; }

/// Return range object of the multilinestring array
auto range()
{
return multilinestring_range(_geometry_offset_array.begin(),
Expand All @@ -61,6 +73,15 @@ class multilinestring_array {
CoordinateArray _coordinate_offset_array;
};

/**
* @brief Construct an owning object of a multilinestring array from initializer lists
*
* @tparam T Type of coordinate
* @param geometry_inl Initializer list of geometry offsets
* @param part_inl Initializer list of part offsets
* @param coord_inl Initializer list of coordinate
* @return multilinestring array object
*/
template <typename T>
auto make_multilinestring_array(std::initializer_list<std::size_t> geometry_inl,
std::initializer_list<std::size_t> part_inl,
Expand All @@ -73,5 +94,70 @@ auto make_multilinestring_array(std::initializer_list<std::size_t> geometry_inl,
rmm::device_vector<vec_2d<T>>(std::vector<vec_2d<T>>(coord_inl.begin(), coord_inl.end())));
}

/**
* @brief Owning object of a multipoint array following geoarrow format
*
* @tparam GeometryArray Array type of geometry offsets
* @tparam CoordinateArray Array type of coordinates
*/
template <typename GeometryArray, typename CoordinateArray>
class multipoint_array {
public:
multipoint_array(GeometryArray geometry_offsets_array, CoordinateArray coordinate_array)
: _geometry_offsets(geometry_offsets_array), _coordinates(coordinate_array)
{
}

/// Return the number of multipoints
auto size() { return _geometry_offsets.size() - 1; }

/// Return range object of the multipoint array
auto range()
{
return multipoint_range{
_geometry_offsets.begin(), _geometry_offsets.end(), _coordinates.begin(), _coordinates.end()};
}

/// Release ownership
auto release() { return std::pair{std::move(_geometry_offsets), std::move(_coordinates)}; }

private:
GeometryArray _geometry_offsets;
CoordinateArray _coordinates;
};

/**
* @brief Factory method to construct multipoint array from initializer list of multipoints.
*
* Example: Construct an array of 2 multipoints, each with 2, 0, 1 points:
* using P = vec_2d<float>;
* make_multipoints_array({{P{0.0, 1.0}, P{2.0, 0.0}}, {}, {P{3.0, 4.0}}});
*
* Example: Construct an empty multilinestring array:
* make_multipoints_array<float>({}); // Explict parameter required to deduce type.
*
* @tparam T Type of coordinate
* @param inl List of multipoints
* @return multipoints_array object
*/
template <typename T>
auto make_multipoints_array(std::initializer_list<std::initializer_list<vec_2d<T>>> inl)
{
std::vector<std::size_t> offsets{0};
std::transform(inl.begin(), inl.end(), std::back_inserter(offsets), [](auto multipoint) {
return multipoint.size();
});
std::inclusive_scan(offsets.begin(), offsets.end(), offsets.begin());

std::vector<vec_2d<T>> coordinates = std::accumulate(
inl.begin(), inl.end(), std::vector<vec_2d<T>>{}, [](std::vector<vec_2d<T>>& init, auto cur) {
init.insert(init.end(), cur.begin(), cur.end());
return init;
});

return multipoint_array{rmm::device_vector<std::size_t>(offsets),
rmm::device_vector<vec_2d<T>>(coordinates)};
}

} // namespace test
} // namespace cuspatial
Loading