Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add header-only cuspatial::join_quadtree_and_bounding_boxes #861

146 changes: 146 additions & 0 deletions cpp/include/cuspatial/experimental/detail/join/intersection.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cuspatial/detail/utility/z_order.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/copy.h>
#include <thrust/distance.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/remove.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>

#include <utility>

namespace cuspatial {
namespace detail {

static __device__ uint8_t const leaf_indicator = 0;
static __device__ uint8_t const quad_indicator = 1;
static __device__ uint8_t const none_indicator = 2;
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved

template <typename InputIterator, typename OutputIterator>
inline int32_t copy_leaf_intersections(InputIterator input_begin,
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
InputIterator input_end,
OutputIterator output_begin,
rmm::cuda_stream_view stream)
{
return thrust::distance(
output_begin,
thrust::copy_if(
rmm::exec_policy(stream), input_begin, input_end, output_begin, [] __device__(auto const& t) {
return thrust::get<0>(t) == leaf_indicator;
}));
}

template <typename InputIterator, typename OutputIterator>
inline int32_t remove_non_quad_intersections(InputIterator input_begin,
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
InputIterator input_end,
OutputIterator output_begin,
rmm::cuda_stream_view stream)
{
return thrust::distance(
output_begin,
thrust::remove_if(
rmm::exec_policy(stream), input_begin, input_end, output_begin, [] __device__(auto const& t) {
return thrust::get<0>(t) != quad_indicator;
}));
}

template <class T,
class KeyIterator,
class LevelIterator,
class IsInternalIterator,
class BoundingBoxIterator,
class NodeIndicesIterator,
class BBoxIndicesIterator,
class NodePairsIterator,
class LeafPairsIterator>
inline std::pair<int32_t, int32_t> find_intersections(KeyIterator keys_first,
LevelIterator levels_first,
IsInternalIterator is_internal_node_first,
BoundingBoxIterator bounding_box_first,
NodeIndicesIterator node_indices,
BBoxIndicesIterator bbox_indices,
NodePairsIterator node_pairs,
LeafPairsIterator leaf_pairs,
int32_t num_pairs,
T x_min,
T y_min,
T scale,
int8_t max_depth,
rmm::cuda_stream_view stream)
{
auto nodes_first = thrust::make_zip_iterator(keys_first, levels_first, is_internal_node_first);

thrust::transform(
rmm::exec_policy(stream),
thrust::make_zip_iterator(node_indices, bbox_indices),
thrust::make_zip_iterator(node_indices, bbox_indices) + num_pairs,
node_pairs,
[x_min, y_min, scale, max_depth, nodes = nodes_first, bboxes = bounding_box_first] __device__(
auto const& node_and_bbox) {
auto const& node_idx = thrust::get<0>(node_and_bbox);
auto const& bbox_idx = thrust::get<1>(node_and_bbox);

auto const& node = nodes[node_idx];
uint32_t const& key = thrust::get<0>(node);
uint8_t const& level = thrust::get<1>(node);
uint8_t const& is_internal_node = thrust::get<2>(node);

auto const& bbox = bboxes[bbox_idx];
auto const& bbox_min = bbox.v1;
auto const& bbox_max = bbox.v2;
auto const& poly_x_min = bbox_min.x;
auto const& poly_y_min = bbox_min.y;
auto const& poly_x_max = bbox_max.x;
auto const& poly_y_max = bbox_max.y;

T const key_x = utility::z_order_x(key);
T const key_y = utility::z_order_y(key);
T const level_scale = scale * (1 << (max_depth - 1 - level));
T const node_x_min = x_min + (key_x + 0) * level_scale;
T const node_y_min = y_min + (key_y + 0) * level_scale;
T const node_x_max = x_min + (key_x + 1) * level_scale;
T const node_y_max = y_min + (key_y + 1) * level_scale;

if ((node_x_min > poly_x_max) || (node_x_max < poly_x_min) || (node_y_min > poly_y_max) ||
(node_y_max < poly_y_min)) {
// if no overlap, return type = none_indicator
return thrust::make_tuple(none_indicator, level, node_idx, bbox_idx);
}
// otherwise return type = leaf_indicator (0) or quad_indicator (1)
return thrust::make_tuple(is_internal_node, level, node_idx, bbox_idx);
isVoid marked this conversation as resolved.
Show resolved Hide resolved
});

auto num_leaves = copy_leaf_intersections(node_pairs, node_pairs + num_pairs, leaf_pairs, stream);

auto num_parents =
remove_non_quad_intersections(node_pairs, node_pairs + num_pairs, node_pairs, stream);

return std::make_pair(num_parents, num_leaves);
}

} // namespace detail
} // namespace cuspatial
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,8 +16,6 @@

#pragma once

#include <indexing/construction/detail/utilities.cuh>

#include <cuspatial/detail/utility/z_order.cuh>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -52,19 +50,19 @@ namespace detail {
* @param parent_node_indices indices of the intersecting quadrants at the current level
* @param parent_poly_indices indices of the intersecting polygons at the current level
* @param stream CUDA stream on which to schedule work
* @return An std::tuple containing the `cudf::size_type` number of elements in the next level, and
* @return A `std::tuple` containing the `int32_t` number of elements in the next level, and
* `rmm::device_uvectors` for each of the `types`, `levels`, `quad_indices`, and `poly_indices` of
* the next-level quadrants and polygons
*/
template <typename LengthsIter, typename OffsetsIter>
inline std::tuple<cudf::size_type,
template <typename LengthsIterator, typename OffsetsIterator>
inline std::tuple<int32_t,
rmm::device_uvector<uint8_t>,
rmm::device_uvector<uint8_t>,
rmm::device_uvector<uint32_t>,
rmm::device_uvector<uint32_t>>
descend_quadtree(LengthsIter counts,
OffsetsIter offsets,
cudf::size_type num_quads,
descend_quadtree(LengthsIterator counts,
OffsetsIterator offsets,
int32_t num_quads,
rmm::device_uvector<uint8_t>& parent_types,
rmm::device_uvector<uint8_t>& parent_levels,
rmm::device_uvector<uint32_t>& parent_node_indices,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cuspatial/detail/iterator.hpp>
#include <cuspatial/experimental/detail/join/intersection.cuh>
#include <cuspatial/experimental/detail/join/traversal.cuh>
#include <cuspatial/experimental/point_quadtree.cuh>
#include <cuspatial/traits.hpp>

#include <thrust/iterator/discard_iterator.h>

#include <iterator>
#include <utility>

namespace cuspatial {

template <class KeyIterator,
class LevelIterator,
class IsInternalIterator,
class BoundingBoxIterator,
class T>
std::pair<rmm::device_uvector<uint32_t>, rmm::device_uvector<uint32_t>>
join_quadtree_and_bounding_boxes(KeyIterator keys_first,
KeyIterator keys_last,
LevelIterator levels_first,
IsInternalIterator is_internal_nodes_first,
KeyIterator lengths_first,
KeyIterator offsets_first,
BoundingBoxIterator bounding_boxes_first,
BoundingBoxIterator bounding_boxes_last,
T x_min,
T y_min,
T scale,
int8_t max_depth,
rmm::mr::device_memory_resource* mr,
rmm::cuda_stream_view stream)
{
static_assert(is_same<T, cuspatial::iterator_vec_base_type<BoundingBoxIterator>>(),
"Iterator value_type mismatch");

auto const num_nodes = std::distance(keys_first, keys_last);
auto const num_boxes = std::distance(bounding_boxes_first, bounding_boxes_last);

// Count the number of top-level nodes to start.
// This could be provided explicitly, but count_if should be fast enough.
auto num_top_level_leaves = thrust::count_if(rmm::exec_policy(stream),
levels_first,
levels_first + num_nodes,
thrust::placeholders::_1 == 0);
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved

auto num_pairs = num_top_level_leaves * num_boxes;

int32_t num_leaves{0};
int32_t num_results{0};
int32_t num_parents{0};

// The found bbox-quad pairs are dynamic and can not be pre-allocated.
// Relevant arrays are resized accordingly for memory efficiency.

// Vectors for intermediate bbox and node indices at each level
rmm::device_uvector<uint8_t> cur_types(num_pairs, stream);
rmm::device_uvector<uint8_t> cur_levels(num_pairs, stream);
rmm::device_uvector<uint32_t> cur_node_idxs(num_pairs, stream);
rmm::device_uvector<uint32_t> cur_bbox_idxs(num_pairs, stream);

// Vectors for found pairs of bbox and leaf node indices
rmm::device_uvector<uint32_t> out_node_idxs(num_pairs, stream, mr);
rmm::device_uvector<uint32_t> out_bbox_idxs(num_pairs, stream, mr);

auto make_current_level_iter = [&]() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to use a lambda here? It seems we should create the iterator as a variable instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this lambda closes over variables that are updated each iteration of the loop.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean that the iterators gets invalidated after every iteration of the loop?

Copy link
Contributor Author

@trxcllnt trxcllnt Jan 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make_current_level_iter() returns a zip iterator over the cur_{types,levels,node_idxs,bbox_idxs} vectors, which represent the current level in the tree descent. The descend_quadtree() call walks one level in the tree and returns vectors representing the next "current level."

Similarly, make_output_values_iter() returns a zip iterator of num_results + out_{node,bbox}_idxs. num_results is updated by the detail::find_intersections() call.

The iterators returned by these functions are used both in setting up the initial state of the the top-level quadrants, and in the loop that does the tree traversal. An earlier implementation tracked and updated these two iterators each iteration of the loop, but they were refactored into these lambdas in a cleanup PR.

return thrust::make_zip_iterator(
cur_types.begin(), cur_levels.begin(), cur_node_idxs.begin(), cur_bbox_idxs.begin());
};

auto make_output_values_iter = [&]() {
return num_results + thrust::make_zip_iterator(thrust::make_discard_iterator(),
thrust::make_discard_iterator(),
out_node_idxs.begin(),
out_bbox_idxs.begin());
};

// Find intersections for all the top level quadrants and bounding boxes
std::tie(num_parents, num_leaves) =
detail::find_intersections(keys_first,
levels_first,
is_internal_nodes_first,
bounding_boxes_first,
// The top-level node indices
detail::make_counting_transform_iterator(
0, [=] __device__(auto i) { return i % num_top_level_leaves; }),
// The top-level bbox indices
detail::make_counting_transform_iterator(
0, [=] __device__(auto i) { return i / num_top_level_leaves; }),
make_current_level_iter(), // intermediate intersections or parent
// quadrants found during traversal
// found intersecting quadrant and bbox indices for output
make_output_values_iter(),
num_pairs,
x_min,
y_min,
scale,
max_depth,
stream);

num_results += num_leaves;

// Traverse the quadtree descending to `max_depth`, or until no more parent quadrants are found
for (uint8_t level{1}; level < max_depth && num_parents > 0; ++level) {
// Shrink the current level's vectors to overwrite elements removed by `find_intersections()`
cur_types.shrink_to_fit(stream);
cur_levels.shrink_to_fit(stream);
cur_node_idxs.shrink_to_fit(stream);
cur_bbox_idxs.shrink_to_fit(stream);

// Grow preallocated output vectors. The next level will expand out to no more
// than `num_parents * 4` pairs, since each parent quadrant has up to 4 children.
size_t max_num_results = num_results + num_parents * 4;
if (max_num_results > out_node_idxs.capacity()) {
// TODO: grow preallocated output sizes in multiples of the current capacity?
// auto new_size = out_node_idxs.capacity() * //
// ((max_num_results / out_node_idxs.capacity()) + 1);
out_node_idxs.resize(max_num_results, stream);
out_bbox_idxs.resize(max_num_results, stream);
}

// Walk one level down and fill the current level's vectors with
// the next level's quadrant info and bbox indices.
std::tie(num_pairs, cur_types, cur_levels, cur_node_idxs, cur_bbox_idxs) =
detail::descend_quadtree(lengths_first,
offsets_first,
num_parents,
cur_types,
cur_levels,
cur_node_idxs,
cur_bbox_idxs,
stream);

// Find intersections for the the next level's quadrants and bboxes
std::tie(num_parents, num_leaves) =
detail::find_intersections(keys_first,
levels_first,
is_internal_nodes_first,
bounding_boxes_first,
cur_node_idxs.begin(),
cur_bbox_idxs.begin(),
make_current_level_iter(), // intermediate intersections or parent
// quadrants found during traversal
// found intersecting quadrant and bbox indices for output
make_output_values_iter(),
num_pairs,
x_min,
y_min,
scale,
max_depth,
stream);

num_results += num_leaves;
}

// Sort the output bbox/quad indices by quadrant
[&]() {
// Copy the relevant node offsets into a temporary vector so we don't modify the quadtree column
rmm::device_uvector<uint32_t> tmp_node_offsets(num_results, stream);

auto const iter = thrust::make_permutation_iterator(offsets_first, out_node_idxs.begin());

thrust::copy(rmm::exec_policy(stream), iter, iter + num_results, tmp_node_offsets.begin());

thrust::stable_sort_by_key(
rmm::exec_policy(stream),
tmp_node_offsets.begin(),
tmp_node_offsets.end(),
thrust::make_zip_iterator(out_bbox_idxs.begin(), out_node_idxs.begin()));
}();

out_node_idxs.resize(num_results, stream);
out_bbox_idxs.resize(num_results, stream);
out_node_idxs.shrink_to_fit(stream);
out_bbox_idxs.shrink_to_fit(stream);

return {std::move(out_bbox_idxs), std::move(out_node_idxs)};
}

} // namespace cuspatial
Loading