rapidsai · rapids-bot · Feb 1, 2023 · Dec 19, 2022 · Dec 19, 2022 · Dec 20, 2022
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuspatial/detail/utility/z_order.cuh>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/copy.h>
+#include <thrust/distance.h>
+#include <thrust/iterator/permutation_iterator.h>
+#include <thrust/iterator/transform_iterator.h>
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/remove.h>
+#include <thrust/transform.h>
+#include <thrust/tuple.h>
+
+#include <utility>
+
+namespace cuspatial {
+namespace detail {
+
+static __device__ uint8_t const leaf_indicator = 0;
+static __device__ uint8_t const quad_indicator = 1;
+static __device__ uint8_t const none_indicator = 2;
+
+template <typename InputIterator, typename OutputIterator>
+inline int32_t copy_leaf_intersections(InputIterator input_begin,
+                                       InputIterator input_end,
+                                       OutputIterator output_begin,
+                                       rmm::cuda_stream_view stream)
+{
+  return thrust::distance(
+    output_begin,
+    thrust::copy_if(
+      rmm::exec_policy(stream), input_begin, input_end, output_begin, [] __device__(auto const& t) {
+        return thrust::get<0>(t) == leaf_indicator;
+      }));
+}
+
+template <typename InputIterator, typename OutputIterator>
+inline int32_t remove_non_quad_intersections(InputIterator input_begin,
+                                             InputIterator input_end,
+                                             OutputIterator output_begin,
+                                             rmm::cuda_stream_view stream)
+{
+  return thrust::distance(
+    output_begin,
+    thrust::remove_if(
+      rmm::exec_policy(stream), input_begin, input_end, output_begin, [] __device__(auto const& t) {
+        return thrust::get<0>(t) != quad_indicator;
+      }));
+}
+
+template <class T,
+          class KeyIterator,
+          class LevelIterator,
+          class IsInternalIterator,
+          class BoundingBoxIterator,
+          class NodeIndicesIterator,
+          class BBoxIndicesIterator,
+          class NodePairsIterator,
+          class LeafPairsIterator>
+inline std::pair<int32_t, int32_t> find_intersections(KeyIterator keys_first,
+                                                      LevelIterator levels_first,
+                                                      IsInternalIterator is_internal_node_first,
+                                                      BoundingBoxIterator bounding_box_first,
+                                                      NodeIndicesIterator node_indices,
+                                                      BBoxIndicesIterator bbox_indices,
+                                                      NodePairsIterator node_pairs,
+                                                      LeafPairsIterator leaf_pairs,
+                                                      int32_t num_pairs,
+                                                      T x_min,
+                                                      T y_min,
+                                                      T scale,
+                                                      int8_t max_depth,
+                                                      rmm::cuda_stream_view stream)
+{
+  auto nodes_first = thrust::make_zip_iterator(keys_first, levels_first, is_internal_node_first);
+
+  thrust::transform(
+    rmm::exec_policy(stream),
+    thrust::make_zip_iterator(node_indices, bbox_indices),
+    thrust::make_zip_iterator(node_indices, bbox_indices) + num_pairs,
+    node_pairs,
+    [x_min, y_min, scale, max_depth, nodes = nodes_first, bboxes = bounding_box_first] __device__(
+      auto const& node_and_bbox) {
+      auto const& node_idx = thrust::get<0>(node_and_bbox);
+      auto const& bbox_idx = thrust::get<1>(node_and_bbox);
+
+      auto const& node                = nodes[node_idx];
+      uint32_t const& key             = thrust::get<0>(node);
+      uint8_t const& level            = thrust::get<1>(node);
+      uint8_t const& is_internal_node = thrust::get<2>(node);
+
+      auto const& bbox       = bboxes[bbox_idx];
+      auto const& bbox_min   = bbox.v1;
+      auto const& bbox_max   = bbox.v2;
+      auto const& poly_x_min = bbox_min.x;
+      auto const& poly_y_min = bbox_min.y;
+      auto const& poly_x_max = bbox_max.x;
+      auto const& poly_y_max = bbox_max.y;
+
+      T const key_x       = utility::z_order_x(key);
+      T const key_y       = utility::z_order_y(key);
+      T const level_scale = scale * (1 << (max_depth - 1 - level));
+      T const node_x_min  = x_min + (key_x + 0) * level_scale;
+      T const node_y_min  = y_min + (key_y + 0) * level_scale;
+      T const node_x_max  = x_min + (key_x + 1) * level_scale;
+      T const node_y_max  = y_min + (key_y + 1) * level_scale;
+
+      if ((node_x_min > poly_x_max) || (node_x_max < poly_x_min) || (node_y_min > poly_y_max) ||
+          (node_y_max < poly_y_min)) {
+        // if no overlap, return type = none_indicator
+        return thrust::make_tuple(none_indicator, level, node_idx, bbox_idx);
+      }
+      // otherwise return type = leaf_indicator (0) or quad_indicator (1)
+      return thrust::make_tuple(is_internal_node, level, node_idx, bbox_idx);
+    });
+
+  auto num_leaves = copy_leaf_intersections(node_pairs, node_pairs + num_pairs, leaf_pairs, stream);
+
+  auto num_parents =
+    remove_non_quad_intersections(node_pairs, node_pairs + num_pairs, node_pairs, stream);
+
+  return std::make_pair(num_parents, num_leaves);
+}
+
+}  // namespace detail
+}  // namespace cuspatial
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,8 +16,6 @@
 
 #pragma once
 
-#include <indexing/construction/detail/utilities.cuh>
-
 #include <cuspatial/detail/utility/z_order.cuh>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -52,19 +50,19 @@ namespace detail {
  * @param parent_node_indices indices of the intersecting quadrants at the current level
  * @param parent_poly_indices indices of the intersecting polygons at the current level
  * @param stream CUDA stream on which to schedule work
- * @return An std::tuple containing the `cudf::size_type` number of elements in the next level, and
+ * @return A `std::tuple` containing the `int32_t` number of elements in the next level, and
  * `rmm::device_uvectors` for each of the `types`, `levels`, `quad_indices`, and `poly_indices` of
  * the next-level quadrants and polygons
  */
-template <typename LengthsIter, typename OffsetsIter>
-inline std::tuple<cudf::size_type,
+template <typename LengthsIterator, typename OffsetsIterator>
+inline std::tuple<int32_t,
                   rmm::device_uvector<uint8_t>,
                   rmm::device_uvector<uint8_t>,
                   rmm::device_uvector<uint32_t>,
                   rmm::device_uvector<uint32_t>>
-descend_quadtree(LengthsIter counts,
-                 OffsetsIter offsets,
-                 cudf::size_type num_quads,
+descend_quadtree(LengthsIterator counts,
+                 OffsetsIterator offsets,
+                 int32_t num_quads,
                  rmm::device_uvector<uint8_t>& parent_types,
                  rmm::device_uvector<uint8_t>& parent_levels,
                  rmm::device_uvector<uint32_t>& parent_node_indices,

@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuspatial/detail/iterator.hpp>
+#include <cuspatial/experimental/detail/join/intersection.cuh>
+#include <cuspatial/experimental/detail/join/traversal.cuh>
+#include <cuspatial/experimental/point_quadtree.cuh>
+#include <cuspatial/traits.hpp>
+
+#include <thrust/iterator/discard_iterator.h>
+
+#include <iterator>
+#include <utility>
+
+namespace cuspatial {
+
+template <class KeyIterator,
+          class LevelIterator,
+          class IsInternalIterator,
+          class BoundingBoxIterator,
+          class T>
+std::pair<rmm::device_uvector<uint32_t>, rmm::device_uvector<uint32_t>>
+join_quadtree_and_bounding_boxes(KeyIterator keys_first,
+                                 KeyIterator keys_last,
+                                 LevelIterator levels_first,
+                                 IsInternalIterator is_internal_nodes_first,
+                                 KeyIterator lengths_first,
+                                 KeyIterator offsets_first,
+                                 BoundingBoxIterator bounding_boxes_first,
+                                 BoundingBoxIterator bounding_boxes_last,
+                                 T x_min,
+                                 T y_min,
+                                 T scale,
+                                 int8_t max_depth,
+                                 rmm::mr::device_memory_resource* mr,
+                                 rmm::cuda_stream_view stream)
+{
+  static_assert(is_same<T, cuspatial::iterator_vec_base_type<BoundingBoxIterator>>(),
+                "Iterator value_type mismatch");
+
+  auto const num_nodes = std::distance(keys_first, keys_last);
+  auto const num_boxes = std::distance(bounding_boxes_first, bounding_boxes_last);
+
+  // Count the number of top-level nodes to start.
+  // This could be provided explicitly, but count_if should be fast enough.
+  auto num_top_level_leaves = thrust::count_if(rmm::exec_policy(stream),
+                                               levels_first,
+                                               levels_first + num_nodes,
+                                               thrust::placeholders::_1 == 0);
+
+  auto num_pairs = num_top_level_leaves * num_boxes;
+
+  int32_t num_leaves{0};
+  int32_t num_results{0};
+  int32_t num_parents{0};
+
+  // The found bbox-quad pairs are dynamic and can not be pre-allocated.
+  // Relevant arrays are resized accordingly for memory efficiency.
+
+  // Vectors for intermediate bbox and node indices at each level
+  rmm::device_uvector<uint8_t> cur_types(num_pairs, stream);
+  rmm::device_uvector<uint8_t> cur_levels(num_pairs, stream);
+  rmm::device_uvector<uint32_t> cur_node_idxs(num_pairs, stream);
+  rmm::device_uvector<uint32_t> cur_bbox_idxs(num_pairs, stream);
+
+  // Vectors for found pairs of bbox and leaf node indices
+  rmm::device_uvector<uint32_t> out_node_idxs(num_pairs, stream, mr);
+  rmm::device_uvector<uint32_t> out_bbox_idxs(num_pairs, stream, mr);
+
+  auto make_current_level_iter = [&]() {
+    return thrust::make_zip_iterator(
+      cur_types.begin(), cur_levels.begin(), cur_node_idxs.begin(), cur_bbox_idxs.begin());
+  };
+
+  auto make_output_values_iter = [&]() {
+    return num_results + thrust::make_zip_iterator(thrust::make_discard_iterator(),
+                                                   thrust::make_discard_iterator(),
+                                                   out_node_idxs.begin(),
+                                                   out_bbox_idxs.begin());
+  };
+
+  // Find intersections for all the top level quadrants and bounding boxes
+  std::tie(num_parents, num_leaves) =
+    detail::find_intersections(keys_first,
+                               levels_first,
+                               is_internal_nodes_first,
+                               bounding_boxes_first,
+                               // The top-level node indices
+                               detail::make_counting_transform_iterator(
+                                 0, [=] __device__(auto i) { return i % num_top_level_leaves; }),
+                               // The top-level bbox indices
+                               detail::make_counting_transform_iterator(
+                                 0, [=] __device__(auto i) { return i / num_top_level_leaves; }),
+                               make_current_level_iter(),  // intermediate intersections or parent
+                                                           // quadrants found during traversal
+                               // found intersecting quadrant and bbox indices for output
+                               make_output_values_iter(),
+                               num_pairs,
+                               x_min,
+                               y_min,
+                               scale,
+                               max_depth,
+                               stream);
+
+  num_results += num_leaves;
+
+  // Traverse the quadtree descending to `max_depth`, or until no more parent quadrants are found
+  for (uint8_t level{1}; level < max_depth && num_parents > 0; ++level) {
+    // Shrink the current level's vectors to overwrite elements removed by `find_intersections()`
+    cur_types.shrink_to_fit(stream);
+    cur_levels.shrink_to_fit(stream);
+    cur_node_idxs.shrink_to_fit(stream);
+    cur_bbox_idxs.shrink_to_fit(stream);
+
+    // Grow preallocated output vectors. The next level will expand out to no more
+    // than `num_parents * 4` pairs, since each parent quadrant has up to 4 children.
+    size_t max_num_results = num_results + num_parents * 4;
+    if (max_num_results > out_node_idxs.capacity()) {
+      // TODO: grow preallocated output sizes in multiples of the current capacity?
+      // auto new_size = out_node_idxs.capacity() *  //
+      //                 ((max_num_results / out_node_idxs.capacity()) + 1);
+      out_node_idxs.resize(max_num_results, stream);
+      out_bbox_idxs.resize(max_num_results, stream);
+    }
+
+    // Walk one level down and fill the current level's vectors with
+    // the next level's quadrant info and bbox indices.
+    std::tie(num_pairs, cur_types, cur_levels, cur_node_idxs, cur_bbox_idxs) =
+      detail::descend_quadtree(lengths_first,
+                               offsets_first,
+                               num_parents,
+                               cur_types,
+                               cur_levels,
+                               cur_node_idxs,
+                               cur_bbox_idxs,
+                               stream);
+
+    // Find intersections for the the next level's quadrants and bboxes
+    std::tie(num_parents, num_leaves) =
+      detail::find_intersections(keys_first,
+                                 levels_first,
+                                 is_internal_nodes_first,
+                                 bounding_boxes_first,
+                                 cur_node_idxs.begin(),
+                                 cur_bbox_idxs.begin(),
+                                 make_current_level_iter(),  // intermediate intersections or parent
+                                                             // quadrants found during traversal
+                                 // found intersecting quadrant and bbox indices for output
+                                 make_output_values_iter(),
+                                 num_pairs,
+                                 x_min,
+                                 y_min,
+                                 scale,
+                                 max_depth,
+                                 stream);
+
+    num_results += num_leaves;
+  }
+
+  // Sort the output bbox/quad indices by quadrant
+  [&]() {
+    // Copy the relevant node offsets into a temporary vector so we don't modify the quadtree column
+    rmm::device_uvector<uint32_t> tmp_node_offsets(num_results, stream);
+
+    auto const iter = thrust::make_permutation_iterator(offsets_first, out_node_idxs.begin());
+
+    thrust::copy(rmm::exec_policy(stream), iter, iter + num_results, tmp_node_offsets.begin());
+
+    thrust::stable_sort_by_key(
+      rmm::exec_policy(stream),
+      tmp_node_offsets.begin(),
+      tmp_node_offsets.end(),
+      thrust::make_zip_iterator(out_bbox_idxs.begin(), out_node_idxs.begin()));
+  }();
+
+  out_node_idxs.resize(num_results, stream);
+  out_bbox_idxs.resize(num_results, stream);
+  out_node_idxs.shrink_to_fit(stream);
+  out_bbox_idxs.shrink_to_fit(stream);
+
+  return {std::move(out_bbox_idxs), std::move(out_node_idxs)};
+}
+
+}  // namespace cuspatial