Template and generalize Morton code generation

arborx · aprokop · Aug 29, 2022 · Jul 28, 2022 · Jul 28, 2022 · Aug 25, 2022
commit 1cd771c174e6ca8dc5052ba8ac7cb172d65761a4
diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp
@@ -148,7 +148,7 @@ BasicBoundingVolumeHierarchy<MemorySpace, BoundingVolume, Enable>::
   static_assert(KokkosExt::is_accessible_from<typename Access::memory_space,
                                               ExecutionSpace>::value,
                 "Primitives must be accessible from the execution space");
-  Details::check_valid_space_filling_curve(curve);
+  Details::check_valid_space_filling_curve<SpaceFillingCurve, Box>(curve);
 
   KokkosExt::ScopedProfileRegion guard("ArborX::BVH::BVH");
 
@@ -185,7 +185,7 @@ BasicBoundingVolumeHierarchy<MemorySpace, BoundingVolume, Enable>::
   // map primitives from multidimensional domain to one-dimensional interval
   using LinearOrderingValueType = Kokkos::detected_t<
       Details::SpaceFillingCurveProjectionArchetypeExpression,
-      SpaceFillingCurve, Point>;
+      SpaceFillingCurve, Box, Point>;
   Kokkos::View<LinearOrderingValueType *, MemorySpace> linear_ordering_indices(
       Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
                          "ArborX::BVH::BVH::linear_ordering"),

diff --git a/src/details/ArborX_DetailsBatchedQueries.hpp b/src/details/ArborX_DetailsBatchedQueries.hpp
@@ -60,7 +60,7 @@ struct BatchedQueries
 
     using LinearOrderingValueType =
         Kokkos::detected_t<SpaceFillingCurveProjectionArchetypeExpression,
-                           SpaceFillingCurve, Point>;
+                           SpaceFillingCurve, Box, Point>;
     Kokkos::View<LinearOrderingValueType *, DeviceType> linear_ordering_indices(
         Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
                            "ArborX::BVH::query::linear_ordering"),

diff --git a/src/details/ArborX_DetailsMortonCode.hpp b/src/details/ArborX_DetailsMortonCode.hpp
@@ -13,6 +13,7 @@
 #define ARBORX_DETAILS_MORTON_CODE_UTILS_HPP
 
 #include <ArborX_DetailsKokkosExtMinMaxOperations.hpp> // min. max
+#include <ArborX_GeometryTraits.hpp>
 
 namespace ArborX
 {
@@ -23,9 +24,12 @@ namespace Details
 // Magic numbers generated by the script in
 // https://stackoverflow.com/questions/1024754/how-to-compute-a-3d-morton-number-interleave-the-bits-of-3-ints/18528775#18528775
 
+template <int DIM>
+KOKKOS_INLINE_FUNCTION unsigned int expandBitsBy(unsigned int);
+
 // Insert one 0 bit after each of the 16 low bits of x
-KOKKOS_INLINE_FUNCTION
-unsigned int expandBitsBy1(unsigned int x)
+template <>
+KOKKOS_INLINE_FUNCTION unsigned int expandBitsBy<1>(unsigned int x)
 {
   x &= 0x0000ffffu;
   x = (x ^ (x << 8)) & 0x00ff00ffu;
@@ -35,9 +39,24 @@ unsigned int expandBitsBy1(unsigned int x)
   return x;
 }
 
+// Insert two 0 bits after each of the 10 low bits of x
+template <>
+KOKKOS_INLINE_FUNCTION unsigned int expandBitsBy<2>(unsigned int x)
+{
+  x &= 0x000003ffu;
+  x = (x ^ (x << 16)) & 0xff0000ffu;
+  x = (x ^ (x << 8)) & 0x0300f00fu;
+  x = (x ^ (x << 4)) & 0x030c30c3u;
+  x = (x ^ (x << 2)) & 0x09249249u;
+  return x;
+}
+
+template <int DIM>
+KOKKOS_INLINE_FUNCTION unsigned long long expandBitsBy(unsigned long long);
+
 // Insert one 0 bit after each of the 31 low bits of x
-KOKKOS_INLINE_FUNCTION
-unsigned long long expandBitsBy1(unsigned long long x)
+template <>
+KOKKOS_INLINE_FUNCTION unsigned long long expandBitsBy<1>(unsigned long long x)
 {
   x &= 0x7fffffffllu;
   x = (x | x << 16) & 0x7fff0000ffffllu;
@@ -48,21 +67,9 @@ unsigned long long expandBitsBy1(unsigned long long x)
   return x;
 }
 
-// Insert two 0 bits after each of the 10 low bits of x
-KOKKOS_INLINE_FUNCTION
-unsigned int expandBitsBy2(unsigned int x)
-{
-  x &= 0x000003ffu;
-  x = (x ^ (x << 16)) & 0xff0000ffu;
-  x = (x ^ (x << 8)) & 0x0300f00fu;
-  x = (x ^ (x << 4)) & 0x030c30c3u;
-  x = (x ^ (x << 2)) & 0x09249249u;
-  return x;
-}
-
 // Insert two 0 bits after each of the 21 low bits of x
-KOKKOS_INLINE_FUNCTION
-unsigned long long expandBitsBy2(unsigned long long x)
+template <>
+KOKKOS_INLINE_FUNCTION unsigned long long expandBitsBy<2>(unsigned long long x)
 {
   x &= 0x1fffffllu;
   x = (x | x << 32) & 0x1f00000000ffffllu;
@@ -73,79 +80,110 @@ unsigned long long expandBitsBy2(unsigned long long x)
   return x;
 }
 
-// Calculate a 32-bit Morton code for a 2D point located within [0, 1]^2.
-KOKKOS_INLINE_FUNCTION
-unsigned int morton32(float x, float y)
+// Insert three 0 bits after each of the 15 low bits of x
+template <>
+KOKKOS_INLINE_FUNCTION unsigned long long expandBitsBy<3>(unsigned long long x)
 {
-  // The interval [0,1] is subdivided into 65,536 bins (in each direction).
-  constexpr unsigned N = (1u << 16);
-
-  using KokkosExt::max;
-  using KokkosExt::min;
+  x &= 0x7fffllu;
+  x = (x | x << 32) & 0x7800000007ffllu;
+  x = (x | x << 16) & 0x780007c0003fllu;
+  x = (x | x << 8) & 0x40380700c03807llu;
+  x = (x | x << 4) & 0x43084308430843llu;
+  x = (x | x << 2) & 0x109090909090909llu;
+  x = (x | x << 1) & 0x111111111111111llu;
+  return x;
+}
 
-  x = min(max(x * N, 0.f), (float)N - 1);
-  y = min(max(y * N, 0.f), (float)N - 1);
+// Insert four 0 bits after each of the 12 low bits of x
+template <>
+KOKKOS_INLINE_FUNCTION unsigned long long expandBitsBy<4>(unsigned long long x)
+{
+  x &= 0xfffllu;
+  x = (x | x << 32) & 0xf00000000ffllu;
+  x = (x | x << 16) & 0xf0000f0000fllu;
+  x = (x | x << 8) & 0xc0300c0300c03llu;
+  x = (x | x << 4) & 0x84210842108421llu;
+  return x;
+}
 
-  return 2 * expandBitsBy1((unsigned int)x) + expandBitsBy1((unsigned int)y);
+// Insert five 0 bits after each of the 10 low bits of x
+template <>
+KOKKOS_INLINE_FUNCTION unsigned long long expandBitsBy<5>(unsigned long long x)
+{
+  x &= 0x3ffllu;
+  x = (x | x << 32) & 0x3800000007fllu;
+  x = (x | x << 16) & 0x3800070000fllu;
+  x = (x | x << 8) & 0x3008060100c03llu;
+  x = (x | x << 4) & 0x21008421008421llu;
+  x = (x | x << 2) & 0x21021021021021llu;
+  x = (x | x << 1) & 0x41041041041041llu;
+  return x;
 }
 
-// Calculate a 30-bit Morton code for a 3D point located within [0, 1]^3.
-KOKKOS_INLINE_FUNCTION
-unsigned int morton32(float x, float y, float z)
+template <typename Point,
+          typename Enable = std::enable_if_t<GeometryTraits::is_point<Point>{}>>
+KOKKOS_INLINE_FUNCTION unsigned int morton32(Point const &p)
 {
-  // The interval [0,1] is subdivided into 1024 bins (in each direction).
-  constexpr unsigned N = (1u << 10);
+  constexpr int DIM = GeometryTraits::dimension<Point>::value;
+  constexpr unsigned N = (1u << (32 / DIM));
 
   using KokkosExt::max;
   using KokkosExt::min;
 
-  x = min(max(x * N, 0.f), (float)N - 1);
-  y = min(max(y * N, 0.f), (float)N - 1);
-  z = min(max(z * N, 0.f), (float)N - 1);
+  unsigned int r = 0;
+  for (int d = 0; d < DIM; ++d)
+  {
+    auto x = min(max(p[d] * N, 0.f), (float)N - 1);
+    r += (expandBitsBy<DIM - 1>((unsigned int)x) << (DIM - d - 1));
+  }
 
-  return 4 * expandBitsBy2((unsigned)x) + 2 * expandBitsBy2((unsigned)y) +
-         expandBitsBy2((unsigned)z);
+  return r;
 }
 
-// Calculate a 62-bit Morton code for a 2D point located within [0, 1]^2.
-KOKKOS_INLINE_FUNCTION
-unsigned long long morton64(float x, float y)
+template <
+    typename Point,
+    std::enable_if_t<GeometryTraits::is_point<Point>{} &&
+                     GeometryTraits::dimension<Point>::value != 2> * = nullptr>
+KOKKOS_INLINE_FUNCTION unsigned long long morton64(Point const &p)
 {
-  // The interval [0,1] is subdivided into 2,147,483,648 bins (in each
-  // direction).
-  constexpr unsigned N = (1u << 31);
+  constexpr int DIM = GeometryTraits::dimension<Point>::value;
+  constexpr unsigned N = (1u << (63 / DIM));
 
   using KokkosExt::max;
   using KokkosExt::min;
 
-  // Have to use double as float is not sufficient to represent large integers,
-  // which would result in some missing bins.
-  double xd = min(max((double)x * N, 0.), (double)N - 1);
-  double yd = min(max((double)y * N, 0.), (double)N - 1);
+  unsigned long long r = 0;
+  for (int d = 0; d < DIM; ++d)
+  {
+    auto x = min(max(p[d] * N, 0.f), (float)N - 1);
+    r += (expandBitsBy<DIM - 1>((unsigned long long)x) << (DIM - d - 1));
+  }
 
-  return 2 * expandBitsBy1((unsigned long long)xd) +
-         expandBitsBy1((unsigned long long)yd);
+  return r;
 }
 
-// Calculate a 63-bit Morton code for a 3D point located within [0, 1]^3.
-KOKKOS_INLINE_FUNCTION
-unsigned long long morton64(float x, float y, float z)
+// Calculate a 62-bit Morton code for a 2D point located within [0, 1]^2.
+// Special case because it needs double.
+template <
+    typename Point,
+    std::enable_if_t<GeometryTraits::is_point<Point>{} &&
+                     GeometryTraits::dimension<Point>::value == 2> * = nullptr>
+KOKKOS_INLINE_FUNCTION unsigned long long morton64(Point const &p)
 {
-  // The interval [0,1] is subdivided into 2,097,152 bins (in each direction).
-  constexpr unsigned N = (1u << 21);
+  // The interval [0,1] is subdivided into 2,147,483,648 bins (in each
+  // direction).
+  constexpr unsigned N = (1u << 31);
 
   using KokkosExt::max;
   using KokkosExt::min;
 
-  // float is sufficient to represent all integers up to 16,777,216 (2^24),
-  // which is greater than N. So there is no need to use double here.
-  x = min(max(x * N, 0.f), (float)N - 1);
-  y = min(max(y * N, 0.f), (float)N - 1);
-  z = min(max(z * N, 0.f), (float)N - 1);
+  // Have to use double as float is not sufficient to represent large
+  // integers, which would result in some missing bins.
+  auto xd = min(max((double)p[0] * N, 0.), (double)N - 1);
+  auto yd = min(max((double)p[1] * N, 0.), (double)N - 1);
 
-  return 4 * expandBitsBy2((unsigned long long)x) +
-         2 * expandBitsBy2((unsigned long long)y) +
-         expandBitsBy2((unsigned long long)z);
+  return 2 * expandBitsBy<1>((unsigned long long)xd) +
+         expandBitsBy<1>((unsigned long long)yd);
 }
 
 } // namespace Details

diff --git a/src/details/ArborX_SpaceFillingCurves.hpp b/src/details/ArborX_SpaceFillingCurves.hpp
@@ -12,7 +12,6 @@
 #ifndef ARBORX_SPACE_FILLING_CURVES_HPP
 #define ARBORX_SPACE_FILLING_CURVES_HPP
 
-#include <ArborX_Box.hpp>
 #include <ArborX_DetailsAlgorithms.hpp>
 #include <ArborX_DetailsMortonCode.hpp>
 
@@ -26,35 +25,41 @@ namespace Experimental
 
 struct Morton32
 {
+  template <typename Box, typename Point,
+            std::enable_if_t<GeometryTraits::is_point<Point>{}> * = nullptr>
   KOKKOS_FUNCTION auto operator()(Box const &scene_bounding_box, Point p) const
   {
     Details::translateAndScale(p, p, scene_bounding_box);
-    return Details::morton32(p[0], p[1], p[2]);
+    return Details::morton32(p);
   }
-  template <typename Geometry>
+  template <typename Box, typename Geometry,
+            std::enable_if_t<!GeometryTraits::is_point<Geometry>{}> * = nullptr>
   KOKKOS_FUNCTION auto operator()(Box const &scene_bounding_box,
                                   Geometry const &geometry) const
   {
     auto p = Details::returnCentroid(geometry);
     Details::translateAndScale(p, p, scene_bounding_box);
-    return Details::morton32(p[0], p[1], p[2]);
+    return Details::morton32(p);
   }
 };
 
 struct Morton64
 {
+  template <typename Box, typename Point,
+            std::enable_if_t<GeometryTraits::is_point<Point>{}> * = nullptr>
   KOKKOS_FUNCTION auto operator()(Box const &scene_bounding_box, Point p) const
   {
     Details::translateAndScale(p, p, scene_bounding_box);
-    return Details::morton64(p[0], p[1], p[2]);
+    return Details::morton64(p);
   }
-  template <class Geometry>
+  template <typename Box, class Geometry,
+            std::enable_if_t<!GeometryTraits::is_point<Geometry>{}> * = nullptr>
   KOKKOS_FUNCTION auto operator()(Box const &scene_bounding_box,
                                   Geometry const &geometry) const
   {
     auto p = Details::returnCentroid(geometry);
     Details::translateAndScale(p, p, scene_bounding_box);
-    return Details::morton64(p[0], p[1], p[2]);
+    return Details::morton64(p);
   }
 };
 
@@ -63,30 +68,32 @@ struct Morton64
 namespace Details
 {
 
-template <class SpaceFillingCurve, class Geometry>
+template <class SpaceFillingCurve, typename Box, class Geometry>
 using SpaceFillingCurveProjectionArchetypeExpression =
     decltype(std::declval<SpaceFillingCurve const &>()(
         std::declval<Box const &>(), std::declval<Geometry const &>()));
 
-template <class SpaceFillingCurve>
+template <class SpaceFillingCurve, typename Box>
 void check_valid_space_filling_curve(SpaceFillingCurve const &)
 {
+  using Point = std::decay_t<decltype(std::declval<Box>().minCorner())>;
+
   static_assert(
       Kokkos::is_detected<SpaceFillingCurveProjectionArchetypeExpression,
-                          SpaceFillingCurve, Point>::value);
+                          SpaceFillingCurve, Box, Point>::value);
   static_assert(
       Kokkos::is_detected<SpaceFillingCurveProjectionArchetypeExpression,
-                          SpaceFillingCurve, Box>::value);
+                          SpaceFillingCurve, Box, Box>::value);
   using OrderValueType =
       Kokkos::detected_t<SpaceFillingCurveProjectionArchetypeExpression,
-                         SpaceFillingCurve, Point>;
+                         SpaceFillingCurve, Box, Point>;
   static_assert(std::is_same<OrderValueType, unsigned int>::value ||
                 std::is_same<OrderValueType, unsigned long long>::value);
   static_assert(
       std::is_same<
           OrderValueType,
           Kokkos::detected_t<SpaceFillingCurveProjectionArchetypeExpression,
-                             SpaceFillingCurve, Box>>::value);
+                             SpaceFillingCurve, Box, Box>>::value);
 }
 
 } // namespace Details