From 23bdc22469c00201b581156b10b3d3d977e39088 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 5 Apr 2024 16:28:51 -0700 Subject: [PATCH] add patch to unwrap nested tuples of iterators --- .../unwrap_nested_tuple_of_iterators.diff | 376 ++++++++++++++++++ rapids-cmake/cpm/versions.json | 11 +- 2 files changed, 384 insertions(+), 3 deletions(-) create mode 100644 rapids-cmake/cpm/patches/cccl/unwrap_nested_tuple_of_iterators.diff diff --git a/rapids-cmake/cpm/patches/cccl/unwrap_nested_tuple_of_iterators.diff b/rapids-cmake/cpm/patches/cccl/unwrap_nested_tuple_of_iterators.diff new file mode 100644 index 000000000..64de95659 --- /dev/null +++ b/rapids-cmake/cpm/patches/cccl/unwrap_nested_tuple_of_iterators.diff @@ -0,0 +1,376 @@ +diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/tuple b/libcudacxx/include/cuda/std/detail/libcxx/include/tuple +index a02f31fa8..a3e6f6e5d 100644 +--- a/libcudacxx/include/cuda/std/detail/libcxx/include/tuple ++++ b/libcudacxx/include/cuda/std/detail/libcxx/include/tuple +@@ -197,6 +197,10 @@ template + + _LIBCUDACXX_BEGIN_NAMESPACE_STD + ++template ++struct __is_tuple_of_iterator_references : false_type ++{}; ++ + // __tuple_leaf + struct __tuple_leaf_default_constructor_tag {}; + +@@ -851,6 +855,15 @@ public: + _Tp...>::template __tuple_like_constraints<_Tuple>, + __invalid_tuple_constraints>; + ++ // Horrible hack to make tuple_of_iterator_references work ++ template ::value, int> = 0, ++ __enable_if_t<(tuple_size<_TupleOfIteratorReferences>::value == sizeof...(_Tp)), int> = 0> ++ _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR_AFTER_CXX11 tuple(_TupleOfIteratorReferences&& __t) ++ : tuple(_CUDA_VSTD::forward<_TupleOfIteratorReferences>(__t).template __to_tuple<_Tp...>( ++ __make_tuple_indices_t())) ++ {} ++ + template < + class _Tuple, class _Constraints = __tuple_like_constraints<_Tuple>, + __enable_if_t::value, int> = 0, +diff --git a/thrust/testing/zip_function.cu b/thrust/testing/zip_function.cu +index a1545a1a1..9f038f907 100644 +--- a/thrust/testing/zip_function.cu ++++ b/thrust/testing/zip_function.cu +@@ -2,13 +2,17 @@ + + #if THRUST_CPP_DIALECT >= 2011 && !defined(THRUST_LEGACY_GCC) + +-#include ++#include + #include ++#include ++#include + #include + #include + + #include + ++#include ++ + using namespace unittest; + + struct SumThree +@@ -67,4 +71,98 @@ struct TestZipFunctionTransform + }; + VariableUnitTest TestZipFunctionTransformInstance; + ++struct RemovePred ++{ ++ __host__ __device__ bool operator()(const thrust::tuple& ele1, const float&) ++ { ++ return thrust::get<0>(ele1) == thrust::get<1>(ele1); ++ } ++}; ++template ++struct TestZipFunctionMixed ++{ ++ void operator()() ++ { ++ thrust::device_vector vecA{0, 0, 2, 0}; ++ thrust::device_vector vecB{0, 2, 2, 2}; ++ thrust::device_vector vecC{88.0f, 88.0f, 89.0f, 89.0f}; ++ thrust::device_vector expected{88.0f, 89.0f}; ++ ++ auto inputKeyItBegin = ++ thrust::make_zip_iterator(thrust::make_zip_iterator(vecA.begin(), vecB.begin()), vecC.begin()); ++ auto endIt = ++ thrust::remove_if(inputKeyItBegin, inputKeyItBegin + vecA.size(), thrust::make_zip_function(RemovePred{})); ++ auto numEle = endIt - inputKeyItBegin; ++ vecA.resize(numEle); ++ vecB.resize(numEle); ++ vecC.resize(numEle); ++ ++ ASSERT_EQUAL(numEle, 2); ++ ASSERT_EQUAL(vecC, expected); ++ } ++}; ++SimpleUnitTest > TestZipFunctionMixedInstance; ++ ++struct NestedFunctionCall ++{ ++ __host__ __device__ bool ++ operator()(const thrust::tuple, thrust::tuple>>& idAndPt) ++ { ++ thrust::tuple, thrust::tuple> ele1 = thrust::get<1>(idAndPt); ++ thrust::tuple p1 = thrust::get<0>(ele1); ++ thrust::tuple p2 = thrust::get<1>(ele1); ++ return thrust::get<0>(p1) == thrust::get<0>(p2) || thrust::get<1>(p1) == thrust::get<1>(p2); ++ } ++}; ++ ++template ++struct TestNestedZipFunction ++{ ++ void operator()() ++ { ++ thrust::device_vector PX{0, 1, 2, 3}; ++ thrust::device_vector PY{0, 1, 2, 2}; ++ thrust::device_vector SS{0, 1, 2}; ++ thrust::device_vector ST{1, 2, 3}; ++ thrust::device_vector vecC{88.0f, 88.0f, 89.0f, 89.0f}; ++ ++ auto segIt = thrust::make_zip_iterator( ++ thrust::make_zip_iterator(thrust::make_permutation_iterator(PX.begin(), SS.begin()), ++ thrust::make_permutation_iterator(PY.begin(), SS.begin())), ++ thrust::make_zip_iterator(thrust::make_permutation_iterator(PX.begin(), ST.begin()), ++ thrust::make_permutation_iterator(PY.begin(), ST.begin()))); ++ auto idAndSegIt = thrust::make_zip_iterator(thrust::make_counting_iterator(0u), segIt); ++ ++ thrust::device_vector isMH{false, false, false}; ++ thrust::device_vector expected{false, false, true}; ++ thrust::transform(idAndSegIt, idAndSegIt + SS.size(), isMH.begin(), NestedFunctionCall{}); ++ ASSERT_EQUAL(isMH, expected); ++ } ++}; ++SimpleUnitTest > TestNestedZipFunctionInstance; ++ ++struct SortPred { ++ __device__ __forceinline__ ++ bool operator()(const thrust::tuple, int>& a, ++ const thrust::tuple, int>& b) { ++ return thrust::get<1>(a) < thrust::get<1>(b); ++ } ++}; ++template ++struct TestNestedZipFunction2 ++{ ++ void operator()() ++ { ++ thrust::device_vector A(5); ++ thrust::device_vector B(5); ++ thrust::device_vector C(5); ++ auto n = A.size(); ++ ++ auto tupleIt = thrust::make_zip_iterator(cuda::std::begin(A), cuda::std::begin(B)); ++ auto nestedTupleIt = thrust::make_zip_iterator(tupleIt, cuda::std::begin(C)); ++ thrust::sort(nestedTupleIt, nestedTupleIt + n, SortPred{}); ++ } ++}; ++SimpleUnitTest > TestNestedZipFunctionInstance2; ++ + #endif // THRUST_CPP_DIALECT +diff --git a/thrust/thrust/iterator/detail/tuple_of_iterator_references.h b/thrust/thrust/iterator/detail/tuple_of_iterator_references.h +index 1bb721909..91f4fcc65 100644 +--- a/thrust/thrust/iterator/detail/tuple_of_iterator_references.h ++++ b/thrust/thrust/iterator/detail/tuple_of_iterator_references.h +@@ -26,111 +26,124 @@ + # pragma system_header + #endif // no system header + +-#include +-#include +- +-#include +-#include +-#include + #include ++#include ++#include ++#include ++ ++#include ++#include + + THRUST_NAMESPACE_BEGIN + + namespace detail + { + +-template< +- typename... Ts +-> +- class tuple_of_iterator_references : public thrust::tuple ++template ++class tuple_of_iterator_references; ++ ++template ++struct maybe_unwrap_nested ++{ ++ __host__ __device__ U operator()(const T& t) const ++ { ++ return t; ++ } ++}; ++ ++template ++struct maybe_unwrap_nested, tuple_of_iterator_references> + { +- public: +- using super_t = thrust::tuple; +- using super_t::super_t; ++ __host__ __device__ thrust::tuple operator()(const tuple_of_iterator_references& t) const ++ { ++ return t.template __to_tuple(typename ::cuda::std::__make_tuple_indices::type{}); ++ } ++}; + +- inline __host__ __device__ +- tuple_of_iterator_references() ++template < typename... Ts > ++class tuple_of_iterator_references : public thrust::tuple ++{ ++public: ++ using super_t = thrust::tuple; ++ using super_t::super_t; ++ ++ inline __host__ __device__ tuple_of_iterator_references() + : super_t() +- {} ++ {} + +- // allow implicit construction from tuple +- inline __host__ __device__ +- tuple_of_iterator_references(const super_t& other) ++ // allow implicit construction from tuple ++ inline __host__ __device__ tuple_of_iterator_references(const super_t& other) + : super_t(other) +- {} ++ {} + +- inline __host__ __device__ +- tuple_of_iterator_references(super_t&& other) ++ inline __host__ __device__ tuple_of_iterator_references(super_t&& other) + : super_t(::cuda::std::move(other)) +- {} +- +- // allow assignment from tuples +- // XXX might be worthwhile to guard this with an enable_if is_assignable +- __thrust_exec_check_disable__ +- template +- inline __host__ __device__ +- tuple_of_iterator_references &operator=(const thrust::tuple &other) +- { +- super_t::operator=(other); +- return *this; +- } +- +- // allow assignment from pairs +- // XXX might be worthwhile to guard this with an enable_if is_assignable +- __thrust_exec_check_disable__ +- template +- inline __host__ __device__ +- tuple_of_iterator_references &operator=(const thrust::pair &other) +- { +- super_t::operator=(other); +- return *this; +- } +- +- // allow assignment from reference +- // XXX perhaps we should generalize to reference +- // we could captures reference this way +- __thrust_exec_check_disable__ +- template +- inline __host__ __device__ +- tuple_of_iterator_references& +- operator=(const thrust::reference, Pointer, Derived> &other) +- { +- typedef thrust::tuple tuple_type; +- +- // XXX perhaps this could be accelerated +- super_t::operator=(tuple_type{other}); +- return *this; +- } +- +- template = 0> +- inline __host__ __device__ +- constexpr operator thrust::tuple() const { +- return to_tuple(typename ::cuda::std::__make_tuple_indices::type{}); +- } +- +- // this overload of swap() permits swapping tuple_of_iterator_references returned as temporaries from +- // iterator dereferences +- template +- inline __host__ __device__ +- friend void swap(tuple_of_iterator_references&& x, tuple_of_iterator_references&& y) +- { +- x.swap(y); +- } +- +-private: +- template +- inline __host__ __device__ +- constexpr thrust::tuple to_tuple(::cuda::std::__tuple_indices) const { +- return {get(*this)...}; +- } ++ {} ++ ++ // allow assignment from tuples ++ // XXX might be worthwhile to guard this with an enable_if is_assignable ++ __thrust_exec_check_disable__ template ++ inline __host__ __device__ tuple_of_iterator_references& operator=(const thrust::tuple& other) ++ { ++ super_t::operator=(other); ++ return *this; ++ } ++ ++ // allow assignment from pairs ++ // XXX might be worthwhile to guard this with an enable_if is_assignable ++ __thrust_exec_check_disable__ template ++ inline __host__ __device__ tuple_of_iterator_references& operator=(const thrust::pair& other) ++ { ++ super_t::operator=(other); ++ return *this; ++ } ++ ++ // allow assignment from reference ++ // XXX perhaps we should generalize to reference ++ // we could captures reference this way ++ __thrust_exec_check_disable__ template ++ inline __host__ __device__ tuple_of_iterator_references& ++ operator=(const thrust::reference, Pointer, Derived>& other) ++ { ++ typedef thrust::tuple tuple_type; ++ ++ // XXX perhaps this could be accelerated ++ super_t::operator=(tuple_type{other}); ++ return *this; ++ } ++ ++ template = 0> ++ inline __host__ __device__ constexpr operator thrust::tuple() const ++ { ++ return __to_tuple(typename ::cuda::std::__make_tuple_indices::type{}); ++ } ++ ++ // this overload of swap() permits swapping tuple_of_iterator_references returned as temporaries from ++ // iterator dereferences ++ template ++ inline __host__ __device__ friend void swap(tuple_of_iterator_references&& x, tuple_of_iterator_references&& y) ++ { ++ x.swap(y); ++ } ++ ++ template ++ inline __host__ __device__ constexpr thrust::tuple __to_tuple(::cuda::std::__tuple_indices) const ++ { ++ return {maybe_unwrap_nested{}(get(*this))...}; ++ } + }; + +-} // end detail ++} // namespace detail + + THRUST_NAMESPACE_END + + _LIBCUDACXX_BEGIN_NAMESPACE_STD + ++template ++struct __is_tuple_of_iterator_references> ++ : integral_constant ++{}; ++ + // define tuple_size, tuple_element, etc. + template + struct tuple_size> +@@ -145,7 +158,8 @@ struct tuple_element + struct tuple_size> diff --git a/rapids-cmake/cpm/versions.json b/rapids-cmake/cpm/versions.json index 19d99ea2c..519addd82 100644 --- a/rapids-cmake/cpm/versions.json +++ b/rapids-cmake/cpm/versions.json @@ -17,7 +17,7 @@ }, { "file" : "cccl/hide_kernels.diff", - "issue" : "Mark all cub and thrust kernels with hidden visibility [https://github.com/nvidia/cccl/pulls/443]", + "issue" : "Mark all cub and thrust kernels with hidden visibility [https://github.com/nvidia/cccl/pull/443]", "fixed_in" : "2.3" }, { @@ -27,13 +27,18 @@ }, { "file" : "cccl/backport_pr_1499.diff", - "issue" : "Fix issues with ambiguous calls to addressof in thrust::optional.", + "issue" : "Fix issues with ambiguous calls to addressof in thrust::optional. [https://github.com/NVIDIA/cccl/pull/1499]", "fixed_in" : "2.4" }, { "file" : "cccl/device_nullopt.diff", - "issue" : "Ensure cuda::std::nullopt is visible in device code.", + "issue" : "Ensure cuda::std::nullopt is visible in device code. [https://github.com/NVIDIA/cccl/pull/1598]", "fixed_in" : "" + }, + { + "file" : "cccl/unwrap_nested_tuple_of_iterators.diff [https://github.com/NVIDIA/cccl/pull/1469]", + "issue" : "Unwrap nested thrust::tuple_of_iterator_references.", + "fixed_in" : "2.4" } ] },