Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to CCCL 2.2.0. #14576

Merged
merged 12 commits into from
Dec 19, 2023
1 change: 1 addition & 0 deletions ci/test_cpp_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ conda activate test
set -u

CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"
Expand Down
21 changes: 4 additions & 17 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -183,17 +183,15 @@ rapids_cpm_init()
include(cmake/thirdparty/get_jitify.cmake)
# find nvCOMP
include(cmake/thirdparty/get_nvcomp.cmake)
# find thrust/cub
include(cmake/thirdparty/get_thrust.cmake)
# find CCCL before rmm so that we get cudf's patched version of CCCL
include(cmake/thirdparty/get_cccl.cmake)
# find rmm
include(cmake/thirdparty/get_rmm.cmake)
# find arrow
include(cmake/thirdparty/get_arrow.cmake)
# find dlpack
include(cmake/thirdparty/get_dlpack.cmake)
# find libcu++
include(cmake/thirdparty/get_libcudacxx.cmake)
# find cuCollections Should come after including thrust and libcudacxx
# find cuCollections, should come after including CCCL
include(cmake/thirdparty/get_cucollections.cmake)
# find or install GoogleTest
if(CUDF_BUILD_TESTUTIL)
Expand Down Expand Up @@ -758,7 +756,7 @@ add_dependencies(cudf jitify_preprocess_run)
# Specify the target module library dependencies
target_link_libraries(
cudf
PUBLIC ${ARROW_LIBRARIES} libcudacxx::libcudacxx cudf::Thrust rmm::rmm
PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm
PRIVATE cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio
$<TARGET_NAME_IF_EXISTS:cuFile_interface>
)
Expand Down Expand Up @@ -1009,14 +1007,6 @@ following IMPORTED GLOBAL targets:
]=]
)

set(common_code_string
[=[
if(NOT TARGET cudf::Thrust)
thrust_create_target(cudf::Thrust FROM_OPTIONS)
endif()
]=]
)

if(CUDF_ENABLE_ARROW_PARQUET)
string(
APPEND
Expand All @@ -1040,7 +1030,6 @@ if(testing IN_LIST cudf_FIND_COMPONENTS)
endif()
]=]
)
string(APPEND install_code_string "${common_code_string}")

rapids_export(
INSTALL cudf
Expand All @@ -1064,8 +1053,6 @@ endif()
]=]
)

string(APPEND build_code_string "${common_code_string}")

rapids_export(
BUILD cudf
EXPORT_SET cudf-exports ${_components_export_string}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand All @@ -12,34 +12,30 @@
# the License.
# =============================================================================

# This function finds thrust and sets any additional necessary environment variables.
function(find_and_configure_thrust)
# This function finds cccl and sets any additional necessary environment variables.
function(find_and_configure_cccl)

include(${rapids-cmake-dir}/cpm/thrust.cmake)
include(${rapids-cmake-dir}/cpm/cccl.cmake)
include(${rapids-cmake-dir}/cpm/package_override.cmake)

set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
rapids_cpm_package_override("${cudf_patch_dir}/thrust_override.json")
rapids_cpm_package_override("${cudf_patch_dir}/cccl_override.json")

# Make sure we install thrust into the `include/libcudf` subdirectory instead of the default
# Make sure we install cccl into the `include/libcudf` subdirectory instead of the default
include(GNUInstallDirs)
set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/libcudf")
set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_INCLUDEDIR}/lib")

# Find or install Thrust with our custom set of patches
rapids_cpm_thrust(
NAMESPACE cudf
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports
)
# Find or install CCCL with our custom set of patches
rapids_cpm_cccl(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)

# Store where CMake can find our custom Thrust install
# Store where CMake can find our custom CCCL install
include("${rapids-cmake-dir}/export/find_package_root.cmake")
rapids_export_find_package_root(
INSTALL Thrust [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/thrust]=]
INSTALL CCCL [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/rapids/cmake/cccl]=]
EXPORT_SET cudf-exports
CONDITION Thrust_SOURCE_DIR
CONDITION CCCL_SOURCE_DIR
)
endfunction()

find_and_configure_thrust()
find_and_configure_cccl()
35 changes: 0 additions & 35 deletions cpp/cmake/thirdparty/get_libcudacxx.cmake

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@

{
"packages" : {
"Thrust" : {
"CCCL" : {
"patches" : [
{
"file" : "Thrust/install_rules.diff",
"issue" : "Thrust 1.X installs incorrect files [https://github.com/NVIDIA/thrust/issues/1790]",
"fixed_in" : "2.0.0"
"file" : "cccl/bug_fixes.diff",
"issue" : "CCCL installs header-search.cmake files in nondeterministic order and has a typo in checking target creation that leads to duplicates",
"fixed_in" : "2.3"
},
{
"file" : "${current_json_dir}/thrust_transform_iter_with_reduce_by_key.diff",
"issue" : "Support transform_output_iterator as output of reduce by key [https://github.com/NVIDIA/thrust/pull/1805]",
"fixed_in" : "2.1"
"file" : "cccl/revert_pr_211.diff",
"issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
Expand All @@ -27,11 +27,6 @@
"file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
"issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
"fixed_in" : ""
},
{
"file" : "${current_json_dir}/cub_segmented_sort_with_bool_key.diff",
"issue" : "Fix an error in CUB DeviceSegmentedSort when the keys are bool type [https://github.com/NVIDIA/cub/issues/594]",
"fixed_in" : "2.1"
}
]
}
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,29 +1,25 @@
diff --git a/thrust/system/cuda/detail/dispatch.h b/thrust/system/cuda/detail/dispatch.h
index d0e3f94..76774b0 100644
--- a/thrust/system/cuda/detail/dispatch.h
+++ b/thrust/system/cuda/detail/dispatch.h
@@ -32,9 +32,8 @@
diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h
index d0e3f94ec..5c32a9c60 100644
--- a/thrust/thrust/system/cuda/detail/dispatch.h
+++ b/thrust/thrust/system/cuda/detail/dispatch.h
@@ -32,8 +32,7 @@
status = call arguments; \
} \
else { \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
- status = call arguments; \
- }
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+ }
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
vyasr marked this conversation as resolved.
Show resolved Hide resolved
}

/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
@@ -52,10 +51,8 @@
@@ -52,9 +51,7 @@
status = call arguments; \
} \
else { \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
- status = call arguments; \
- }
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+ }
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}
/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
* implementation. This version allows using different token sequences for callables
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
index b188c75f..3f36656f 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
@@ -736,7 +736,7 @@ struct DeviceRadixSortPolicy
diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
index 84b6ccffd..25a237f93 100644
--- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
@@ -808,7 +808,7 @@ struct DeviceRadixSortPolicy


/// SM60 (GP100)
Expand All @@ -11,29 +11,29 @@ index b188c75f..3f36656f 100644
{
enum {
PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 6.9B 32b keys/s (Quadro P100)
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
index e0470ccb..6a0c2ed6 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
@@ -280,7 +280,7 @@ struct DeviceReducePolicy
};
diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh
index 994adc095..d3e6719a7 100644
--- a/cub/cub/device/dispatch/dispatch_reduce.cuh
+++ b/cub/cub/device/dispatch/dispatch_reduce.cuh
@@ -479,7 +479,7 @@ struct DeviceReducePolicy
};

/// SM60
- struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
// ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items)
typedef AgentReducePolicy<
diff --git a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
index c2d04588..ac2d10e0 100644
--- a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
+++ b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
@@ -177,7 +177,7 @@ struct DeviceScanPolicy
};
/// SM60
- struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
{
static constexpr int threads_per_block = 256;
static constexpr int items_per_thread = 16;
diff --git a/cub/cub/device/dispatch/tuning/tuning_scan.cuh b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
index 0ea5c41ad..1bcd8a111 100644
--- a/cub/cub/device/dispatch/tuning/tuning_scan.cuh
+++ b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
@@ -303,7 +303,7 @@ struct DeviceScanPolicy
/// SM600
struct Policy600
: DefaultTuning
- , ChainedPolicy<600, Policy600, Policy520>
+ , ChainedPolicy<600, Policy600, Policy600>
{};

/// SM600
- struct Policy600 : ChainedPolicy<600, Policy600, Policy520>
+ struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
vyasr marked this conversation as resolved.
Show resolved Hide resolved
{
typedef AgentScanPolicy<
128, 15, ///< Threads per block, items per thread
/// SM800
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
diff --git a/dependencies/cub/cub/block/block_merge_sort.cuh b/dependencies/cub/cub/block/block_merge_sort.cuh
index 4769df36..d86d6342 100644
--- a/dependencies/cub/cub/block/block_merge_sort.cuh
+++ b/dependencies/cub/cub/block/block_merge_sort.cuh
diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh
index dc07ef6c2..a066c14da 100644
--- a/cub/cub/block/block_merge_sort.cuh
+++ b/cub/cub/block/block_merge_sort.cuh
@@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared,
KeyT key1 = keys_shared[keys1_beg];
KeyT key2 = keys_shared[keys2_beg];
Expand All @@ -20,19 +20,10 @@ index 4769df36..d86d6342 100644
for (int item = 1; item < ITEMS_PER_THREAD; ++item)
{
if (ITEMS_PER_THREAD * linear_tid + item < valid_items)
@@ -407,7 +407,7 @@ public:
// each thread has sorted keys
// merge sort keys in shared memory
//
- #pragma unroll
+ #pragma unroll 1
vyasr marked this conversation as resolved.
Show resolved Hide resolved
for (int target_merged_threads_number = 2;
target_merged_threads_number <= NUM_THREADS;
target_merged_threads_number *= 2)
diff --git a/dependencies/cub/cub/thread/thread_sort.cuh b/dependencies/cub/cub/thread/thread_sort.cuh
index 5d486789..b42fb5f0 100644
--- a/dependencies/cub/cub/thread/thread_sort.cuh
+++ b/dependencies/cub/cub/thread/thread_sort.cuh
diff --git a/cub/cub/thread/thread_sort.cuh b/cub/cub/thread/thread_sort.cuh
index 5d4867896..b42fb5f00 100644
--- a/cub/cub/thread/thread_sort.cuh
+++ b/cub/cub/thread/thread_sort.cuh
@@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD],
{
constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value;
Expand Down

This file was deleted.