From 2c97abeb1a1b6d03b73f38813420b784feb33e87 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet"
Date: Tue, 10 Jan 2023 18:57:23 -0500
Subject: [PATCH] Decoupling raft handle from underlying resources (#1111)

This implements a design idea a few of us have been kicking around for a little while now to help decouple underlying resources from the raft handle and to allow users to avoid explicitly including headers for resources they never use (such as cublas, cusolver, cusparse, comms, etc.).

This effectively breaks the existing raft::handle_t into separate headers for the various resources it contains, providing functions that can be individually included and invoked on a `raft::resources`. This still allows us to write something like a `raft::device_resources` (and also allows us to maintain API compatibility in the meantime by backing the existing `raft::handle_t` with a `raft::resources`).

One of the major goals of this PR is also to enable a handle to be used outside of just cuda resources, and to allow unused resources to never need to be loaded or compiled into user code downstream.

Follow-on work after this PR will include:
1. Updating all of RAFT's public functions to accept `raft::resources` and using the individual resource accessors instead of assuming `device_resources` everywhere.
2. Deprecating the `handle_t` in favor of the more explicit `device_resources`.

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Divye Gala (https://github.com/divyegala)
  - Dante Gama Dessavre (https://github.com/dantegd)
  - William Hicks (https://github.com/wphicks)
  - Ben Frederickson (https://github.com/benfred)

URL: https://github.com/rapidsai/raft/pull/1111
---
 build.sh | 3 +-
 cpp/include/raft/comms/detail/test.hpp | 2 +-
 cpp/include/raft/core/comms.hpp | 3 +-
 cpp/include/raft/core/device_resources.hpp | 241 +++++++++++
 cpp/include/raft/core/handle.hpp | 312 +-------------
 cpp/include/raft/core/resource/comms.hpp | 69 +++
 .../raft/core/resource/cublas_handle.hpp | 71 +++
 cpp/include/raft/core/resource/cuda_event.hpp | 38 ++
 .../raft/core/resource/cuda_stream.hpp | 94 ++++
 .../raft/core/resource/cuda_stream_pool.hpp | 171 ++++++++
 .../raft/core/resource/cusolver_dn_handle.hpp | 75 ++++
 .../raft/core/resource/cusolver_sp_handle.hpp | 74 ++++
 .../raft/core/resource/cusparse_handle.hpp | 69 +++
 .../resource/detail/stream_sync_event.hpp | 50 +++
 cpp/include/raft/core/resource/device_id.hpp | 66 +++
 .../raft/core/resource/device_properties.hpp | 68 +++
 .../raft/core/resource/resource_types.hpp | 105 +++++
 cpp/include/raft/core/resource/sub_comms.hpp | 72 ++++
 .../raft/core/resource/thrust_policy.hpp | 64 +++
 cpp/include/raft/core/resources.hpp | 128 ++++++
 .../spatial/knn/detail/ivf_flat_search.cuh | 2 +-
 cpp/test/CMakeLists.txt | 34 +-
 cpp/test/{ => cluster}/cluster_solvers.cu | 9 +-
 .../cluster_solvers_deprecated.cu | 2 +-
 cpp/test/cluster/kmeans.cu | 14 +-
 cpp/test/cluster/linkage.cu | 14 +-
 cpp/test/core/handle.cpp | 251 +++++++++++
 cpp/test/{ => core}/interruptible.cu | 2 +-
 cpp/test/{common => core}/logger.cpp | 2 +-
 cpp/test/{ => core}/mdarray.cu | 2 +-
 cpp/test/{ => core}/mdspan_utils.cu | 2 +-
 cpp/test/{ => core}/memory_type.cpp | 2 +-
 cpp/test/{ => core}/nvtx.cpp | 2 +-
 cpp/test/{common => core}/seive.cu | 2 +-
 cpp/test/{ => core}/span.cpp | 2 +-
 cpp/test/{ => core}/span.cu | 2 +-
 cpp/test/{ => core}/test_span.hpp | 2 +-
 cpp/test/distance/distance_base.cuh | 4 +-
 cpp/test/distance/fused_l2_nn.cu | 6 +-
 cpp/test/handle.cpp | 67 ---
 cpp/test/{ =>
linalg}/eigen_solvers.cu | 2 +- cpp/test/matrix/columnSort.cu | 4 +- cpp/test/matrix/linewise_op.cu | 4 +- cpp/test/neighbors/epsilon_neighborhood.cu | 4 +- cpp/test/neighbors/selection.cu | 92 ++-- cpp/test/random/make_blobs.cu | 4 +- cpp/test/random/multi_variable_gaussian.cu | 17 +- cpp/test/{ => sparse}/mst.cu | 4 +- cpp/test/{ => sparse}/spectral_matrix.cu | 2 +- cpp/test/stats/cov.cu | 6 +- cpp/test/stats/regression_metrics.cu | 4 +- cpp/test/stats/silhouette_score.cu | 4 +- cpp/test/stats/trustworthiness.cu | 19 +- cpp/test/{ => util}/cudart_utils.cpp | 2 +- cpp/test/{ => util}/device_atomics.cu | 2 +- cpp/test/{ => util}/integer_utils.cpp | 2 +- cpp/test/{ => util}/pow2_utils.cu | 2 +- docs/source/build.md | 4 +- docs/source/developer_guide.md | 405 +++++++++++++++++- .../pylibraft/pylibraft/test/test_refine.py | 2 +- python/raft-dask/setup.py | 4 +- 61 files changed, 2284 insertions(+), 503 deletions(-) create mode 100644 cpp/include/raft/core/device_resources.hpp create mode 100644 cpp/include/raft/core/resource/comms.hpp create mode 100644 cpp/include/raft/core/resource/cublas_handle.hpp create mode 100644 cpp/include/raft/core/resource/cuda_event.hpp create mode 100644 cpp/include/raft/core/resource/cuda_stream.hpp create mode 100644 cpp/include/raft/core/resource/cuda_stream_pool.hpp create mode 100644 cpp/include/raft/core/resource/cusolver_dn_handle.hpp create mode 100644 cpp/include/raft/core/resource/cusolver_sp_handle.hpp create mode 100644 cpp/include/raft/core/resource/cusparse_handle.hpp create mode 100644 cpp/include/raft/core/resource/detail/stream_sync_event.hpp create mode 100644 cpp/include/raft/core/resource/device_id.hpp create mode 100644 cpp/include/raft/core/resource/device_properties.hpp create mode 100644 cpp/include/raft/core/resource/resource_types.hpp create mode 100644 cpp/include/raft/core/resource/sub_comms.hpp create mode 100644 cpp/include/raft/core/resource/thrust_policy.hpp create mode 100644 cpp/include/raft/core/resources.hpp rename cpp/test/{ => cluster}/cluster_solvers.cu (96%) rename cpp/test/{ => cluster}/cluster_solvers_deprecated.cu (96%) create mode 100644 cpp/test/core/handle.cpp rename cpp/test/{ => core}/interruptible.cu (98%) rename cpp/test/{common => core}/logger.cpp (98%) rename cpp/test/{ => core}/mdarray.cu (99%) rename cpp/test/{ => core}/mdspan_utils.cu (99%) rename cpp/test/{ => core}/memory_type.cpp (96%) rename cpp/test/{ => core}/nvtx.cpp (96%) rename cpp/test/{common => core}/seive.cu (95%) rename cpp/test/{ => core}/span.cpp (99%) rename cpp/test/{ => core}/span.cu (99%) rename cpp/test/{ => core}/test_span.hpp (99%) delete mode 100644 cpp/test/handle.cpp rename cpp/test/{ => linalg}/eigen_solvers.cu (98%) rename cpp/test/{ => sparse}/mst.cu (99%) rename cpp/test/{ => sparse}/spectral_matrix.cu (98%) rename cpp/test/{ => util}/cudart_utils.cpp (98%) rename cpp/test/{ => util}/device_atomics.cu (97%) rename cpp/test/{ => util}/integer_utils.cpp (96%) rename cpp/test/{ => util}/pow2_utils.cu (98%) diff --git a/build.sh b/build.sh index 94bc055adb..b47e1ed862 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
# raft build script @@ -153,6 +153,7 @@ function limitTests { # Remove the full LIMIT_TEST_TARGETS argument from list of args so that it passes validArgs function ARGS=${ARGS//--limit-tests=$LIMIT_TEST_TARGETS/} TEST_TARGETS=${LIMIT_TEST_TARGETS} + echo "Limiting tests to $TEST_TARGETS" fi fi } diff --git a/cpp/include/raft/comms/detail/test.hpp b/cpp/include/raft/comms/detail/test.hpp index 6ba4be3886..4f879540b4 100644 --- a/cpp/include/raft/comms/detail/test.hpp +++ b/cpp/include/raft/comms/detail/test.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/core/comms.hpp b/cpp/include/raft/core/comms.hpp index 35ab6680de..463c17f2f6 100644 --- a/cpp/include/raft/core/comms.hpp +++ b/cpp/include/raft/core/comms.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include diff --git a/cpp/include/raft/core/device_resources.hpp b/cpp/include/raft/core/device_resources.hpp new file mode 100644 index 0000000000..faca07e8f4 --- /dev/null +++ b/cpp/include/raft/core/device_resources.hpp @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __RAFT_DEVICE_RESOURCES +#define __RAFT_DEVICE_RESOURCES + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { + +/** + * @brief Main resource container object that stores all necessary resources + * used for calling necessary device functions, cuda kernels and/or libraries + */ +class device_resources : public resources { + public: + // delete copy/move constructors and assignment operators as + // copying and moving underlying resources is unsafe + device_resources(const device_resources&) = delete; + device_resources& operator=(const device_resources&) = delete; + device_resources(device_resources&&) = delete; + device_resources& operator=(device_resources&&) = delete; + + /** + * @brief Construct a resources instance with a stream view and stream pool + * + * @param[in] stream_view the default stream (which has the default per-thread stream if + * unspecified) + * @param[in] stream_pool the stream pool used (which has default of nullptr if unspecified) + */ + device_resources(rmm::cuda_stream_view stream_view = rmm::cuda_stream_per_thread, + std::shared_ptr stream_pool = {nullptr}) + : resources{} + { + resources::add_resource_factory(std::make_shared()); + resources::add_resource_factory( + std::make_shared(stream_view)); + resources::add_resource_factory( + std::make_shared(stream_pool)); + } + + /** Destroys all held-up resources */ + virtual ~device_resources() {} + + int get_device() const { return resource::get_device_id(*this); } + + cublasHandle_t get_cublas_handle() const { return resource::get_cublas_handle(*this); } + + cusolverDnHandle_t get_cusolver_dn_handle() const + { + return resource::get_cusolver_dn_handle(*this); + } + + cusolverSpHandle_t get_cusolver_sp_handle() const + { + return resource::get_cusolver_sp_handle(*this); + } + + cusparseHandle_t get_cusparse_handle() const { return resource::get_cusparse_handle(*this); } + + rmm::exec_policy& get_thrust_policy() const { return resource::get_thrust_policy(*this); } + + /** + * @brief synchronize a stream on the current container + */ + void sync_stream(rmm::cuda_stream_view stream) const { resource::sync_stream(*this, stream); } + + /** + * @brief synchronize main stream on the current container + */ + void sync_stream() const { resource::sync_stream(*this); } + + /** + * @brief returns main stream on the current container + */ + rmm::cuda_stream_view get_stream() const { return resource::get_cuda_stream(*this); } + + /** + * @brief returns whether stream pool was initialized on the current container + */ + + bool is_stream_pool_initialized() const { return resource::is_stream_pool_initialized(*this); } + + /** + * @brief returns stream pool on the current container + */ + const rmm::cuda_stream_pool& get_stream_pool() const + { + return resource::get_cuda_stream_pool(*this); + } + + std::size_t get_stream_pool_size() const { return resource::get_stream_pool_size(*this); } + + /** + * @brief return stream from pool + */ + rmm::cuda_stream_view get_stream_from_stream_pool() const + { + return resource::get_stream_from_stream_pool(*this); + } + + /** + * @brief return stream from pool at index + */ + rmm::cuda_stream_view get_stream_from_stream_pool(std::size_t stream_idx) const + { + return resource::get_stream_from_stream_pool(*this, 
stream_idx); + } + + /** + * @brief return stream from pool if size > 0, else main stream on current container + */ + rmm::cuda_stream_view get_next_usable_stream() const + { + return resource::get_next_usable_stream(*this); + } + + /** + * @brief return stream from pool at index if size > 0, else main stream on current container + * + * @param[in] stream_idx the required index of the stream in the stream pool if available + */ + rmm::cuda_stream_view get_next_usable_stream(std::size_t stream_idx) const + { + return resource::get_next_usable_stream(*this, stream_idx); + } + + /** + * @brief synchronize the stream pool on the current container + */ + void sync_stream_pool() const { return resource::sync_stream_pool(*this); } + + /** + * @brief synchronize subset of stream pool + * + * @param[in] stream_indices the indices of the streams in the stream pool to synchronize + */ + void sync_stream_pool(const std::vector stream_indices) const + { + return resource::sync_stream_pool(*this, stream_indices); + } + + /** + * @brief ask stream pool to wait on last event in main stream + */ + void wait_stream_pool_on_stream() const { return resource::wait_stream_pool_on_stream(*this); } + + void set_comms(std::shared_ptr communicator) + { + resource::set_comms(*this, communicator); + } + + const comms::comms_t& get_comms() const { return resource::get_comms(*this); } + + void set_subcomm(std::string key, std::shared_ptr subcomm) + { + resource::set_subcomm(*this, key, subcomm); + } + + const comms::comms_t& get_subcomm(std::string key) const + { + return resource::get_subcomm(*this, key); + } + + bool comms_initialized() const { return resource::comms_initialized(*this); } + + const cudaDeviceProp& get_device_properties() const + { + return resource::get_device_properties(*this); + } +}; // class device_resources + +/** + * @brief RAII approach to synchronizing across all streams in the current container + */ +class stream_syncer { + public: + explicit stream_syncer(const device_resources& handle) : handle_(handle) + { + handle_.sync_stream(); + } + ~stream_syncer() + { + handle_.wait_stream_pool_on_stream(); + handle_.sync_stream_pool(); + } + + stream_syncer(const stream_syncer& other) = delete; + stream_syncer& operator=(const stream_syncer& other) = delete; + + private: + const device_resources& handle_; +}; // class stream_syncer + +} // namespace raft + +#endif \ No newline at end of file diff --git a/cpp/include/raft/core/handle.hpp b/cpp/include/raft/core/handle.hpp index 08cb812bb7..48c1718eb0 100644 --- a/cpp/include/raft/core/handle.hpp +++ b/cpp/include/raft/core/handle.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,44 +14,23 @@ * limitations under the License. */ -#ifndef __RAFT_RT_HANDLE -#define __RAFT_RT_HANDLE - #pragma once -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -///@todo: enable once we have migrated cuml-comms layer too -//#include - -#include - -#include -#include -#include -#include -#include -#include -#include +#include namespace raft { /** - * @brief Main handle object that stores all necessary context used for calling - * necessary cuda kernels and/or libraries + * raft::handle_t is being kept around for backwards + * compatibility and will be removed in a future version. 
+ * + * Extending the `raft::device_resources` instead of `using` to + * minimize needed changes downstream + * (e.g. existing forward declarations, etc...) + * + * Use of `raft::resources` or `raft::device_resources` is preferred. */ -class handle_t { +class handle_t : public raft::device_resources { public: // delete copy/move constructors and assignment operators as // copying and moving underlying resources is unsafe @@ -61,7 +40,7 @@ class handle_t { handle_t& operator=(handle_t&&) = delete; /** - * @brief Construct a handle with a stream view and stream pool + * @brief Construct a resources instance with a stream view and stream pool * * @param[in] stream_view the default stream (which has the default per-thread stream if * unspecified) @@ -69,271 +48,12 @@ class handle_t { */ handle_t(rmm::cuda_stream_view stream_view = rmm::cuda_stream_per_thread, std::shared_ptr stream_pool = {nullptr}) - : dev_id_([]() -> int { - int cur_dev = -1; - RAFT_CUDA_TRY(cudaGetDevice(&cur_dev)); - return cur_dev; - }()), - stream_view_{stream_view}, - stream_pool_{stream_pool} + : device_resources{stream_view, stream_pool} { - create_resources(); } /** Destroys all held-up resources */ - virtual ~handle_t() { destroy_resources(); } - - int get_device() const { return dev_id_; } - - cublasHandle_t get_cublas_handle() const - { - std::lock_guard _(mutex_); - if (!cublas_initialized_) { - RAFT_CUBLAS_TRY_NO_THROW(cublasCreate(&cublas_handle_)); - RAFT_CUBLAS_TRY_NO_THROW(cublasSetStream(cublas_handle_, stream_view_)); - cublas_initialized_ = true; - } - return cublas_handle_; - } - - cusolverDnHandle_t get_cusolver_dn_handle() const - { - std::lock_guard _(mutex_); - if (!cusolver_dn_initialized_) { - RAFT_CUSOLVER_TRY_NO_THROW(cusolverDnCreate(&cusolver_dn_handle_)); - RAFT_CUSOLVER_TRY_NO_THROW(cusolverDnSetStream(cusolver_dn_handle_, stream_view_)); - cusolver_dn_initialized_ = true; - } - return cusolver_dn_handle_; - } - - cusolverSpHandle_t get_cusolver_sp_handle() const - { - std::lock_guard _(mutex_); - if (!cusolver_sp_initialized_) { - RAFT_CUSOLVER_TRY_NO_THROW(cusolverSpCreate(&cusolver_sp_handle_)); - RAFT_CUSOLVER_TRY_NO_THROW(cusolverSpSetStream(cusolver_sp_handle_, stream_view_)); - cusolver_sp_initialized_ = true; - } - return cusolver_sp_handle_; - } - - cusparseHandle_t get_cusparse_handle() const - { - std::lock_guard _(mutex_); - if (!cusparse_initialized_) { - RAFT_CUSPARSE_TRY_NO_THROW(cusparseCreate(&cusparse_handle_)); - RAFT_CUSPARSE_TRY_NO_THROW(cusparseSetStream(cusparse_handle_, stream_view_)); - cusparse_initialized_ = true; - } - return cusparse_handle_; - } - - rmm::exec_policy& get_thrust_policy() const { return *thrust_policy_; } - - /** - * @brief synchronize a stream on the handle - */ - void sync_stream(rmm::cuda_stream_view stream) const { interruptible::synchronize(stream); } - - /** - * @brief synchronize main stream on the handle - */ - void sync_stream() const { sync_stream(stream_view_); } - - /** - * @brief returns main stream on the handle - */ - rmm::cuda_stream_view get_stream() const { return stream_view_; } - - /** - * @brief returns whether stream pool was initialized on the handle - */ - - bool is_stream_pool_initialized() const { return stream_pool_.get() != nullptr; } - - /** - * @brief returns stream pool on the handle - */ - const rmm::cuda_stream_pool& get_stream_pool() const - { - RAFT_EXPECTS(stream_pool_, "ERROR: rmm::cuda_stream_pool was not initialized"); - return *stream_pool_; - } - - std::size_t get_stream_pool_size() const - { - return 
is_stream_pool_initialized() ? stream_pool_->get_pool_size() : 0; - } - - /** - * @brief return stream from pool - */ - rmm::cuda_stream_view get_stream_from_stream_pool() const - { - RAFT_EXPECTS(stream_pool_, "ERROR: rmm::cuda_stream_pool was not initialized"); - return stream_pool_->get_stream(); - } - - /** - * @brief return stream from pool at index - */ - rmm::cuda_stream_view get_stream_from_stream_pool(std::size_t stream_idx) const - { - RAFT_EXPECTS(stream_pool_, "ERROR: rmm::cuda_stream_pool was not initialized"); - return stream_pool_->get_stream(stream_idx); - } - - /** - * @brief return stream from pool if size > 0, else main stream on handle - */ - rmm::cuda_stream_view get_next_usable_stream() const - { - return is_stream_pool_initialized() ? get_stream_from_stream_pool() : stream_view_; - } - - /** - * @brief return stream from pool at index if size > 0, else main stream on handle - * - * @param[in] stream_idx the required index of the stream in the stream pool if available - */ - rmm::cuda_stream_view get_next_usable_stream(std::size_t stream_idx) const - { - return is_stream_pool_initialized() ? get_stream_from_stream_pool(stream_idx) : stream_view_; - } - - /** - * @brief synchronize the stream pool on the handle - */ - void sync_stream_pool() const - { - for (std::size_t i = 0; i < get_stream_pool_size(); i++) { - sync_stream(stream_pool_->get_stream(i)); - } - } - - /** - * @brief synchronize subset of stream pool - * - * @param[in] stream_indices the indices of the streams in the stream pool to synchronize - */ - void sync_stream_pool(const std::vector stream_indices) const - { - RAFT_EXPECTS(stream_pool_, "ERROR: rmm::cuda_stream_pool was not initialized"); - for (const auto& stream_index : stream_indices) { - sync_stream(stream_pool_->get_stream(stream_index)); - } - } - - /** - * @brief ask stream pool to wait on last event in main stream - */ - void wait_stream_pool_on_stream() const - { - RAFT_CUDA_TRY(cudaEventRecord(event_, stream_view_)); - for (std::size_t i = 0; i < get_stream_pool_size(); i++) { - RAFT_CUDA_TRY(cudaStreamWaitEvent(stream_pool_->get_stream(i), event_, 0)); - } - } - - void set_comms(std::shared_ptr communicator) { communicator_ = communicator; } - - const comms::comms_t& get_comms() const - { - RAFT_EXPECTS(this->comms_initialized(), "ERROR: Communicator was not initialized\n"); - return *communicator_; - } - - void set_subcomm(std::string key, std::shared_ptr subcomm) - { - subcomms_[key] = subcomm; - } - - const comms::comms_t& get_subcomm(std::string key) const - { - RAFT_EXPECTS( - subcomms_.find(key) != subcomms_.end(), "%s was not found in subcommunicators.", key.c_str()); - - auto subcomm = subcomms_.at(key); - - RAFT_EXPECTS(nullptr != subcomm.get(), "ERROR: Subcommunicator was not initialized"); - - return *subcomm; - } - - bool comms_initialized() const { return (nullptr != communicator_.get()); } - - const cudaDeviceProp& get_device_properties() const - { - std::lock_guard _(mutex_); - if (!device_prop_initialized_) { - RAFT_CUDA_TRY_NO_THROW(cudaGetDeviceProperties(&prop_, dev_id_)); - device_prop_initialized_ = true; - } - return prop_; - } - - private: - std::shared_ptr communicator_; - std::unordered_map> subcomms_; - - const int dev_id_; - mutable cublasHandle_t cublas_handle_; - mutable bool cublas_initialized_{false}; - mutable cusolverDnHandle_t cusolver_dn_handle_; - mutable bool cusolver_dn_initialized_{false}; - mutable cusolverSpHandle_t cusolver_sp_handle_; - mutable bool cusolver_sp_initialized_{false}; - mutable 
cusparseHandle_t cusparse_handle_;
-  mutable bool cusparse_initialized_{false};
-  std::unique_ptr<rmm::exec_policy> thrust_policy_{nullptr};
-  rmm::cuda_stream_view stream_view_{rmm::cuda_stream_per_thread};
-  std::shared_ptr<rmm::cuda_stream_pool> stream_pool_{nullptr};
-  cudaEvent_t event_;
-  mutable cudaDeviceProp prop_;
-  mutable bool device_prop_initialized_{false};
-  mutable std::mutex mutex_;
-
-  void create_resources()
-  {
-    thrust_policy_ = std::make_unique<rmm::exec_policy>(stream_view_);
-
-    RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
-  }
-
-  void destroy_resources()
-  {
-    if (cusparse_initialized_) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroy(cusparse_handle_)); }
-    if (cusolver_dn_initialized_) {
-      RAFT_CUSOLVER_TRY_NO_THROW(cusolverDnDestroy(cusolver_dn_handle_));
-    }
-    if (cusolver_sp_initialized_) {
-      RAFT_CUSOLVER_TRY_NO_THROW(cusolverSpDestroy(cusolver_sp_handle_));
-    }
-    if (cublas_initialized_) { RAFT_CUBLAS_TRY_NO_THROW(cublasDestroy(cublas_handle_)); }
-    RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(event_));
-  }
-};  // class handle_t
-
-/**
- * @brief RAII approach to synchronizing across all streams in the handle
- */
-class stream_syncer {
- public:
-  explicit stream_syncer(const handle_t& handle) : handle_(handle) { handle_.sync_stream(); }
-  ~stream_syncer()
-  {
-    handle_.wait_stream_pool_on_stream();
-    handle_.sync_stream_pool();
-  }
-
-  stream_syncer(const stream_syncer& other) = delete;
-  stream_syncer& operator=(const stream_syncer& other) = delete;
-
- private:
-  const handle_t& handle_;
-};  // class stream_syncer
-
-}  // namespace raft
+  ~handle_t() override {}
+};
-#endif
\ No newline at end of file
+}  // end NAMESPACE raft
\ No newline at end of file
diff --git a/cpp/include/raft/core/resource/comms.hpp b/cpp/include/raft/core/resource/comms.hpp
new file mode 100644
index 0000000000..b7a74b7dd5
--- /dev/null
+++ b/cpp/include/raft/core/resource/comms.hpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+#include
+#include
+
+namespace raft::resource {
+class comms_resource : public resource {
+ public:
+  comms_resource(std::shared_ptr<comms::comms_t> communicator) : communicator_(communicator) {}
+
+  void* get_resource() override { return &communicator_; }
+
+  ~comms_resource() override {}
+
+ private:
+  std::shared_ptr<comms::comms_t> communicator_;
+};
+
+/**
+ * Factory that knows how to construct a
+ * specific raft::resource to populate
+ * the res_t.
+ */ +class comms_resource_factory : public resource_factory { + public: + comms_resource_factory(std::shared_ptr communicator) : communicator_(communicator) + { + } + + resource_type get_resource_type() override { return resource_type::COMMUNICATOR; } + + resource* make_resource() override { return new comms_resource(communicator_); } + + private: + std::shared_ptr communicator_; +}; + +inline bool comms_initialized(resources const& res) +{ + return res.has_resource_factory(resource_type::COMMUNICATOR); +} + +inline comms::comms_t const& get_comms(resources const& res) +{ + RAFT_EXPECTS(comms_initialized(res), "ERROR: Communicator was not initialized\n"); + return *(*res.get_resource>(resource_type::COMMUNICATOR)); +} + +inline void set_comms(resources const& res, std::shared_ptr communicator) +{ + res.add_resource_factory(std::make_shared(communicator)); +} +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/cublas_handle.hpp b/cpp/include/raft/core/resource/cublas_handle.hpp new file mode 100644 index 0000000000..cf6f51ee98 --- /dev/null +++ b/cpp/include/raft/core/resource/cublas_handle.hpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +namespace raft::resource { + +class cublas_resource : public resource { + public: + cublas_resource(rmm::cuda_stream_view stream) + { + RAFT_CUBLAS_TRY_NO_THROW(cublasCreate(&cublas_res)); + RAFT_CUBLAS_TRY_NO_THROW(cublasSetStream(cublas_res, stream)); + } + + ~cublas_resource() override { RAFT_CUBLAS_TRY_NO_THROW(cublasDestroy(cublas_res)); } + + void* get_resource() override { return &cublas_res; } + + private: + cublasHandle_t cublas_res; +}; + +/** + * Factory that knows how to construct a + * specific raft::resource to populate + * the res_t. + */ +class cublas_resource_factory : public resource_factory { + public: + cublas_resource_factory(rmm::cuda_stream_view stream) : stream_(stream) {} + resource_type get_resource_type() override { return resource_type::CUBLAS_HANDLE; } + resource* make_resource() override { return new cublas_resource(stream_); } + + private: + rmm::cuda_stream_view stream_; +}; + +/** + * Load a cublasres_t from raft res if it exists, otherwise + * add it and return it. + * @param res + * @return + */ +inline cublasHandle_t get_cublas_handle(resources const& res) +{ + if (!res.has_resource_factory(resource_type::CUBLAS_HANDLE)) { + cudaStream_t stream = get_cuda_stream(res); + res.add_resource_factory(std::make_shared(stream)); + } + return *res.get_resource(resource_type::CUBLAS_HANDLE); +}; +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/cuda_event.hpp b/cpp/include/raft/core/resource/cuda_event.hpp new file mode 100644 index 0000000000..4859d95ee9 --- /dev/null +++ b/cpp/include/raft/core/resource/cuda_event.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +namespace raft::resource { + +class cuda_event_resource : public resource { + public: + cuda_event_resource() + { + RAFT_CUDA_TRY_NO_THROW(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming)); + } + void* get_resource() override { return &event_; } + + ~cuda_event_resource() override { RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(event_)); } + + private: + cudaEvent_t event_; +}; +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/cuda_stream.hpp b/cpp/include/raft/core/resource/cuda_stream.hpp new file mode 100644 index 0000000000..2e01ce0123 --- /dev/null +++ b/cpp/include/raft/core/resource/cuda_stream.hpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace raft::resource { +class cuda_stream_resource : public resource { + public: + cuda_stream_resource(rmm::cuda_stream_view stream_view = rmm::cuda_stream_per_thread) + : stream(stream_view) + { + } + void* get_resource() override { return &stream; } + + ~cuda_stream_resource() override {} + + private: + rmm::cuda_stream_view stream; +}; + +/** + * Factory that knows how to construct a specific raft::resource to populate + * the resources instance. + */ +class cuda_stream_resource_factory : public resource_factory { + public: + cuda_stream_resource_factory(rmm::cuda_stream_view stream_view = rmm::cuda_stream_per_thread) + : stream(stream_view) + { + } + resource_type get_resource_type() override { return resource_type::CUDA_STREAM_VIEW; } + resource* make_resource() override { return new cuda_stream_resource(stream); } + + private: + rmm::cuda_stream_view stream; +}; + +/** + * Load a rmm::cuda_stream_view from a resources instance (and populate it on the res + * if needed). + * @param res raft res object for managing resources + * @return + */ +inline rmm::cuda_stream_view get_cuda_stream(resources const& res) +{ + if (!res.has_resource_factory(resource_type::CUDA_STREAM_VIEW)) { + res.add_resource_factory(std::make_shared()); + } + return *res.get_resource(resource_type::CUDA_STREAM_VIEW); +}; + +/** + * Load a rmm::cuda_stream_view from a resources instance (and populate it on the res + * if needed). 
+ * @param res raft res object for managing resources + * @return + */ +inline void set_cuda_stream(resources const& res, rmm::cuda_stream_view stream_view) +{ + res.add_resource_factory(std::make_shared(stream_view)); +}; + +/** + * @brief synchronize a specific stream + */ +inline void sync_stream(const resources& res, rmm::cuda_stream_view stream) +{ + interruptible::synchronize(stream); +} + +/** + * @brief synchronize main stream on the resources instance + */ +inline void sync_stream(const resources& res) { sync_stream(res, get_cuda_stream(res)); } +} // namespace raft::resource \ No newline at end of file diff --git a/cpp/include/raft/core/resource/cuda_stream_pool.hpp b/cpp/include/raft/core/resource/cuda_stream_pool.hpp new file mode 100644 index 0000000000..452523d3af --- /dev/null +++ b/cpp/include/raft/core/resource/cuda_stream_pool.hpp @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +namespace raft::resource { + +class cuda_stream_pool_resource : public resource { + public: + cuda_stream_pool_resource(std::shared_ptr stream_pool) + : stream_pool_(stream_pool) + { + } + + ~cuda_stream_pool_resource() override {} + void* get_resource() override { return &stream_pool_; } + + private: + std::shared_ptr stream_pool_{nullptr}; +}; + +/** + * Factory that knows how to construct a + * specific raft::resource to populate + * the res_t. + */ +class cuda_stream_pool_resource_factory : public resource_factory { + public: + cuda_stream_pool_resource_factory(std::shared_ptr stream_pool = {nullptr}) + : stream_pool_(stream_pool) + { + } + + resource_type get_resource_type() override { return resource_type::CUDA_STREAM_POOL; } + resource* make_resource() override { return new cuda_stream_pool_resource(stream_pool_); } + + private: + std::shared_ptr stream_pool_{nullptr}; +}; + +inline bool is_stream_pool_initialized(const resources& res) +{ + return *res.get_resource>( + resource_type::CUDA_STREAM_POOL) != nullptr; +} + +/** + * Load a cuda_stream_pool, and create a new one if it doesn't already exist + * @param res raft res object for managing resources + * @return + */ +inline const rmm::cuda_stream_pool& get_cuda_stream_pool(const resources& res) +{ + if (!res.has_resource_factory(resource_type::CUDA_STREAM_POOL)) { + res.add_resource_factory(std::make_shared()); + } + return *( + *res.get_resource>(resource_type::CUDA_STREAM_POOL)); +}; + +/** + * Explicitly set a stream pool on the current res. Note that this will overwrite + * an existing stream pool on the res. + * @param res + * @param stream_pool + */ +inline void set_cuda_stream_pool(const resources& res, + std::shared_ptr stream_pool) +{ + res.add_resource_factory(std::make_shared(stream_pool)); +}; + +inline std::size_t get_stream_pool_size(const resources& res) +{ + return is_stream_pool_initialized(res) ? 
get_cuda_stream_pool(res).get_pool_size() : 0; +} + +/** + * @brief return stream from pool + */ +inline rmm::cuda_stream_view get_stream_from_stream_pool(const resources& res) +{ + RAFT_EXPECTS(is_stream_pool_initialized(res), "ERROR: rmm::cuda_stream_pool was not initialized"); + return get_cuda_stream_pool(res).get_stream(); +} + +/** + * @brief return stream from pool at index + */ +inline rmm::cuda_stream_view get_stream_from_stream_pool(const resources& res, + std::size_t stream_idx) +{ + RAFT_EXPECTS(is_stream_pool_initialized(res), "ERROR: rmm::cuda_stream_pool was not initialized"); + return get_cuda_stream_pool(res).get_stream(stream_idx); +} + +/** + * @brief return stream from pool if size > 0, else main stream on res + */ +inline rmm::cuda_stream_view get_next_usable_stream(const resources& res) +{ + return is_stream_pool_initialized(res) ? get_stream_from_stream_pool(res) : get_cuda_stream(res); +} + +/** + * @brief return stream from pool at index if size > 0, else main stream on res + * + * @param[in] stream_idx the required index of the stream in the stream pool if available + */ +inline rmm::cuda_stream_view get_next_usable_stream(const resources& res, std::size_t stream_idx) +{ + return is_stream_pool_initialized(res) ? get_stream_from_stream_pool(res, stream_idx) + : get_cuda_stream(res); +} + +/** + * @brief synchronize the stream pool on the res + */ +inline void sync_stream_pool(const resources& res) +{ + for (std::size_t i = 0; i < get_stream_pool_size(res); i++) { + sync_stream(res, get_cuda_stream_pool(res).get_stream(i)); + } +} + +/** + * @brief synchronize subset of stream pool + * + * @param[in] stream_indices the indices of the streams in the stream pool to synchronize + */ +inline void sync_stream_pool(const resources& res, const std::vector stream_indices) +{ + RAFT_EXPECTS(is_stream_pool_initialized(res), "ERROR: rmm::cuda_stream_pool was not initialized"); + for (const auto& stream_index : stream_indices) { + sync_stream(res, get_cuda_stream_pool(res).get_stream(stream_index)); + } +} + +/** + * @brief ask stream pool to wait on last event in main stream + */ +inline void wait_stream_pool_on_stream(const resources& res) +{ + cudaEvent_t event = detail::get_cuda_stream_sync_event(res); + RAFT_CUDA_TRY(cudaEventRecord(event, get_cuda_stream(res))); + for (std::size_t i = 0; i < get_stream_pool_size(res); i++) { + RAFT_CUDA_TRY(cudaStreamWaitEvent(get_cuda_stream_pool(res).get_stream(i), event, 0)); + } +} +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/cusolver_dn_handle.hpp b/cpp/include/raft/core/resource/cusolver_dn_handle.hpp new file mode 100644 index 0000000000..7ed5634574 --- /dev/null +++ b/cpp/include/raft/core/resource/cusolver_dn_handle.hpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "cuda_stream.hpp" +#include +#include +#include +#include +#include + +namespace raft::resource { + +/** + * + */ +class cusolver_dn_resource : public resource { + public: + cusolver_dn_resource(rmm::cuda_stream_view stream) + { + RAFT_CUSOLVER_TRY_NO_THROW(cusolverDnCreate(&cusolver_res)); + RAFT_CUSOLVER_TRY_NO_THROW(cusolverDnSetStream(cusolver_res, stream)); + } + + void* get_resource() override { return &cusolver_res; } + + ~cusolver_dn_resource() override { RAFT_CUSOLVER_TRY_NO_THROW(cusolverDnDestroy(cusolver_res)); } + + private: + cusolverDnHandle_t cusolver_res; +}; + +/** + * Factory that knows how to construct a + * specific raft::resource to populate + * the res_t. + */ +class cusolver_dn_resource_factory : public resource_factory { + public: + cusolver_dn_resource_factory(rmm::cuda_stream_view stream) : stream_(stream) {} + resource_type get_resource_type() override { return resource_type::CUSOLVER_DN_HANDLE; } + resource* make_resource() override { return new cusolver_dn_resource(stream_); } + + private: + rmm::cuda_stream_view stream_; +}; + +/** + * Load a cusolverSpres_t from raft res if it exists, otherwise + * add it and return it. + * @param res + * @return + */ +inline cusolverDnHandle_t get_cusolver_dn_handle(resources const& res) +{ + if (!res.has_resource_factory(resource_type::CUSOLVER_DN_HANDLE)) { + cudaStream_t stream = get_cuda_stream(res); + res.add_resource_factory(std::make_shared(stream)); + } + return *res.get_resource(resource_type::CUSOLVER_DN_HANDLE); +}; +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/cusolver_sp_handle.hpp b/cpp/include/raft/core/resource/cusolver_sp_handle.hpp new file mode 100644 index 0000000000..1822955301 --- /dev/null +++ b/cpp/include/raft/core/resource/cusolver_sp_handle.hpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +namespace raft::resource { + +/** + * + */ +class cusolver_sp_resource : public resource { + public: + cusolver_sp_resource(rmm::cuda_stream_view stream) + { + RAFT_CUSOLVER_TRY_NO_THROW(cusolverSpCreate(&cusolver_res)); + RAFT_CUSOLVER_TRY_NO_THROW(cusolverSpSetStream(cusolver_res, stream)); + } + + void* get_resource() override { return &cusolver_res; } + + ~cusolver_sp_resource() override { RAFT_CUSOLVER_TRY_NO_THROW(cusolverSpDestroy(cusolver_res)); } + + private: + cusolverSpHandle_t cusolver_res; +}; + +/** + * Factory that knows how to construct a + * specific raft::resource to populate + * the res_t. 
+ */ +class cusolver_sp_resource_factory : public resource_factory { + public: + cusolver_sp_resource_factory(rmm::cuda_stream_view stream) : stream_(stream) {} + resource_type get_resource_type() override { return resource_type::CUSOLVER_SP_HANDLE; } + resource* make_resource() override { return new cusolver_sp_resource(stream_); } + + private: + rmm::cuda_stream_view stream_; +}; + +/** + * Load a cusolverSpres_t from raft res if it exists, otherwise + * add it and return it. + * @param res + * @return + */ +inline cusolverSpHandle_t get_cusolver_sp_handle(resources const& res) +{ + if (!res.has_resource_factory(resource_type::CUSOLVER_SP_HANDLE)) { + cudaStream_t stream = get_cuda_stream(res); + res.add_resource_factory(std::make_shared(stream)); + } + return *res.get_resource(resource_type::CUSOLVER_SP_HANDLE); +}; +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/cusparse_handle.hpp b/cpp/include/raft/core/resource/cusparse_handle.hpp new file mode 100644 index 0000000000..133e01f164 --- /dev/null +++ b/cpp/include/raft/core/resource/cusparse_handle.hpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +namespace raft::resource { +class cusparse_resource : public resource { + public: + cusparse_resource(rmm::cuda_stream_view stream) + { + RAFT_CUSPARSE_TRY_NO_THROW(cusparseCreate(&cusparse_res)); + RAFT_CUSPARSE_TRY_NO_THROW(cusparseSetStream(cusparse_res, stream)); + } + + ~cusparse_resource() { RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroy(cusparse_res)); } + void* get_resource() override { return &cusparse_res; } + + private: + cusparseHandle_t cusparse_res; +}; + +/** + * Factory that knows how to construct a + * specific raft::resource to populate + * the res_t. + */ +class cusparse_resource_factory : public resource_factory { + public: + cusparse_resource_factory(rmm::cuda_stream_view stream) : stream_(stream) {} + resource_type get_resource_type() override { return resource_type::CUSPARSE_HANDLE; } + resource* make_resource() override { return new cusparse_resource(stream_); } + + private: + rmm::cuda_stream_view stream_; +}; + +/** + * Load a cusparseres_t from raft res if it exists, otherwise + * add it and return it. 
+ * @param res + * @return + */ +inline cusparseHandle_t get_cusparse_handle(resources const& res) +{ + if (!res.has_resource_factory(resource_type::CUSPARSE_HANDLE)) { + rmm::cuda_stream_view stream = get_cuda_stream(res); + res.add_resource_factory(std::make_shared(stream)); + } + return *res.get_resource(resource_type::CUSPARSE_HANDLE); +}; +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/detail/stream_sync_event.hpp b/cpp/include/raft/core/resource/detail/stream_sync_event.hpp new file mode 100644 index 0000000000..1d02fef20d --- /dev/null +++ b/cpp/include/raft/core/resource/detail/stream_sync_event.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +namespace raft::resource::detail { + +/** + * Factory that knows how to construct a specific raft::resource to populate + * the res_t. + */ +class cuda_stream_sync_event_resource_factory : public resource_factory { + public: + resource_type get_resource_type() override { return resource_type::CUDA_STREAM_SYNC_EVENT; } + resource* make_resource() override { return new cuda_event_resource(); } +}; + +/** + * Load a cudaEvent from a resources instance (and populate it on the resources instance) + * if needed) for syncing the main cuda stream. + * @param res raft resources instance for managing resources + * @return + */ +inline cudaEvent_t& get_cuda_stream_sync_event(resources const& res) +{ + if (!res.has_resource_factory(resource_type::CUDA_STREAM_SYNC_EVENT)) { + res.add_resource_factory(std::make_shared()); + } + return *res.get_resource(resource_type::CUDA_STREAM_SYNC_EVENT); +}; + +} // namespace raft::resource::detail diff --git a/cpp/include/raft/core/resource/device_id.hpp b/cpp/include/raft/core/resource/device_id.hpp new file mode 100644 index 0000000000..76c57166b3 --- /dev/null +++ b/cpp/include/raft/core/resource/device_id.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include + +namespace raft::resource { + +class device_id_resource : public resource { + public: + device_id_resource() + : dev_id_([]() -> int { + int cur_dev = -1; + RAFT_CUDA_TRY_NO_THROW(cudaGetDevice(&cur_dev)); + return cur_dev; + }()) + { + } + void* get_resource() override { return &dev_id_; } + + ~device_id_resource() override {} + + private: + int dev_id_; +}; + +/** + * Factory that knows how to construct a + * specific raft::resource to populate + * the res_t. + */ +class device_id_resource_factory : public resource_factory { + public: + resource_type get_resource_type() override { return resource_type::DEVICE_ID; } + resource* make_resource() override { return new device_id_resource(); } +}; + +/** + * Load a device id from a res (and populate it on the res if needed). + * @param res raft res object for managing resources + * @return + */ +inline int get_device_id(resources const& res) +{ + if (!res.has_resource_factory(resource_type::DEVICE_ID)) { + res.add_resource_factory(std::make_shared()); + } + return *res.get_resource(resource_type::DEVICE_ID); +}; +} // namespace raft::resource \ No newline at end of file diff --git a/cpp/include/raft/core/resource/device_properties.hpp b/cpp/include/raft/core/resource/device_properties.hpp new file mode 100644 index 0000000000..d6193e7a95 --- /dev/null +++ b/cpp/include/raft/core/resource/device_properties.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +namespace raft::resource { + +class device_properties_resource : public resource { + public: + device_properties_resource(int dev_id) + { + RAFT_CUDA_TRY_NO_THROW(cudaGetDeviceProperties(&prop_, dev_id)); + } + void* get_resource() override { return &prop_; } + + ~device_properties_resource() override {} + + private: + cudaDeviceProp prop_; +}; + +/** + * Factory that knows how to construct a + * specific raft::resource to populate + * the res_t. + */ +class device_properties_resource_factory : public resource_factory { + public: + device_properties_resource_factory(int dev_id) : dev_id_(dev_id) {} + resource_type get_resource_type() override { return resource_type::DEVICE_PROPERTIES; } + resource* make_resource() override { return new device_properties_resource(dev_id_); } + + private: + int dev_id_; +}; + +/** + * Load a cudaDeviceProp from a res (and populate it on the res if needed). 
+ * @param res raft res object for managing resources + * @return + */ +inline cudaDeviceProp& get_device_properties(resources const& res) +{ + if (!res.has_resource_factory(resource_type::DEVICE_PROPERTIES)) { + int dev_id = get_device_id(res); + res.add_resource_factory(std::make_shared(dev_id)); + } + return *res.get_resource(resource_type::DEVICE_PROPERTIES); +}; +} // namespace raft::resource \ No newline at end of file diff --git a/cpp/include/raft/core/resource/resource_types.hpp b/cpp/include/raft/core/resource/resource_types.hpp new file mode 100644 index 0000000000..c763066c79 --- /dev/null +++ b/cpp/include/raft/core/resource/resource_types.hpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace raft::resource { + +/** + * @brief Resource types can apply to any resource and don't have to be host- or device-specific. + */ +enum resource_type { + // device-specific resource types + CUBLAS_HANDLE = 0, // cublas handle + CUSOLVER_DN_HANDLE, // cusolver dn handle + CUSOLVER_SP_HANDLE, // cusolver sp handle + CUSPARSE_HANDLE, // cusparse handle + CUDA_STREAM_VIEW, // view of a cuda stream + CUDA_STREAM_POOL, // cuda stream pool + CUDA_STREAM_SYNC_EVENT, // cuda event for syncing streams + COMMUNICATOR, // raft communicator + SUB_COMMUNICATOR, // raft sub communicator + DEVICE_PROPERTIES, // cuda device properties + DEVICE_ID, // cuda device id + THRUST_POLICY, // thrust execution policy + + LAST_KEY // reserved for the last key +}; + +/** + * @brief A resource constructs and contains an instance of + * some pre-determined object type and facades that object + * behind a common API. + */ +class resource { + public: + virtual void* get_resource() = 0; + + virtual ~resource() {} +}; + +class empty_resource : public resource { + public: + empty_resource() : resource() {} + + void* get_resource() override { return nullptr; } + + ~empty_resource() override {} +}; + +/** + * @brief A resource factory knows how to construct an instance of + * a specific raft::resource::resource. + */ +class resource_factory { + public: + /** + * @brief Return the resource_type associated with the current factory + * @return resource_type corresponding to the current factory + */ + virtual resource_type get_resource_type() = 0; + + /** + * @brief Construct an instance of the factory's underlying resource. + * @return resource instance + */ + virtual resource* make_resource() = 0; +}; + +/** + * @brief A resource factory knows how to construct an instance of + * a specific raft::resource::resource. 
+ */ +class empty_resource_factory : public resource_factory { + public: + empty_resource_factory() : resource_factory() {} + /** + * @brief Return the resource_type associated with the current factory + * @return resource_type corresponding to the current factory + */ + resource_type get_resource_type() override { return resource_type::LAST_KEY; } + + /** + * @brief Construct an instance of the factory's underlying resource. + * @return resource instance + */ + resource* make_resource() override { return &res; } + + private: + empty_resource res; +}; + +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/sub_comms.hpp b/cpp/include/raft/core/resource/sub_comms.hpp new file mode 100644 index 0000000000..9c2c67deed --- /dev/null +++ b/cpp/include/raft/core/resource/sub_comms.hpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +namespace raft::resource { +class sub_comms_resource : public resource { + public: + sub_comms_resource() : communicators_() {} + void* get_resource() override { return &communicators_; } + + ~sub_comms_resource() override {} + + private: + std::unordered_map> communicators_; +}; + +/** + * Factory that knows how to construct a + * specific raft::resource to populate + * the res_t. + */ +class sub_comms_resource_factory : public resource_factory { + public: + resource_type get_resource_type() override { return resource_type::SUB_COMMUNICATOR; } + resource* make_resource() override { return new sub_comms_resource(); } +}; + +inline const comms::comms_t& get_subcomm(const resources& res, std::string key) +{ + if (!res.has_resource_factory(resource_type::SUB_COMMUNICATOR)) { + res.add_resource_factory(std::make_shared()); + } + + auto sub_comms = + res.get_resource>>( + resource_type::SUB_COMMUNICATOR); + auto sub_comm = sub_comms->at(key); + RAFT_EXPECTS(nullptr != sub_comm.get(), "ERROR: Subcommunicator was not initialized"); + + return *sub_comm; +} + +inline void set_subcomm(resources const& res, + std::string key, + std::shared_ptr subcomm) +{ + if (!res.has_resource_factory(resource_type::SUB_COMMUNICATOR)) { + res.add_resource_factory(std::make_shared()); + } + auto sub_comms = + res.get_resource>>( + resource_type::SUB_COMMUNICATOR); + sub_comms->insert(std::make_pair(key, subcomm)); +} +} // namespace raft::resource \ No newline at end of file diff --git a/cpp/include/raft/core/resource/thrust_policy.hpp b/cpp/include/raft/core/resource/thrust_policy.hpp new file mode 100644 index 0000000000..e3e3cf6aef --- /dev/null +++ b/cpp/include/raft/core/resource/thrust_policy.hpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/resource_types.hpp>
+#include <rmm/exec_policy.hpp>
+namespace raft::resource {
+class thrust_policy_resource : public resource {
+ public:
+  thrust_policy_resource(rmm::cuda_stream_view stream_view)
+    : thrust_policy_(std::make_unique<rmm::exec_policy>(stream_view))
+  {
+  }
+  void* get_resource() override { return thrust_policy_.get(); }
+
+  ~thrust_policy_resource() override {}
+
+ private:
+  std::unique_ptr<rmm::exec_policy> thrust_policy_;
+};
+
+/**
+ * Factory that knows how to construct a specific
+ * raft::resource to populate the resources instance.
+ */
+class thrust_policy_resource_factory : public resource_factory {
+ public:
+  thrust_policy_resource_factory(rmm::cuda_stream_view stream_view) : stream_view_(stream_view) {}
+  resource_type get_resource_type() override { return resource_type::THRUST_POLICY; }
+  resource* make_resource() override { return new thrust_policy_resource(stream_view_); }
+
+ private:
+  rmm::cuda_stream_view stream_view_;
+};
+
+/**
+ * Load a thrust policy from a res (and populate it on the res if needed).
+ * @param res raft res object for managing resources
+ * @return thrust execution policy
+ */
+inline rmm::exec_policy& get_thrust_policy(resources const& res)
+{
+  if (!res.has_resource_factory(resource_type::THRUST_POLICY)) {
+    rmm::cuda_stream_view stream = get_cuda_stream(res);
+    res.add_resource_factory(std::make_shared<thrust_policy_resource_factory>(stream));
+  }
+  return *res.get_resource<rmm::exec_policy>(resource_type::THRUST_POLICY);
+}
+}  // namespace raft::resource
\ No newline at end of file
diff --git a/cpp/include/raft/core/resources.hpp b/cpp/include/raft/core/resources.hpp
new file mode 100644
index 0000000000..797fd5968d
--- /dev/null
+++ b/cpp/include/raft/core/resources.hpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "resource/resource_types.hpp"
+#include <memory>
+#include <mutex>
+#include <raft/core/error.hpp>
+#include <utility>
+#include <vector>
+
+namespace raft {
+
+/**
+ * @brief Resource container which allows lazy-loading and registration
+ * of resource_factory implementations, which in turn generate resource instances.
+ *
+ * This class is intended to be agnostic of the resources it contains and
+ * does not, itself, differentiate between host and device resources. Downstream
+ * accessor functions can then register and load resources as needed in order
+ * to keep its usage somewhat opaque to end-users.
+ *
+ * @code{.cpp}
+ * #include <raft/core/resources.hpp>
+ * #include <raft/core/resource/cublas_handle.hpp>
+ * #include <raft/core/resource/cuda_stream.hpp>
+ *
+ * raft::resources res;
+ * auto stream        = raft::resource::get_cuda_stream(res);
+ * auto cublas_handle = raft::resource::get_cublas_handle(res);
+ * @endcode
+ */
+class resources {
+ public:
+  template <typename T>
+  using pair_res = std::pair<resource::resource_type, std::shared_ptr<T>>;
+
+  using pair_res_factory = pair_res<resource::resource_factory>;
+  using pair_resource    = pair_res<resource::resource>;
+
+  resources()
+    : factories_(resource::resource_type::LAST_KEY), resources_(resource::resource_type::LAST_KEY)
+  {
+    for (int i = 0; i < resource::resource_type::LAST_KEY; ++i) {
+      factories_.at(i) = std::make_pair(resource::resource_type::LAST_KEY,
+                                        std::make_shared<resource::empty_resource_factory>());
+      resources_.at(i) = std::make_pair(resource::resource_type::LAST_KEY,
+                                        std::make_shared<resource::empty_resource>());
+    }
+  }
+
+  resources(const resources&) = delete;
+  resources& operator=(const resources&) = delete;
+  resources(resources&&)                 = delete;
+  resources& operator=(resources&&) = delete;
+
+  /**
+   * @brief Returns true if a resource_factory has been registered for the
+   * given resource_type, false otherwise.
+   * @param resource_type resource type to check
+   * @return true if resource_factory is registered for the given resource_type
+   */
+  bool has_resource_factory(resource::resource_type resource_type) const
+  {
+    std::lock_guard<std::mutex> _(mutex_);
+    return factories_.at(resource_type).first != resource::resource_type::LAST_KEY;
+  }
+
+  /**
+   * @brief Register a resource_factory with the current instance.
+   * This will overwrite any existing resource factories.
+   * @param factory resource factory to register on the current instance
+   */
+  void add_resource_factory(std::shared_ptr<resource::resource_factory> factory) const
+  {
+    std::lock_guard<std::mutex> _(mutex_);
+    resource::resource_type rtype = factory->get_resource_type();
+    RAFT_EXPECTS(rtype != resource::resource_type::LAST_KEY,
+                 "LAST_KEY is a placeholder and not a valid resource factory type.");
+    factories_.at(rtype) = std::make_pair(rtype, factory);
+  }
+
+  /**
+   * @brief Retrieve a resource for the given resource_type and cast to given pointer type.
+   * Note that the resources are loaded lazily on-demand and resources which don't yet
+   * exist on the current instance will be created using the corresponding factory, if
+   * it exists.
+   * @tparam res_t pointer type to which the retrieved resource will be cast
+   * @param resource_type resource type to retrieve
+   * @return the given resource, if it exists.
+   */
+  template <typename res_t>
+  res_t* get_resource(resource::resource_type resource_type) const
+  {
+    std::lock_guard<std::mutex> _(mutex_);
+
+    if (resources_.at(resource_type).first == resource::resource_type::LAST_KEY) {
+      RAFT_EXPECTS(factories_.at(resource_type).first != resource::resource_type::LAST_KEY,
+                   "No resource factory has been registered for the given resource %d.",
+                   resource_type);
+      resource::resource_factory* factory = factories_.at(resource_type).second.get();
+      resources_.at(resource_type)        = std::make_pair(
+        resource_type, std::shared_ptr<resource::resource>(factory->make_resource()));
+    }
+
+    resource::resource* res = resources_.at(resource_type).second.get();
+    return reinterpret_cast<res_t*>(res->get_resource());
+  }
+
+ private:
+  mutable std::mutex mutex_;
+  mutable std::vector<pair_res_factory> factories_;
+  mutable std::vector<pair_resource> resources_;
+};
+}  // namespace raft
\ No newline at end of file
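With these accessors in place, downstream code can pull in only the resources it needs. A minimal consumer sketch (illustrative only, not part of this patch; it assumes the accessor headers introduced in this PR and omits error checking):

```cpp
#include <cublas_v2.h>
#include <raft/core/resources.hpp>
#include <raft/core/resource/cublas_handle.hpp>
#include <raft/core/resource/cuda_stream.hpp>

// Only the cublas and stream accessors are included here; cusparse,
// cusolver, comms, etc. are neither included nor lazily constructed.
void scale_vector(raft::resources const& res, float* x, int n, float alpha)
{
  cublasHandle_t cublas_h = raft::resource::get_cublas_handle(res);
  cudaStream_t stream     = raft::resource::get_cuda_stream(res);
  cublasSetStream(cublas_h, stream);
  cublasSscal(cublas_h, n, &alpha, x, 1);
}
```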
diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh
index 628b83a23c..8ed71864fd 100644
--- a/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 5be8401a6f..8ca30a5c82 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License.
You may obtain a copy of the License at @@ -77,25 +77,25 @@ endfunction() if(BUILD_TESTS) ConfigureTest( - NAME CLUSTER_TEST PATH test/cluster/kmeans.cu test/cluster_solvers.cu test/cluster/linkage.cu - OPTIONAL DIST NN + NAME CLUSTER_TEST PATH test/cluster/kmeans.cu test/cluster/cluster_solvers.cu + test/cluster/linkage.cu OPTIONAL DIST NN ) ConfigureTest( NAME CORE_TEST PATH - test/common/logger.cpp + test/core/logger.cpp test/core/operators_device.cu test/core/operators_host.cpp - test/handle.cpp - test/interruptible.cu - test/nvtx.cpp - test/mdarray.cu - test/mdspan_utils.cu - test/memory_type.cpp - test/span.cpp - test/span.cu + test/core/handle.cpp + test/core/interruptible.cu + test/core/nvtx.cpp + test/core/mdarray.cu + test/core/mdspan_utils.cu + test/core/memory_type.cpp + test/core/span.cpp + test/core/span.cu test/test.cpp ) @@ -179,7 +179,7 @@ if(BUILD_TESTS) test/matrix/reverse.cu test/matrix/slice.cu test/matrix/triangular.cu - test/spectral_matrix.cu + test/sparse/spectral_matrix.cu ) ConfigureTest( @@ -198,8 +198,8 @@ if(BUILD_TESTS) ) ConfigureTest( - NAME SOLVERS_TEST PATH test/cluster_solvers_deprecated.cu test/eigen_solvers.cu test/lap/lap.cu - test/mst.cu OPTIONAL DIST + NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu + test/lap/lap.cu test/sparse/mst.cu OPTIONAL DIST ) ConfigureTest( @@ -290,7 +290,7 @@ if(BUILD_TESTS) ) ConfigureTest( - NAME UTILS_TEST PATH test/common/seive.cu test/cudart_utils.cpp test/device_atomics.cu - test/integer_utils.cpp test/pow2_utils.cu + NAME UTILS_TEST PATH test/core/seive.cu test/util/cudart_utils.cpp test/util/device_atomics.cu + test/util/integer_utils.cpp test/util/pow2_utils.cu ) endif() diff --git a/cpp/test/cluster_solvers.cu b/cpp/test/cluster/cluster_solvers.cu similarity index 96% rename from cpp/test/cluster_solvers.cu rename to cpp/test/cluster/cluster_solvers.cu index 26fbfec011..9293c78294 100644 --- a/cpp/test/cluster_solvers.cu +++ b/cpp/test/cluster/cluster_solvers.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,12 +66,7 @@ TEST(Raft, ModularitySolvers) using value_type = double; handle_t h; - ASSERT_EQ(0, - h. - - get_device() - - ); + ASSERT_EQ(0, h.get_device()); index_type neigvs{10}; index_type maxiter{100}; diff --git a/cpp/test/cluster_solvers_deprecated.cu b/cpp/test/cluster/cluster_solvers_deprecated.cu similarity index 96% rename from cpp/test/cluster_solvers_deprecated.cu rename to cpp/test/cluster/cluster_solvers_deprecated.cu index 167a710b34..dbc7722485 100644 --- a/cpp/test/cluster_solvers_deprecated.cu +++ b/cpp/test/cluster/cluster_solvers_deprecated.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/cluster/kmeans.cu b/cpp/test/cluster/kmeans.cu index 9644541a0c..abc4cd6e13 100644 --- a/cpp/test/cluster/kmeans.cu +++ b/cpp/test/cluster/kmeans.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -58,11 +58,10 @@ template class KmeansTest : public ::testing::TestWithParam> { protected: KmeansTest() - : stream(handle.get_stream()), - d_labels(0, stream), - d_labels_ref(0, stream), - d_centroids(0, stream), - d_sample_weight(0, stream) + : d_labels(0, handle.get_stream()), + d_labels_ref(0, handle.get_stream()), + d_centroids(0, handle.get_stream()), + d_sample_weight(0, handle.get_stream()) { } @@ -70,6 +69,7 @@ class KmeansTest : public ::testing::TestWithParam> { { testparams = ::testing::TestWithParam>::GetParam(); + auto stream = handle.get_stream(); int n_samples = testparams.n_row; int n_features = testparams.n_col; params.n_clusters = testparams.n_clusters; @@ -249,6 +249,7 @@ class KmeansTest : public ::testing::TestWithParam> { auto X = raft::make_device_matrix(handle, n_samples, n_features); auto labels = raft::make_device_vector(handle, n_samples); + auto stream = handle.get_stream(); raft::random::make_blobs(X.data_handle(), labels.data_handle(), @@ -323,7 +324,6 @@ class KmeansTest : public ::testing::TestWithParam> { protected: raft::handle_t handle; - cudaStream_t stream; KmeansInputs testparams; rmm::device_uvector d_labels; rmm::device_uvector d_labels_ref; diff --git a/cpp/test/cluster/linkage.cu b/cpp/test/cluster/linkage.cu index 53aa5c55e3..a36ad4abea 100644 --- a/cpp/test/cluster/linkage.cu +++ b/cpp/test/cluster/linkage.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -162,15 +162,18 @@ class LinkageTest : public ::testing::TestWithParam> { public: LinkageTest() : params(::testing::TestWithParam>::GetParam()), - stream(handle.get_stream()), - labels(params.n_row, stream), - labels_ref(params.n_row, stream) + labels(0, handle.get_stream()), + labels_ref(0, handle.get_stream()) { } protected: void basicTest() { + auto stream = handle.get_stream(); + + labels.resize(params.n_row, stream); + labels_ref.resize(params.n_row, stream); rmm::device_uvector data(params.n_row * params.n_col, stream); raft::copy(data.data(), params.data.data(), data.size(), stream); @@ -178,8 +181,6 @@ class LinkageTest : public ::testing::TestWithParam> { rmm::device_uvector out_children(params.n_row * 2, stream); - raft::handle_t handle; - auto data_view = raft::make_device_matrix_view( data.data(), params.n_row, params.n_col); auto dendrogram_view = @@ -205,7 +206,6 @@ class LinkageTest : public ::testing::TestWithParam> { protected: raft::handle_t handle; - cudaStream_t stream; LinkageInputs params; rmm::device_uvector labels, labels_ref; diff --git a/cpp/test/core/handle.cpp b/cpp/test/core/handle.cpp new file mode 100644 index 0000000000..2148742e83 --- /dev/null +++ b/cpp/test/core/handle.cpp @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <gtest/gtest.h>
+#include <memory>
+#include <raft/core/comms.hpp>
+#include <raft/core/handle.hpp>
+#include <raft/util/cudart_utils.hpp>
+#include <rmm/cuda_stream_pool.hpp>
+#include <rmm/cuda_stream_view.hpp>
+#include <vector>
+
+namespace raft {
+
+using namespace comms;
+class mock_comms : public comms_iface {
+ public:
+  mock_comms(int n) : n_ranks(n) {}
+  ~mock_comms() {}
+
+  int get_size() const override { return n_ranks; }
+
+  int get_rank() const override { return 0; }
+
+  std::unique_ptr<comms_iface> comm_split(int color, int key) const
+  {
+    return std::unique_ptr<comms_iface>(new mock_comms(0));
+  }
+
+  void barrier() const {}
+
+  void get_request_id(request_t* req) const {}
+
+  void isend(const void* buf, size_t size, int dest, int tag, request_t* request) const {}
+
+  void irecv(void* buf, size_t size, int source, int tag, request_t* request) const {}
+
+  void waitall(int count, request_t array_of_requests[]) const {}
+
+  void allreduce(const void* sendbuff,
+                 void* recvbuff,
+                 size_t count,
+                 datatype_t datatype,
+                 op_t op,
+                 cudaStream_t stream) const
+  {
+  }
+
+  void bcast(void* buff, size_t count, datatype_t datatype, int root, cudaStream_t stream) const {}
+
+  void bcast(const void* sendbuff,
+             void* recvbuff,
+             size_t count,
+             datatype_t datatype,
+             int root,
+             cudaStream_t stream) const
+  {
+  }
+
+  void reduce(const void* sendbuff,
+              void* recvbuff,
+              size_t count,
+              datatype_t datatype,
+              op_t op,
+              int root,
+              cudaStream_t stream) const
+  {
+  }
+
+  void allgather(const void* sendbuff,
+                 void* recvbuff,
+                 size_t sendcount,
+                 datatype_t datatype,
+                 cudaStream_t stream) const
+  {
+  }
+
+  void allgatherv(const void* sendbuf,
+                  void* recvbuf,
+                  const size_t* recvcounts,
+                  const size_t* displs,
+                  datatype_t datatype,
+                  cudaStream_t stream) const
+  {
+  }
+
+  void gather(const void* sendbuff,
+              void* recvbuff,
+              size_t sendcount,
+              datatype_t datatype,
+              int root,
+              cudaStream_t stream) const
+  {
+  }
+
+  void gatherv(const void* sendbuff,
+               void* recvbuff,
+               size_t sendcount,
+               const size_t* recvcounts,
+               const size_t* displs,
+               datatype_t datatype,
+               int root,
+               cudaStream_t stream) const
+  {
+  }
+
+  void reducescatter(const void* sendbuff,
+                     void* recvbuff,
+                     size_t recvcount,
+                     datatype_t datatype,
+                     op_t op,
+                     cudaStream_t stream) const
+  {
+  }
+
+  status_t sync_stream(cudaStream_t stream) const { return status_t::SUCCESS; }
+
+  // if a thread is sending & receiving at the same time, use device_sendrecv to avoid deadlock
+  void device_send(const void* buf, size_t size, int dest, cudaStream_t stream) const {}
+
+  // if a thread is sending & receiving at the same time, use device_sendrecv to avoid deadlock
+  void device_recv(void* buf, size_t size, int source, cudaStream_t stream) const {}
+
+  void device_sendrecv(const void* sendbuf,
+                       size_t sendsize,
+                       int dest,
+                       void* recvbuf,
+                       size_t recvsize,
+                       int source,
+                       cudaStream_t stream) const
+  {
+  }
+
+  void device_multicast_sendrecv(const void* sendbuf,
+                                 std::vector<size_t> const& sendsizes,
+                                 std::vector<size_t> const& sendoffsets,
+                                 std::vector<int> const& dests,
+                                 void* recvbuf,
+                                 std::vector<size_t> const& recvsizes,
+                                 std::vector<size_t> const& recvoffsets,
+                                 std::vector<int> const& sources,
+                                 cudaStream_t stream) const
+  {
+  }
+
+  void group_start() const {}
+
+  void group_end() const {}
+
+ private:
+  int n_ranks;
+};
+
+TEST(Raft, HandleDefault)
+{
+  handle_t h;
+  ASSERT_EQ(0, h.get_device());
+  ASSERT_EQ(rmm::cuda_stream_per_thread, h.get_stream());
+  ASSERT_NE(nullptr, h.get_cublas_handle());
+  ASSERT_NE(nullptr, h.get_cusolver_dn_handle());
+  ASSERT_NE(nullptr, h.get_cusolver_sp_handle());
+  ASSERT_NE(nullptr, h.get_cusparse_handle());
+}
+
+TEST(Raft, Handle)
+{
+  // test stream pool creation
+  constexpr std::size_t n_streams = 4;
+  auto stream_pool                = std::make_shared<rmm::cuda_stream_pool>(n_streams);
+  handle_t h(rmm::cuda_stream_default, stream_pool);
+  ASSERT_EQ(n_streams, h.get_stream_pool_size());
+
+  // test non default stream handle
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+  rmm::cuda_stream_view stream_view(stream);
+  handle_t handle(stream_view);
+  ASSERT_EQ(stream_view, handle.get_stream());
+  handle.sync_stream(stream);
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+TEST(Raft, DefaultConstructor)
+{
+  handle_t handle;
+
+  // Make sure waiting on the default stream pool
+  // does not fail.
+  handle.wait_stream_pool_on_stream();
+  handle.sync_stream_pool();
+
+  auto s1 = handle.get_next_usable_stream();
+  auto s2 = handle.get_stream();
+  auto s3 = handle.get_next_usable_stream(5);
+
+  ASSERT_EQ(s1, s2);
+  ASSERT_EQ(s2, s3);
+  ASSERT_EQ(0, handle.get_stream_pool_size());
+}
+
+TEST(Raft, GetHandleFromPool)
+{
+  constexpr std::size_t n_streams = 4;
+  auto stream_pool                = std::make_shared<rmm::cuda_stream_pool>(n_streams);
+  handle_t parent(rmm::cuda_stream_default, stream_pool);
+
+  for (std::size_t i = 0; i < n_streams; i++) {
+    auto worker_stream = parent.get_stream_from_stream_pool(i);
+    handle_t child(worker_stream);
+    ASSERT_EQ(parent.get_stream_from_stream_pool(i), child.get_stream());
+  }
+
+  parent.wait_stream_pool_on_stream();
+}
+
+TEST(Raft, Comms)
+{
+  handle_t handle;
+  auto comm1 = std::make_shared<comms_t>(std::unique_ptr<comms_iface>(new mock_comms(2)));
+  handle.set_comms(comm1);
+
+  ASSERT_EQ(handle.get_comms().get_size(), 2);
+}
+
+TEST(Raft, SubComms)
+{
+  handle_t handle;
+  auto comm1 = std::make_shared<comms_t>(std::unique_ptr<comms_iface>(new mock_comms(1)));
+  handle.set_subcomm("key1", comm1);
+
+  auto comm2 = std::make_shared<comms_t>(std::unique_ptr<comms_iface>(new mock_comms(2)));
+  handle.set_subcomm("key2", comm2);
+
+  ASSERT_EQ(handle.get_subcomm("key1").get_size(), 1);
+  ASSERT_EQ(handle.get_subcomm("key2").get_size(), 2);
+}
+
+}  // namespace raft
diff --git a/cpp/test/interruptible.cu b/cpp/test/core/interruptible.cu
similarity index 98%
rename from cpp/test/interruptible.cu
rename to cpp/test/core/interruptible.cu
index 92adfabd55..f54bb6f859 100644
--- a/cpp/test/interruptible.cu
+++ b/cpp/test/core/interruptible.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/cpp/test/common/logger.cpp b/cpp/test/core/logger.cpp
similarity index 98%
rename from cpp/test/common/logger.cpp
rename to cpp/test/core/logger.cpp
index a8460e45ca..3f29c9f12c 100644
--- a/cpp/test/common/logger.cpp
+++ b/cpp/test/core/logger.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/cpp/test/mdarray.cu b/cpp/test/core/mdarray.cu
similarity index 99%
rename from cpp/test/mdarray.cu
rename to cpp/test/core/mdarray.cu
index c292feb894..8e455bebfe 100644
--- a/cpp/test/mdarray.cu
+++ b/cpp/test/core/mdarray.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/cpp/test/mdspan_utils.cu b/cpp/test/core/mdspan_utils.cu similarity index 99% rename from cpp/test/mdspan_utils.cu rename to cpp/test/core/mdspan_utils.cu index 7f1efb78bb..6eaecf78b4 100644 --- a/cpp/test/mdspan_utils.cu +++ b/cpp/test/core/mdspan_utils.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/memory_type.cpp b/cpp/test/core/memory_type.cpp similarity index 96% rename from cpp/test/memory_type.cpp rename to cpp/test/core/memory_type.cpp index 57d44ceefe..02aa8caa6c 100644 --- a/cpp/test/memory_type.cpp +++ b/cpp/test/core/memory_type.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/nvtx.cpp b/cpp/test/core/nvtx.cpp similarity index 96% rename from cpp/test/nvtx.cpp rename to cpp/test/core/nvtx.cpp index 635fe55012..e6c29fa3d8 100644 --- a/cpp/test/nvtx.cpp +++ b/cpp/test/core/nvtx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/common/seive.cu b/cpp/test/core/seive.cu similarity index 95% rename from cpp/test/common/seive.cu rename to cpp/test/core/seive.cu index 54a59d6251..8634abf3be 100644 --- a/cpp/test/common/seive.cu +++ b/cpp/test/core/seive.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/span.cpp b/cpp/test/core/span.cpp similarity index 99% rename from cpp/test/span.cpp rename to cpp/test/core/span.cpp index f8d9345a12..1a21b5ff47 100644 --- a/cpp/test/span.cpp +++ b/cpp/test/core/span.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/span.cu b/cpp/test/core/span.cu similarity index 99% rename from cpp/test/span.cu rename to cpp/test/core/span.cu index e9af9b857f..f16a18332b 100644 --- a/cpp/test/span.cu +++ b/cpp/test/core/span.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/test_span.hpp b/cpp/test/core/test_span.hpp similarity index 99% rename from cpp/test/test_span.hpp rename to cpp/test/core/test_span.hpp index 254c89f91c..27c50e9695 100644 --- a/cpp/test/test_span.hpp +++ b/cpp/test/core/test_span.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index 067b1b2c0e..cbfd97ebc6 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -519,10 +519,10 @@ class BigMatrixDistanceTest : public ::testing::Test { } protected: + raft::handle_t handle; int m = 48000; int n = 48000; int k = 1; - raft::handle_t handle; rmm::device_uvector x, dist; }; } // end namespace distance diff --git a/cpp/test/distance/fused_l2_nn.cu b/cpp/test/distance/fused_l2_nn.cu index 252f56607f..e746a2382d 100644 --- a/cpp/test/distance/fused_l2_nn.cu +++ b/cpp/test/distance/fused_l2_nn.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -158,6 +158,8 @@ class FusedL2NNTest : public ::testing::TestWithParam> { } protected: + raft::handle_t handle; + cudaStream_t stream; Inputs params; rmm::device_uvector x; rmm::device_uvector y; @@ -166,8 +168,6 @@ class FusedL2NNTest : public ::testing::TestWithParam> { rmm::device_uvector> min; rmm::device_uvector> min_ref; rmm::device_uvector workspace; - raft::handle_t handle; - cudaStream_t stream; virtual void generateGoldenResult() { diff --git a/cpp/test/handle.cpp b/cpp/test/handle.cpp deleted file mode 100644 index 2ebc38d03a..0000000000 --- a/cpp/test/handle.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace raft { - -TEST(Raft, HandleDefault) -{ - handle_t h; - ASSERT_EQ(0, h.get_device()); - ASSERT_EQ(rmm::cuda_stream_per_thread, h.get_stream()); - ASSERT_NE(nullptr, h.get_cublas_handle()); - ASSERT_NE(nullptr, h.get_cusolver_dn_handle()); - ASSERT_NE(nullptr, h.get_cusolver_sp_handle()); - ASSERT_NE(nullptr, h.get_cusparse_handle()); -} - -TEST(Raft, Handle) -{ - // test stream pool creation - constexpr std::size_t n_streams = 4; - auto stream_pool = std::make_shared(n_streams); - handle_t h(rmm::cuda_stream_default, stream_pool); - ASSERT_EQ(n_streams, h.get_stream_pool_size()); - - // test non default stream handle - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - rmm::cuda_stream_view stream_view(stream); - handle_t handle(stream_view); - ASSERT_EQ(stream_view, handle.get_stream()); - handle.sync_stream(stream); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); -} - -TEST(Raft, GetHandleFromPool) -{ - constexpr std::size_t n_streams = 4; - auto stream_pool = std::make_shared(n_streams); - handle_t parent(rmm::cuda_stream_default, stream_pool); - - for (std::size_t i = 0; i < n_streams; i++) { - auto worker_stream = parent.get_stream_from_stream_pool(i); - handle_t child(worker_stream); - ASSERT_EQ(parent.get_stream_from_stream_pool(i), child.get_stream()); - } -} - -} // namespace raft diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/linalg/eigen_solvers.cu similarity index 98% rename from cpp/test/eigen_solvers.cu rename to cpp/test/linalg/eigen_solvers.cu index 68b431b894..3e7d923e2d 100644 --- a/cpp/test/eigen_solvers.cu +++ b/cpp/test/linalg/eigen_solvers.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/matrix/columnSort.cu b/cpp/test/matrix/columnSort.cu index 000a911efd..00205830c4 100644 --- a/cpp/test/matrix/columnSort.cu +++ b/cpp/test/matrix/columnSort.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -116,10 +116,10 @@ class ColumnSort : public ::testing::TestWithParam> { } protected: + raft::handle_t handle; columnSort params; rmm::device_uvector keyIn, keySorted, keySortGolden; rmm::device_uvector valueOut, goldenValOut; // valueOut are indexes - raft::handle_t handle; }; const std::vector> inputsf1 = {{0.000001f, 503, 2000, false}, diff --git a/cpp/test/matrix/linewise_op.cu b/cpp/test/matrix/linewise_op.cu index 9ce1371944..a791cbc0f0 100644 --- a/cpp/test/matrix/linewise_op.cu +++ b/cpp/test/matrix/linewise_op.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -43,8 +43,8 @@ struct LinewiseTestParams { template struct LinewiseTest : public ::testing::TestWithParam { - const LinewiseTestParams params; const raft::handle_t handle; + const LinewiseTestParams params; rmm::cuda_stream_view stream; LinewiseTest() diff --git a/cpp/test/neighbors/epsilon_neighborhood.cu b/cpp/test/neighbors/epsilon_neighborhood.cu index 4f33db489e..36d7cb25ff 100644 --- a/cpp/test/neighbors/epsilon_neighborhood.cu +++ b/cpp/test/neighbors/epsilon_neighborhood.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -72,13 +72,13 @@ class EpsNeighTest : public ::testing::TestWithParam> { false); } + const raft::handle_t handle; EpsInputs param; cudaStream_t stream = 0; rmm::device_uvector data; rmm::device_uvector adj; rmm::device_uvector labels, vd; IdxT batchSize; - const raft::handle_t handle; }; // class EpsNeighTest const std::vector> inputsfi = { diff --git a/cpp/test/neighbors/selection.cu b/cpp/test/neighbors/selection.cu index d793ea46ee..2f95ed1b3a 100644 --- a/cpp/test/neighbors/selection.cu +++ b/cpp/test/neighbors/selection.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,10 +49,10 @@ std::ostream& operator<<(std::ostream& os, const SelectTestSpec& ss) } template -auto gen_simple_ids(int n_inputs, int input_len) -> std::vector +auto gen_simple_ids(int n_inputs, int input_len, const raft::handle_t& handle) -> std::vector { std::vector out(n_inputs * input_len); - auto s = rmm::cuda_stream_default; + auto s = handle.get_stream(); rmm::device_uvector out_d(out.size(), s); iota_fill(out_d.data(), IdxT(n_inputs), IdxT(input_len), s); update_host(out.data(), out_d.data(), out.size(), s); @@ -65,14 +65,16 @@ struct SelectInOutSimple { public: bool not_supported = false; - SelectInOutSimple(const SelectTestSpec& spec, + SelectInOutSimple(std::shared_ptr handle, + const SelectTestSpec& spec, const std::vector& in_dists, const std::vector& out_dists, const std::vector& out_ids) : in_dists_(in_dists), - in_ids_(gen_simple_ids(spec.n_inputs, spec.input_len)), + in_ids_(gen_simple_ids(spec.n_inputs, spec.input_len, *handle.get())), out_dists_(out_dists), - out_ids_(out_ids) + out_ids_(out_ids), + handle_(handle) { } @@ -82,6 +84,7 @@ struct SelectInOutSimple { auto get_out_ids() -> std::vector& { return out_ids_; } private: + std::shared_ptr handle_; std::vector in_dists_; std::vector in_ids_; std::vector out_dists_; @@ -93,14 +96,17 @@ struct SelectInOutComputed { public: bool not_supported = false; - SelectInOutComputed(const SelectTestSpec& spec, + SelectInOutComputed(std::shared_ptr handle, + const SelectTestSpec& spec, knn::SelectKAlgo algo, const std::vector& in_dists, const std::optional>& in_ids = std::nullopt) - : in_dists_(in_dists), - in_ids_(in_ids.value_or(gen_simple_ids(spec.n_inputs, spec.input_len))), + : handle_(handle), + in_dists_(in_dists), + in_ids_(in_ids.value_or(gen_simple_ids(spec.n_inputs, spec.input_len, *handle.get()))), out_dists_(spec.n_inputs * spec.k), out_ids_(spec.n_inputs * spec.k) + { // check if the size is supported by the algorithm switch (algo) { @@ -119,7 +125,7 @@ struct SelectInOutComputed { default: break; } - auto 
stream = rmm::cuda_stream_default; + auto stream = handle_.get()->get_stream(); rmm::device_uvector in_dists_d(in_dists_.size(), stream); rmm::device_uvector in_ids_d(in_ids_.size(), stream); @@ -156,6 +162,7 @@ struct SelectInOutComputed { auto get_out_ids() -> std::vector& { return out_ids_; } private: + std::shared_ptr handle_; std::vector in_dists_; std::vector in_ids_; std::vector out_dists_; @@ -205,11 +212,12 @@ struct SelectInOutComputed { }; template -using Params = std::tuple; +using Params = std::tuple>; template typename ParamsReader> class SelectionTest : public testing::TestWithParam::ParamsIn> { protected: + std::shared_ptr handle_; const SelectTestSpec spec; const knn::SelectKAlgo algo; @@ -218,10 +226,11 @@ class SelectionTest : public testing::TestWithParam::InOut> ps) - : spec(std::get<0>(ps)), + : handle_(std::get<3>(ps)), + spec(std::get<0>(ps)), algo(std::get<1>(ps)), ref(std::get<2>(ps)), - res(spec, algo, ref.get_in_dists(), ref.get_in_ids()) + res(handle_, spec, algo, ref.get_in_dists(), ref.get_in_ids()) { } @@ -238,12 +247,13 @@ class SelectionTest : public testing::TestWithParam())); + ASSERT_TRUE(hostVecMatch(ref.get_out_dists(), res.get_out_dists(), Compare())); // If the dists (keys) are the same, different corresponding ids may end up in the selection due // to non-deterministic nature of some implementations. - auto& in_ids = ref.get_in_ids(); - auto& in_dists = ref.get_in_dists(); + auto& in_ids = ref.get_in_ids(); + auto& in_dists = ref.get_in_dists(); + auto compare_ids = [&in_ids, &in_dists](const IdxT& i, const IdxT& j) { if (i == j) return true; auto ix_i = size_t(std::find(in_ids.begin(), in_ids.end(), i) - in_ids.begin()); @@ -265,17 +275,20 @@ struct params_simple { using InOut = SelectInOutSimple; using Inputs = std::tuple, std::vector, std::vector>; - using ParamsIn = std::tuple; + using Handle = std::shared_ptr; + using ParamsIn = std::tuple; static auto read(ParamsIn ps) -> Params { - auto ins = std::get<0>(ps); - auto algo = std::get<1>(ps); + auto ins = std::get<0>(ps); + auto algo = std::get<1>(ps); + auto handle = std::get<2>(ps); return std::make_tuple( std::get<0>(ins), algo, SelectInOutSimple( - std::get<0>(ins), std::get<1>(ins), std::get<2>(ins), std::get<3>(ins))); + handle, std::get<0>(ins), std::get<1>(ins), std::get<2>(ins), std::get<3>(ins)), + handle); } }; @@ -345,32 +358,36 @@ INSTANTIATE_TEST_CASE_P(SelectionTest, testing::Values(knn::SelectKAlgo::FAISS, knn::SelectKAlgo::RADIX_8_BITS, knn::SelectKAlgo::RADIX_11_BITS, - knn::SelectKAlgo::WARP_SORT))); + knn::SelectKAlgo::WARP_SORT), + testing::Values(std::make_shared()))); template struct with_ref { template struct params_random { using InOut = SelectInOutComputed; - using ParamsIn = std::tuple; + using Handle = std::shared_ptr; + using ParamsIn = std::tuple; static auto read(ParamsIn ps) -> Params { - auto spec = std::get<0>(ps); - auto algo = std::get<1>(ps); + auto spec = std::get<0>(ps); + auto algo = std::get<1>(ps); + auto handle = std::get<2>(ps); + std::vector dists(spec.input_len * spec.n_inputs); - raft::handle_t handle; { - auto s = handle.get_stream(); + auto s = (*handle.get()).get_stream(); rmm::device_uvector dists_d(spec.input_len * spec.n_inputs, s); raft::random::RngState r(42); - normal(handle, r, dists_d.data(), dists_d.size(), KeyT(10.0), KeyT(100.0)); + normal(*(handle.get()), r, dists_d.data(), dists_d.size(), KeyT(10.0), KeyT(100.0)); update_host(dists.data(), dists_d.data(), dists_d.size(), s); s.synchronize(); } - return std::make_tuple(spec, algo, 
SelectInOutComputed(spec, RefAlgo, dists)); + return std::make_tuple( + spec, algo, SelectInOutComputed(handle, spec, RefAlgo, dists), handle); } }; }; @@ -416,11 +433,11 @@ auto inputs_random_largesize = testing::Values(SelectTestSpec{100, 100000, 1, tr SelectTestSpec{100, 100000, 100, true, false}, SelectTestSpec{100, 100000, 200, true}, SelectTestSpec{100000, 100, 100, false}, - SelectTestSpec{1, 1000000000, 1, true}, - SelectTestSpec{1, 1000000000, 16, false, false}, - SelectTestSpec{1, 1000000000, 64, false}, - SelectTestSpec{1, 1000000000, 128, true, false}, - SelectTestSpec{1, 1000000000, 256, false, false}); + SelectTestSpec{1, 100000000, 1, true}, + SelectTestSpec{1, 100000000, 16, false, false}, + SelectTestSpec{1, 100000000, 64, false}, + SelectTestSpec{1, 100000000, 128, true, false}, + SelectTestSpec{1, 100000000, 256, false, false}); auto inputs_random_largek = testing::Values(SelectTestSpec{100, 100000, 1000, true}, SelectTestSpec{100, 100000, 2000, true}, @@ -436,7 +453,8 @@ INSTANTIATE_TEST_CASE_P(SelectionTest, testing::Combine(inputs_random_longlist, testing::Values(knn::SelectKAlgo::RADIX_8_BITS, knn::SelectKAlgo::RADIX_11_BITS, - knn::SelectKAlgo::WARP_SORT))); + knn::SelectKAlgo::WARP_SORT), + testing::Values(std::make_shared()))); typedef SelectionTest::params_random> ReferencedRandomDoubleSizeT; @@ -446,7 +464,8 @@ INSTANTIATE_TEST_CASE_P(SelectionTest, testing::Combine(inputs_random_longlist, testing::Values(knn::SelectKAlgo::RADIX_8_BITS, knn::SelectKAlgo::RADIX_11_BITS, - knn::SelectKAlgo::WARP_SORT))); + knn::SelectKAlgo::WARP_SORT), + testing::Values(std::make_shared()))); typedef SelectionTest::params_random> ReferencedRandomDoubleInt; @@ -454,7 +473,8 @@ TEST_P(ReferencedRandomDoubleInt, LargeSize) { run(); } INSTANTIATE_TEST_CASE_P(SelectionTest, ReferencedRandomDoubleInt, testing::Combine(inputs_random_largesize, - testing::Values(knn::SelectKAlgo::WARP_SORT))); + testing::Values(knn::SelectKAlgo::WARP_SORT), + testing::Values(std::make_shared()))); /** TODO: Fix test failure in RAFT CI * diff --git a/cpp/test/random/make_blobs.cu b/cpp/test/random/make_blobs.cu index 741b374c8c..ea7283977c 100644 --- a/cpp/test/random/make_blobs.cu +++ b/cpp/test/random/make_blobs.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -147,8 +147,8 @@ class MakeBlobsTest : public ::testing::TestWithParam> { } protected: - MakeBlobsInputs params; raft::handle_t handle; + MakeBlobsInputs params; cudaStream_t stream = 0; device_vector mean_var; diff --git a/cpp/test/random/multi_variable_gaussian.cu b/cpp/test/random/multi_variable_gaussian.cu index 04626a53c7..b2b99027d6 100644 --- a/cpp/test/random/multi_variable_gaussian.cu +++ b/cpp/test/random/multi_variable_gaussian.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -79,9 +79,10 @@ template template class MVGTest : public ::testing::TestWithParam> { - protected: + public: MVGTest() - : workspace_d(0, handle.get_stream()), + : params(::testing::TestWithParam>::GetParam()), + workspace_d(0, handle.get_stream()), P_d(0, handle.get_stream()), x_d(0, handle.get_stream()), X_d(0, handle.get_stream()), @@ -90,6 +91,7 @@ class MVGTest : public ::testing::TestWithParam> { { } + protected: void SetUp() override { // getting params @@ -195,15 +197,15 @@ class MVGTest : public ::testing::TestWithParam> { } protected: + raft::handle_t handle; MVGInputs params; - std::vector P, x, X; rmm::device_uvector workspace_d, P_d, x_d, X_d, Rand_cov, Rand_mean; + std::vector P, x, X; int dim, nPoints; typename detail::multi_variable_gaussian::Decomposer method; Correlation corr; detail::multi_variable_gaussian* mvg = NULL; T tolerance; - raft::handle_t handle; }; // end of MVGTest class template @@ -220,7 +222,7 @@ class MVGMdspanTest : public ::testing::TestWithParam> { } } - protected: + public: MVGMdspanTest() : workspace_d(0, handle.get_stream()), P_d(0, handle.get_stream()), @@ -323,13 +325,14 @@ class MVGMdspanTest : public ::testing::TestWithParam> { } protected: + raft::handle_t handle; + MVGInputs params; std::vector P, x, X; rmm::device_uvector workspace_d, P_d, x_d, X_d, Rand_cov, Rand_mean; int dim, nPoints; Correlation corr; T tolerance; - raft::handle_t handle; }; // end of MVGTest class ///@todo find out the reason that Un-correlated covs are giving problems (in qr) diff --git a/cpp/test/mst.cu b/cpp/test/sparse/mst.cu similarity index 99% rename from cpp/test/mst.cu rename to cpp/test/sparse/mst.cu index d11f0b5842..7c7d264f3f 100644 --- a/cpp/test/mst.cu +++ b/cpp/test/sparse/mst.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #include -#include "test_utils.cuh" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/spectral_matrix.cu b/cpp/test/sparse/spectral_matrix.cu similarity index 98% rename from cpp/test/spectral_matrix.cu rename to cpp/test/sparse/spectral_matrix.cu index 867b1e9daf..02856cb378 100644 --- a/cpp/test/spectral_matrix.cu +++ b/cpp/test/sparse/spectral_matrix.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/stats/cov.cu b/cpp/test/stats/cov.cu index 59a2c6e081..287bb85886 100644 --- a/cpp/test/stats/cov.cu +++ b/cpp/test/stats/cov.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -103,10 +103,10 @@ class CovTest : public ::testing::TestWithParam> { } protected: - CovInputs params; - rmm::device_uvector data, mean_act, cov_act, cov_cm, cov_cm_ref; cublasHandle_t handle; cudaStream_t stream = 0; + CovInputs params; + rmm::device_uvector data, mean_act, cov_act, cov_cm, cov_cm_ref; }; ///@todo: add stable=false after it has been implemented diff --git a/cpp/test/stats/regression_metrics.cu b/cpp/test/stats/regression_metrics.cu index 86ac03c8b3..b3e0df32f8 100644 --- a/cpp/test/stats/regression_metrics.cu +++ b/cpp/test/stats/regression_metrics.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -106,8 +106,8 @@ class RegressionTest : public ::testing::TestWithParam> { } protected: - RegressionInputs params; raft::handle_t handle; + RegressionInputs params; cudaStream_t stream = 0; double mean_abs_error = 0; double mean_squared_error = 0; diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu index 876926b71a..354a9c29cc 100644 --- a/cpp/test/stats/silhouette_score.cu +++ b/cpp/test/stats/silhouette_score.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -192,6 +192,7 @@ class silhouetteScoreTest : public ::testing::TestWithParam d_X; @@ -203,7 +204,6 @@ class silhouetteScoreTest : public ::testing::TestWithParam d_X(X.size(), stream); - rmm::device_uvector d_X_embedded(X_embedded.size(), stream); + auto stream = handle.get_stream(); + d_X.resize(X.size(), stream); + d_X_embedded.resize(X_embedded.size(), stream); raft::update_device(d_X.data(), X.data(), X.size(), stream); raft::update_device(d_X_embedded.data(), X_embedded.data(), X_embedded.size(), stream); auto n_sample = 50; @@ -338,6 +338,11 @@ class TrustworthinessScoreTest : public ::testing::Test { void TearDown() override {} protected: + raft::handle_t handle; + + rmm::device_uvector d_X; + rmm::device_uvector d_X_embedded; + double score; }; diff --git a/cpp/test/cudart_utils.cpp b/cpp/test/util/cudart_utils.cpp similarity index 98% rename from cpp/test/cudart_utils.cpp rename to cpp/test/util/cudart_utils.cpp index 7e8585c7c7..e6b1aa9676 100644 --- a/cpp/test/cudart_utils.cpp +++ b/cpp/test/util/cudart_utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/device_atomics.cu b/cpp/test/util/device_atomics.cu similarity index 97% rename from cpp/test/device_atomics.cu rename to cpp/test/util/device_atomics.cu index 4e56b8d486..5e8a67c8f6 100644 --- a/cpp/test/device_atomics.cu +++ b/cpp/test/util/device_atomics.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/test/integer_utils.cpp b/cpp/test/util/integer_utils.cpp
similarity index 96%
rename from cpp/test/integer_utils.cpp
rename to cpp/test/util/integer_utils.cpp
index 46fa8d348d..ed5dddf72d 100644
--- a/cpp/test/integer_utils.cpp
+++ b/cpp/test/util/integer_utils.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/cpp/test/pow2_utils.cu b/cpp/test/util/pow2_utils.cu
similarity index 98%
rename from cpp/test/pow2_utils.cu
rename to cpp/test/util/pow2_utils.cu
index 9e9bd80673..e29e4eeb9c 100644
--- a/cpp/test/pow2_utils.cu
+++ b/cpp/test/util/pow2_utils.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/docs/source/build.md b/docs/source/build.md
index 2eba3af450..c88cf6c162 100644
--- a/docs/source/build.md
+++ b/docs/source/build.md
@@ -130,7 +130,7 @@ For example, to run the distance tests:
 
 It can take some time to compile all of the tests. You can build individual tests by providing a semicolon-separated list to the `--limit-tests` option in `build.sh`:
 ```bash
-./build.sh libraft tests --limit-tests=NEIGHBORS_TEST;DISTANCE_TEST;MATRIX_TEST
+./build.sh libraft tests -n --limit-tests=NEIGHBORS_TEST;DISTANCE_TEST;MATRIX_TEST
 ```
 
 ### Benchmarks
 
@@ -143,7 +143,7 @@ The benchmarks are broken apart by algorithm category, so you will find several
 
 It can take some time to compile all of the benchmarks. You can build individual benchmarks by providing a semicolon-separated list to the `--limit-bench` option in `build.sh`:
 ```bash
-./build.sh libraft bench --limit-bench=NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH
+./build.sh libraft bench -n --limit-bench=NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH
 ```
 
 ### C++ Using Cmake Directly
diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md
index b37d5dc1af..2f54753cc6 100644
--- a/docs/source/developer_guide.md
+++ b/docs/source/developer_guide.md
@@ -1,5 +1,13 @@
 # Developer Guide
 
+## General
+Please start by reading the [Contributor Guide](contributing.md).
+
+## Performance
+1. In performance-critical sections of the code, favor `cudaDeviceGetAttribute` over `cudaGetDeviceProperties`, since the former queries a single attribute rather than filling the entire properties struct. See the corresponding CUDA devblog [here](https://devblogs.nvidia.com/cuda-pro-tip-the-fast-way-to-query-device-properties/) to know more; a minimal sketch appears just below.
+2. If an algorithm requires you to launch GPU work in multiple cuda streams, do not create one `raft::resources` object per work stream. Instead, use the stream pool configured on the given `raft::resources` instance via `get_stream_from_stream_pool()` to pick up the right cuda stream. Refer to the sections on [CUDA Resources](#resource-management) and [Threading](#threading-model) for more details. TIP: use `get_stream_pool_size()` to know how many such streams are available at your disposal.
+
+
 ## Local Development
 
 Developing features and fixing bugs for the RAFT library itself is straightforward and only requires building and installing the relevant RAFT artifacts.
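+A minimal sketch of the attribute-query tip from the Performance section above (the helper name and the chosen attribute are illustrative only):
+
+```cpp
+#include <cuda_runtime_api.h>
+
+// Query a single attribute directly rather than filling the entire
+// cudaDeviceProp struct via cudaGetDeviceProperties.
+inline int max_threads_per_block(int dev_id)
+{
+  int value = 0;
+  cudaDeviceGetAttribute(&value, cudaDevAttrMaxThreadsPerBlock, dev_id);
+  return value;
+}
+```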
@@ -8,11 +16,239 @@ The process for working on a CUDA/C++ feature which might span RAFT and one or m
 
 If building a feature which spans projects and not using the source build in cmake, the RAFT changes (both C++ and Python) will need to be installed into the environment of the consuming project before they can be used. Ideally, the consuming project will enable the RAFT source build only for this case and rely on more stable packaging (such as conda packages) otherwise.
 
-## API stability
+
+## Threading Model
+
+With the exception of `raft::resources`, RAFT algorithms should maintain thread-safety and are, in general,
+assumed to be single-threaded. This means they should be able to be called from multiple host threads so
+long as different instances of `raft::resources` are used.
+
+Exceptions are made for algorithms that can take advantage of multiple CUDA streams within multiple host threads
+in order to oversubscribe or increase occupancy on a single GPU. In these cases, multiple host threads should be
+used within RAFT algorithms only to maintain concurrency of the underlying CUDA streams. Multiple host threads
+should be used sparingly, be bounded, and should steer clear of performing CPU-intensive computations.
+
+A good example of an acceptable use of host threads within a RAFT algorithm might look like the following:
+
+```cpp
+#include <raft/core/resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cuda_stream_pool.hpp>
+raft::resources res;
+
+...
+
+sync_stream(res);
+
+...
+
+int n_streams = get_stream_pool_size(res);
+
+#pragma omp parallel for num_threads(n_streams)
+for(int i = 0; i < n; i++) {
+  int thread_num = omp_get_thread_num() % n_streams;
+  cudaStream_t s = get_stream_from_stream_pool(res, thread_num);
+  ... possible light cpu pre-processing ...
+  my_kernel1<<<grid, blk, 0, s>>>(...);
+  ...
+  ... some possible async d2h / h2d copies ...
+  my_kernel2<<<grid, blk, 0, s>>>(...);
+  ...
+  sync_stream(res, s);
+  ... possible light cpu post-processing ...
+}
+```
+
+In the example above, if there is no CPU pre-processing at the beginning of the for-loop, an event can be registered in
+each of the streams within the for-loop to make them wait on the stream from the handle. If there is no CPU post-processing
+at the end of each for-loop iteration, `sync_stream(res, s)` can be replaced with a single `sync_stream_pool(res)`
+after the for-loop.
+
+To avoid compatibility issues between different threading models, the only threading programming allowed in RAFT is OpenMP.
+Though RAFT's build enables OpenMP by default, RAFT algorithms should still function properly even when OpenMP has been
+disabled. If the CPU pre- and post-processing were not needed in the example above, OpenMP would not be needed.
+
+The use of threads in third-party libraries is allowed, though they should still avoid depending on a specific OpenMP runtime.
+
+## Public Interface
+
+### General guidelines
+Functions exposed via the C++ API must be stateless. Things that are OK to be exposed on the interface:
+1. Any [POD](https://en.wikipedia.org/wiki/Passive_data_structure) - see [std::is_pod](https://en.cppreference.com/w/cpp/types/is_pod) as a reference for C++11 POD types.
+2. `raft::resources` - since it stores resource-related state which has nothing to do with model/algo state.
+3. Avoid using pointers to POD types (called out explicitly, even though a pointer can itself be considered a POD) and pass the structures by reference instead.
+   Internal to the C++ API, these stateless functions are free to use their own temporary classes, as long as they are not exposed on the interface.
+4. Accept single- (`raft::span`) and multi-dimensional views (`raft::mdspan`) and validate their metadata wherever possible.
+5. Prefer `std::optional` for any optional arguments (e.g. do not accept `nullptr`).
+6. All public APIs should be lightweight wrappers around calls to private APIs inside the `detail` namespace.
+
+### API stability
 
 Since RAFT is a core library with multiple consumers, it's important that the public APIs maintain stability across versions and any changes to them are done with caution, adding new functions and deprecating the old functions over a couple releases as necessary.
 
-The public APIs should be lightweight wrappers around calls to private APIs inside the `detail` namespace.
+### Stateless C++ APIs
+
+Using the IVF-PQ algorithm as an example, the following way of exposing its API would be wrong according to the guidelines in this section, since it exposes a non-POD C++ class object in the C++ API:
+```cpp
+template <typename T>
+class ivf_pq {
+  ivf_pq_params params_;
+  raft::resources const& res_;
+
+ public:
+  ivf_pq(raft::resources const& res);
+  void train(raft::device_matrix<T> dataset);
+  void search(raft::device_matrix<T> queries,
+              raft::device_matrix<size_t> out_inds,
+              raft::device_matrix<T> out_dists);
+};
+```
+
+An alternative correct way to expose this could be:
+```cpp
+namespace raft::ivf_pq {
+
+template <typename T>
+void ivf_pq_train(raft::resources const& res, const raft::ivf_pq_params& params, raft::ivf_pq_index& index,
+                  raft::device_matrix<T> dataset);
+
+template <typename T>
+void ivf_pq_search(raft::resources const& res, raft::ivf_pq_params const& params, raft::ivf_pq_index const& index,
+                   raft::device_matrix<T> queries,
+                   raft::device_matrix<size_t> out_inds,
+                   raft::device_matrix<T> out_dists);
+}
+```
+
+### Other functions on state
+
+These guidelines also mean that it is the responsibility of the C++ API to expose methods to load and store (aka marshalling) such a data structure. Further continuing the IVF-PQ example, the following methods could achieve this:
+```cpp
+namespace raft::ivf_pq {
+  void save(raft::ivf_pq_index const& model, std::ostream& os);
+  void load(raft::ivf_pq_index& model, std::istream& is);
+}
+```
+
+
+## Coding style
+
+### Code format
+#### Introduction
+RAFT relies on `clang-format` to enforce code style across all C++ and CUDA source code. The coding style is based on the [Google style guide](https://google.github.io/styleguide/cppguide.html#Formatting). The only digressions from this style are the following.
+1. Do not split empty functions/records/namespaces.
+2. Two-space indentation everywhere, including the line continuations.
+3. Disable reflowing of comments.
+   The reasons behind these deviations from the Google style guide are given in comments [here](../../cpp/.clang-format).
+
+#### How is the check done?
+All formatting checks are done by this python script: [run-clang-format.py](../../cpp/scripts/run-clang-format.py), which is effectively a wrapper over `clang-format`. An error is raised if the code diverges from the format suggested by clang-format. It is expected that developers run this script to detect and fix formatting violations before creating a PR.
+
+##### As part of CI
+[run-clang-format.py](../../cpp/scripts/run-clang-format.py) is executed as part of our `ci/checks/style.sh` CI test. If there are any formatting violations, the PR author is expected to fix them to get CI passing.
+
+### API stability
 Since RAFT is a core library with multiple consumers, it's important that the public APIs maintain stability across versions and any changes to them are done with caution, adding new functions and deprecating the old functions over a couple releases as necessary.
 
-The public APIs should be lightweight wrappers around calls to private APIs inside the `detail` namespace.
+### Stateless C++ APIs
+
+Using the IVF-PQ algorithm as an example, the following way of exposing its API would be wrong according to the guidelines in this section, since it exposes a non-POD C++ class object in the C++ API:
+```cpp
+template <typename T>
+class ivf_pq {
+  ivf_pq_params params_;
+  raft::resources const& res_;
+
+ public:
+  ivf_pq(raft::resources const& res);
+  void train(raft::device_matrix<T> dataset);
+  void search(raft::device_matrix<T> queries,
+              raft::device_matrix<T> out_inds,
+              raft::device_matrix<T> out_dists);
+};
+```
+
+An alternative correct way to expose this could be:
+```cpp
+namespace raft::ivf_pq {
+
+template <typename T>
+void ivf_pq_train(raft::resources const& res,
+                  raft::ivf_pq_params const& params,
+                  raft::ivf_pq_index& index,
+                  raft::device_matrix<T> dataset);
+
+template <typename T>
+void ivf_pq_search(raft::resources const& res,
+                   raft::ivf_pq_params const& params,
+                   raft::ivf_pq_index const& index,
+                   raft::device_matrix<T> queries,
+                   raft::device_matrix<T> out_inds,
+                   raft::device_matrix<T> out_dists);
+}
+```
+
+### Other functions on state
+
+These guidelines also mean that it is the responsibility of the C++ API to expose methods to load and store (aka marshalling) such a data structure. Further continuing the IVF-PQ example, the following methods could achieve this:
+```cpp
+namespace raft::ivf_pq {
+  void save(raft::ivf_pq_index const& model, std::ostream& os);
+  void load(raft::ivf_pq_index& model, std::istream& is);
+}
+```
+
+## Coding style
+
+### Code format
+#### Introduction
+RAFT relies on `clang-format` to enforce code style across all C++ and CUDA source code. The coding style is based on the [Google style guide](https://google.github.io/styleguide/cppguide.html#Formatting). The only digressions from this style are the following:
+1. Do not split empty functions/records/namespaces.
+2. Two-space indentation everywhere, including the line continuations.
+3. Disable reflowing of comments.
+
+The reasons behind these deviations from the Google style guide are given in comments [here](../../cpp/.clang-format).
+
+#### How is the check done?
+All formatting checks are done by this python script: [run-clang-format.py](../../cpp/scripts/run-clang-format.py), which is effectively a wrapper over `clang-format`. An error is raised if the code diverges from the format suggested by clang-format. Developers are expected to run this script to detect and fix formatting violations before creating a PR.
+
+##### As part of CI
+[run-clang-format.py](../../cpp/scripts/run-clang-format.py) is executed as part of our `ci/checks/style.sh` CI test. If there are any formatting violations, the PR author is expected to fix those to get CI passing. Steps needed to fix the formatting violations are described in the subsequent sub-section.
+
+##### Manually
+Developers can also run this check manually (or set up the command as a git pre-commit hook) by executing the following from the root of the RAFT repository:
+```bash
+python ./cpp/scripts/run-clang-format.py
+```
+
+#### How to know the formatting violations?
+When there are formatting errors, [run-clang-format.py](../../cpp/scripts/run-clang-format.py) prints a `diff` command, showing where the formatting differences are. Unfortunately, unlike `flake8`, `clang-format` does NOT print descriptions of the violations, but instead directly formats the code. So, the only way currently to know about formatting differences is to run the diff command suggested by this script against each violating source file.
+
+#### How to fix the formatting violations?
+When there are formatting violations, [run-clang-format.py](../../cpp/scripts/run-clang-format.py) prints, at the end, the exact command that developers can run to fix them. This is the easiest way to fix formatting errors. [This screencast](https://asciinema.org/a/287367) shows how developers can check for formatting violations in their branches and also how to fix those, before sending out PRs.
+
+In short, to bulk-fix all the formatting violations, execute the following command from the root of the RAFT repository:
+```bash
+python ./cpp/scripts/run-clang-format.py -inplace
+```
+
+#### clang-format version?
+To avoid spurious code style violations we specify the exact clang-format version required, currently `11.1.0`. This is enforced by the [run-clang-format.py](../../cpp/scripts/run-clang-format.py) script itself. Refer [here](../../cpp/README.md#dependencies) for the list of build-time dependencies.
+
+#### Additional scripts
+Along with clang-format, there are include checker and copyright checker scripts for checking style, which can be run as part of CI as well as manually.
+
+##### #include style
+[include_checker.py](../../cpp/scripts/include_checker.py) is used to enforce the include style as follows:
+1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies.
+2. `#include <...>` should be used for referencing everything else.
+
+Manually, run the following to bulk-fix include style issues:
+```bash
+python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list of folders which you want to fix]
+```
+
+##### Copyright header
+[copyright.py](../../ci/checks/copyright.py) checks the copyright header for all git-modified files.
+
+Manually, you can run the following to bulk-fix the header if only the years need to be updated:
+```bash
+python ./ci/checks/copyright.py --update-current-year
+```
+Keep in mind that this only applies to files tracked by git that have been modified.
+
+## Error handling
+Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` and `RAFT_CUSOLVER_TRY`. These macros check the return values of the API calls and throw an exception when a command does not succeed. If you need to avoid an exception, e.g. inside a destructor, use `RAFT_CUDA_TRY_NO_THROW`, `RAFT_CUBLAS_TRY_NO_THROW` and `RAFT_CUSOLVER_TRY_NO_THROW`. These macros log the error but do not throw an exception.
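+
+For example, a minimal sketch of both variants, assuming the helper macros come from `raft/util/cudart_utils.hpp`:
+
+```cpp
+#include <raft/util/cudart_utils.hpp>
+
+#include <cstddef>
+
+struct scratch_buffer {
+  void* ptr{nullptr};
+
+  explicit scratch_buffer(std::size_t n_bytes)
+  {
+    // Throws a raft::cuda_error if the allocation fails
+    RAFT_CUDA_TRY(cudaMalloc(&ptr, n_bytes));
+  }
+
+  // A destructor must not throw, so only log a failure here
+  ~scratch_buffer() { RAFT_CUDA_TRY_NO_THROW(cudaFree(ptr)); }
+};
+```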
+
+## Logging
+
+### Introduction
+Anything and everything about logging is defined inside [logger.hpp](../../cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this is transparent to users of the logging API.
+
+### Usage
+```cpp
+#include <raft/core/logger.hpp>
+
+// Inside your method or function, use any of these macros
+RAFT_LOG_TRACE("Hello %s!", "world");
+RAFT_LOG_DEBUG("Hello %s!", "world");
+RAFT_LOG_INFO("Hello %s!", "world");
+RAFT_LOG_WARN("Hello %s!", "world");
+RAFT_LOG_ERROR("Hello %s!", "world");
+RAFT_LOG_CRITICAL("Hello %s!", "world");
+```
+
+### Changing logging level
+There are 7 logging levels, with each successive level becoming quieter:
+1. RAFT_LEVEL_TRACE
+2. RAFT_LEVEL_DEBUG
+3. RAFT_LEVEL_INFO
+4. RAFT_LEVEL_WARN
+5. RAFT_LEVEL_ERROR
+6. RAFT_LEVEL_CRITICAL
+7. RAFT_LEVEL_OFF
+
+Pass one of these as per your needs into the `set_level()` method as follows:
+```cpp
+raft::logger::get().set_level(RAFT_LEVEL_WARN);
+// From now onwards, only WARN and above messages will be printed
+```
+
+### Changing logging pattern
+Pass a [format string](https://github.com/gabime/spdlog/wiki/3.-Custom-formatting) as follows in order to use a different logging pattern than the default:
+```cpp
+raft::logger::get().set_pattern(YourFavoriteFormat);
+```
+The corresponding `get_pattern()` method can be used to query the current format.
+
+### Temporarily changing the logging pattern
+Sometimes, we need to temporarily change the log pattern (e.g. for reporting decision tree structure). This can be achieved in a RAII-like approach as follows:
+```cpp
+{
+  PatternSetter _(MyNewTempFormat);
+  // new log format is in effect from here onwards
+  doStuff();
+  // once the above temporary object goes out-of-scope, the old format will be restored
+}
+```
+
+### Tips
+* Do NOT end your logging messages with a newline! It is automatically added by spdlog.
+* For performance reasons, `RAFT_LOG_TRACE()` is not compiled by default due to the `RAFT_ACTIVE_LEVEL` macro setup. If you need it enabled, change this macro accordingly at compile time.
 
 ## Common Design Considerations
@@ -26,9 +262,170 @@ The public APIs should be lightweight wrappers around calls to private APIs insi
 
 ## Testing
 
-It's important for RAFT to maintain a high test coverage in order to minimize the potential for downstream projects to encounter unexpected build or runtime behavior as a result of changes. A well-defined public API can help maintain compile-time stability but means more focus should be placed on testing the functional requirements and verifying execution on the various edge cases within RAFT itself. Ideally, bug fixes and new features should be able to be made to RAFT independently of the consuming projects.
+It's important for RAFT to maintain a high test coverage of the public APIs in order to minimize the potential for downstream projects to encounter unexpected build or runtime behavior as a result of changes.
+A well-defined public API can help maintain compile-time stability but means more focus should be placed on testing the functional requirements and verifying execution on the various edge cases within RAFT itself. Ideally, bug fixes and new features should be able to be made to RAFT independently of the consuming projects.
 
 ## Documentation
 
-Public APIs always require documentation, since those will be exposed directly to users. In addition to summarizing the purpose of each class / function in the public API, the arguments (and relevant templates) should be documented along with brief usage examples.
+Public APIs always require documentation since those will be exposed directly to users. For C++, we use [doxygen](http://www.doxygen.nl) and for Python/cython we use [pydoc](https://docs.python.org/3/library/pydoc.html). In addition to summarizing the purpose of each class / function in the public API, the arguments (and relevant templates) should be documented along with brief usage examples.
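+
+For instance, a public function might be documented along these lines. The function `bar` and the factory calls in
+its usage example are purely hypothetical and shown only to illustrate the expected level of detail:
+
+```cpp
+/**
+ * @brief Computes the row-wise sum of a device matrix.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   raft::device_resources res;
+ *   auto in  = raft::make_device_matrix<float, int>(res, n_rows, n_cols);
+ *   auto out = raft::make_device_vector<float, int>(res, n_rows);
+ *   bar(res, in.view(), out.view());
+ * @endcode
+ *
+ * @tparam T element type of the input matrix and output vector
+ * @param[in]  res resources to use for enqueueing the work
+ * @param[in]  in  input matrix on device [dim = n_rows x n_cols]
+ * @param[out] out output vector on device [len = n_rows]
+ */
+template <typename T>
+void bar(raft::resources const& res,
+         raft::device_matrix_view<const T, int> in,
+         raft::device_vector_view<T, int> out);
+```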
+
+## Asynchronous operations and stream ordering
+All RAFT algorithms should be as asynchronous as possible, avoiding the use of the default stream (also known as the NULL or `0` stream). Implementations that require only one CUDA stream should use the stream from `raft::resources`:
+
+```cpp
+#include <raft/core/resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+
+void foo(const raft::resources& res, ...)
+{
+  cudaStream_t stream = get_cuda_stream(res);
+}
+```
+When multiple streams are needed, e.g. to manage a pipeline, use the internal streams available in `raft::resources` (see [CUDA Resources](#cuda-resources)). If multiple streams are used, all operations still must be ordered according to `raft::resource::get_cuda_stream()` (from `raft/core/resource/cuda_stream.hpp`). Before any operation in any of the internal CUDA streams is started, all previous work in `raft::resource::get_cuda_stream()` must have completed. Any work enqueued in `raft::resource::get_cuda_stream()` after a RAFT function returns should not start before all work enqueued in the internal streams has completed. E.g. if a RAFT algorithm is called like this:
+```cpp
+#include <raft/core/resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/util/cudart_utils.hpp>
+
+void foo(double* srcdata, double* result)
+{
+  cudaStream_t stream;
+  RAFT_CUDA_TRY( cudaStreamCreate( &stream ) );
+  raft::resources res;
+  set_cuda_stream(res, stream);
+
+  ...
+
+  RAFT_CUDA_TRY( cudaMemcpyAsync( srcdata, h_srcdata.data(), n*sizeof(double), cudaMemcpyHostToDevice, stream ) );
+
+  raft::algo(res, dopredict, srcdata, result, ... );
+
+  RAFT_CUDA_TRY( cudaMemcpyAsync( h_result.data(), result, m*sizeof(double), cudaMemcpyDeviceToHost, stream ) );
+
+  ...
+}
+```
+No work in any stream should start in `raft::algo` before the `cudaMemcpyAsync` in `stream` launched before the call to `raft::algo` is done. And all work in all streams used in `raft::algo` should be done before the `cudaMemcpyAsync` in `stream` launched after the call to `raft::algo` starts.
+
+This can be ensured by introducing inter-stream dependencies with CUDA events and `cudaStreamWaitEvent`. For convenience, the header `raft/core/device_resources.hpp` provides the class `raft::stream_syncer`, which in its constructor lets all of the `raft::resources` internal CUDA streams wait on `raft::resource::get_cuda_stream()`, and in its destructor lets `raft::resource::get_cuda_stream()` wait on all work enqueued in the internal CUDA streams. The intended use is to create a `raft::stream_syncer` object as the first thing in an entry function of the public RAFT API:
+
+```cpp
+namespace raft {
+  void algo(const raft::resources& res, ...)
+  {
+    raft::stream_syncer _(res);
+  }
+}
+```
+This ensures the stream ordering behavior described above.
+
+### Using Thrust
+To ensure that thrust algorithms are executed in the intended stream, the `thrust::cuda::par` execution policy should be used. To ensure that thrust algorithms allocate temporary memory via the provided device memory allocator, use the `rmm::exec_policy` available from `raft/core/resource/thrust_policy.hpp`, which can be obtained through `raft::resources`:
+```cpp
+#include <raft/core/resources.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+void foo(const raft::resources& res, ...)
+{
+  auto execution_policy = get_thrust_policy(res);
+  thrust::for_each(execution_policy, ... );
+}
+```
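+
+For a temporary-memory-allocating algorithm such as `thrust::sort`, the same policy also routes scratch allocations
+through RMM. A minimal sketch, where the `sort_inplace` helper is hypothetical:
+
+```cpp
+#include <raft/core/resources.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+
+#include <thrust/sort.h>
+
+#include <cstddef>
+
+void sort_inplace(raft::resources const& res, float* data, std::size_t n)
+{
+  // thrust::sort may allocate temporary storage; via the handle's policy this
+  // allocation is served by RMM and the work is ordered on the handle's stream
+  thrust::sort(get_thrust_policy(res), data, data + n);
+}
+```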
+
+## Resource Management
+
+Do not create reusable CUDA resources directly in implementations of RAFT algorithms. Instead, use the existing resources in `raft::resources` to avoid constant creation and deletion of reusable resources such as CUDA streams, CUDA events or library handles. Please file a feature request if a resource handle is missing in `raft::resources`.
+The resources can be obtained like this:
+```cpp
+#include <raft/core/resources.hpp>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream_pool.hpp>
+void foo(const raft::resources& h, ...)
+{
+  cublasHandle_t cublasHandle = get_cublas_handle(h);
+  const int num_streams = get_stream_pool_size(h);
+  const int stream_idx = ...
+  cudaStream_t stream = get_stream_from_stream_pool(h, stream_idx);
+  ...
+}
+```
+
+The example below shows one way to create `n_streams` internal CUDA streams with an `rmm::cuda_stream_pool` which can later be used by the algos inside RAFT:
+```cpp
+#include <raft/core/resources.hpp>
+#include <raft/core/resource/cuda_stream_pool.hpp>
+#include <rmm/cuda_stream_pool.hpp>
+int main(int argc, char** argv)
+{
+  int n_streams = argc > 1 ? atoi(argv[1]) : 0;
+  raft::resources res;
+  set_cuda_stream_pool(res, std::make_shared<rmm::cuda_stream_pool>(n_streams));
+
+  foo(res, ...);
+}
+```
+
+## Multi-GPU
+
+The multi-GPU paradigm of RAFT is **O**ne **P**rocess per **G**PU (OPG). Each algorithm should be implemented in a way that it can run with a single GPU without any specific dependencies on a particular communication library. A multi-GPU implementation should use the methods offered by the class `raft::comms::comms_t` from [raft/core/comms.hpp](../../cpp/include/raft/core/comms.hpp) for inter-rank/GPU communication. It is the responsibility of the user of RAFT to create an initialized instance of `raft::comms::comms_t`.
+
+E.g. with a CUDA-aware MPI, a RAFT user could use code like this to inject an initialized instance of `raft::comms::mpi_comms` into a `raft::resources`:
+
+```cpp
+#include <mpi.h>
+#include <raft/core/device_resources.hpp>
+#include <raft/core/comms.hpp>
+#include <raft/comms/mpi_comms.hpp>
+...
+int main(int argc, char * argv[])
+{
+  MPI_Init(&argc, &argv);
+  int rank = -1;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+  int local_rank = -1;
+  {
+    MPI_Comm local_comm;
+    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &local_comm);
+
+    MPI_Comm_rank(local_comm, &local_rank);
+
+    MPI_Comm_free(&local_comm);
+  }
+
+  cudaSetDevice(local_rank);
+
+  MPI_Comm raft_mpi_comms;
+  MPI_Comm_dup(MPI_COMM_WORLD, &raft_mpi_comms);
+
+  {
+    raft::device_resources res;
+    initialize_mpi_comms(res, raft_mpi_comms);
+
+    ...
+
+    raft::algo(res, ... );
+  }
+
+  MPI_Comm_free(&raft_mpi_comms);
+
+  MPI_Finalize();
+  return 0;
+}
+```
+
+A RAFT developer can assume the following:
+* An instance of `raft::comms::comms_t` was correctly initialized.
+* All processes that are part of `raft::comms::comms_t` call into the RAFT algorithm cooperatively.
+
+The initialized instance of `raft::comms::comms_t` can be accessed from the `raft::resources` instance:
+
+```cpp
+#include <raft/core/resources.hpp>
+#include <raft/core/resource/comms.hpp>
+void foo(const raft::resources& res, ...)
+{
+  const raft::comms::comms_t& communicator = get_comms(res);
+  const int rank = communicator.get_rank();
+  const int size = communicator.get_size();
+  ...
+}
+```
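+
+As a minimal sketch of how the communicator might then be used, an allreduce across all ranks is stream-ordered on
+the handle's stream. The `sum_across_ranks` helper and its buffers are hypothetical:
+
+```cpp
+#include <raft/core/resources.hpp>
+#include <raft/core/resource/comms.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+
+#include <cstddef>
+
+void sum_across_ranks(const raft::resources& res, const double* local, double* global, std::size_t n)
+{
+  const raft::comms::comms_t& communicator = get_comms(res);
+  cudaStream_t stream = get_cuda_stream(res);
+
+  // Element-wise sum of the device buffers of all ranks, enqueued on `stream`
+  communicator.allreduce(local, global, n, raft::comms::op_t::SUM, stream);
+
+  // Wait until the collective has completed on this rank before using `global`
+  communicator.sync_stream(stream);
+}
+```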
diff --git a/python/pylibraft/pylibraft/test/test_refine.py b/python/pylibraft/pylibraft/test/test_refine.py
index c7b8624bf1..2f3bef2e0c 100644
--- a/python/pylibraft/pylibraft/test/test_refine.py
+++ b/python/pylibraft/pylibraft/test/test_refine.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/raft-dask/setup.py b/python/raft-dask/setup.py
index bd21136103..7009a9ab44 100644
--- a/python/raft-dask/setup.py
+++ b/python/raft-dask/setup.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -26,7 +26,7 @@
         "numpy",
         "numba>=0.49",
         "joblib>=0.11",
-        "dask-cuda>=23.02",
+        "dask-cuda>=23.2*",
         "dask>=2022.12.0",
         f"ucx-py{cuda_suffix}",
         "distributed>=2022.12.0",