Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove direct UCX and NCCL dependencies #5038

Merged
merged 9 commits into from
Dec 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions conda/environments/all_cuda-115_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ dependencies:
- libraft-distance=23.02.*
- libraft-headers=23.02.*
- libraft-nn=23.02.*
- nccl>=2.9.9
- nltk
- nvcc_linux-64=11.5
- pip
Expand All @@ -42,9 +41,6 @@ dependencies:
- statsmodels
- sysroot_linux-64==2.17
- treelite=3.0.1
- ucx-proc=*=gpu
- ucx-py=0.29.*
- ucx>=1.13.0
- umap-learn
- pip:
- git+https://github.com/dask/dask-glm@main
Expand Down
6 changes: 0 additions & 6 deletions conda/recipes/cuml/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
{% set cuda_version='.'.join(environ.get('CUDA', 'unknown').split('.')[:2]) %}
{% set cuda_major=cuda_version.split('.')[0] %}
{% set py_version=environ.get('CONDA_PY', 36) %}
{% set ucx_py_version=environ.get('UCX_PY_VERSION') %}

package:
name: cuml
Expand Down Expand Up @@ -43,8 +42,6 @@ requirements:
- pylibraft {{ minor_version }}
- raft-dask {{ minor_version }}
- cudatoolkit {{ cuda_version }}.*
- ucx-py {{ ucx_py_version }}
- ucx-proc=*=gpu
- cuda-python >=11.7.1,<12.0
run:
- python x.x
Expand All @@ -56,9 +53,6 @@ requirements:
- raft-dask {{ minor_version }}
- cupy>=7.8.0,<12.0.0a0
- treelite=3.0.1
- nccl>=2.9.9
- ucx-py {{ ucx_py_version }}
- ucx-proc=*=gpu
- dask>=2022.12.0
- distributed>=2022.12.0
- joblib >=0.11
Expand Down
9 changes: 0 additions & 9 deletions conda/recipes/libcuml/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
{% set cuda_version = '.'.join(environ.get('CUDA', '9.2').split('.')[:2]) %}
{% set cuda_major = cuda_version.split('.')[0] %}
{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0
{% set ucx_py_version=environ.get('UCX_PY_VERSION') %}

package:
name: libcuml-split
Expand Down Expand Up @@ -38,12 +37,8 @@ requirements:
- {{ compiler('cuda') }} {{ cuda_version }}
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- nccl {{ nccl_version }}
- cudf {{ minor_version }}
- cudatoolkit {{ cuda_version }}.*
- ucx {{ ucx_version }}
- ucx-py {{ ucx_py_version }}
- ucx-proc=*=gpu
- libcumlprims {{ minor_version }}
- libraft-headers {{ minor_version }}
- libraft-distance {{ minor_version }}
Expand Down Expand Up @@ -74,10 +69,6 @@ outputs:
- libraft-distance {{ minor_version }}
- libraft-nn {{ minor_version }}
- cudf {{ minor_version }}
- nccl {{ nccl_version }}
- ucx >={{ ucx_version }}
- ucx-py {{ ucx_py_version }}
- ucx-proc=*=gpu
- treelite {{ treelite_version }}
- faiss-proc=*=cuda
- libfaiss {{ libfaiss_version }} *_cuda
Expand Down
9 changes: 0 additions & 9 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ option(BUILD_CUML_MG_TESTS "Build cuML multigpu algorithm tests" OFF)
option(BUILD_PRIMS_TESTS "Build ml-prim tests" ON)
option(BUILD_CUML_EXAMPLES "Build C++ API usage examples" ON)
option(BUILD_CUML_BENCH "Build cuML C++ benchmark tests" ON)
option(BUILD_CUML_STD_COMMS "Build the standard NCCL+UCX Communicator" ON)
option(BUILD_CUML_MPI_COMMS "Build the MPI+NCCL Communicator (used for testing)" OFF)
option(CUDA_ENABLE_KERNEL_INFO "Enable kernel resource usage info" OFF)
option(CUDA_ENABLE_LINE_INFO "Enable lineinfo in nvcc" OFF)
Expand Down Expand Up @@ -84,7 +83,6 @@ message(VERBOSE "CUML_CPP: Building cuML multigpu algorithm tests: ${BUILD_CUML_
message(VERBOSE "CUML_CPP: Building ml-prims tests: ${BUILD_PRIMS_TESTS}")
message(VERBOSE "CUML_CPP: Building C++ API usage examples: ${BUILD_CUML_EXAMPLES}")
message(VERBOSE "CUML_CPP: Building cuML C++ benchmark tests: ${BUILD_CUML_BENCH}")
message(VERBOSE "CUML_CPP: Building the standard NCCL+UCX Communicator: ${BUILD_CUML_STD_COMMS}")
message(VERBOSE "CUML_CPP: Building the MPI+NCCL Communicator (used for testing): ${BUILD_CUML_MPI_COMMS}")
message(VERBOSE "CUML_CPP: Enabling detection of conda environment for dependencies: ${DETECT_CONDA_ENV}")
message(VERBOSE "CUML_CPP: Disabling OpenMP: ${DISABLE_OPENMP}")
Expand Down Expand Up @@ -193,7 +191,6 @@ if(SINGLEGPU)
message(STATUS "CUML_CPP: Detected SINGLEGPU build option")
message(STATUS "CUML_CPP: Disabling Multi-GPU components and comms libraries")
set(BUILD_CUML_MG_TESTS OFF)
set(BUILD_CUML_STD_COMMS OFF)
set(BUILD_CUML_MPI_COMMS OFF)
set(ENABLE_CUMLPRIMS_MG OFF)
set(WITH_UCX OFF)
Expand Down Expand Up @@ -236,11 +233,6 @@ if(all_algo OR treeshap_algo)
endif()
endif()

if(NOT SINGLEGPU)
include(cmake/thirdparty/get_nccl.cmake)
include(cmake/thirdparty/get_ucx.cmake)
endif()

if(ENABLE_CUMLPRIMS_MG)
include(cmake/thirdparty/get_cumlprims_mg.cmake)
endif()
Expand Down Expand Up @@ -550,7 +542,6 @@ if(BUILD_CUML_CPP_LIBRARY)
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/metrics>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src_prims>
$<$<OR:$<BOOL:${BUILD_CUML_STD_COMMS}>,$<BOOL:${BUILD_CUML_MPI_COMMS}>>:${NCCL_INCLUDE_DIRS}>
$<$<BOOL:${BUILD_CUML_MPI_COMMS}>:${MPI_CXX_INCLUDE_PATH}>
INTERFACE
$<INSTALL_INTERFACE:include>
Expand Down
10 changes: 8 additions & 2 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ The `test` directory has subdirectories that reflect this distinction between th
2. CUDA (>= 11.0)
3. gcc (>=9.3.0)
4. clang-format (= 11.1.0) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=11` and `clang-tools=11` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager.
5. UCX with CUDA support [optional](>=1.7) - enables point-to-point messaging in the cuML communicator.

### Building cuML:

Expand All @@ -34,7 +33,7 @@ Current cmake offers the following configuration options:
| BUILD_CUML_CPP_LIBRARY | [ON, OFF] | ON | Enable/disable building libcuml++ shared library. Setting this variable to `OFF` sets the variables BUILD_CUML_TESTS, BUILD_CUML_MG_TESTS and BUILD_CUML_EXAMPLES to `OFF` |
| BUILD_CUML_C_LIBRARY | [ON, OFF] | ON | Enable/disable building the libcuml C API shared library. Setting this variable to `OFF` sets the variables BUILD_CUML_TESTS, BUILD_CUML_MG_TESTS and BUILD_CUML_EXAMPLES to `OFF` |
| BUILD_CUML_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_test`. |
| BUILD_CUML_MG_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_mg_test`. Requires MPI to be installed. When enabled, BUILD_CUML_MPI_COMMS will be automatically set to ON. |
| BUILD_CUML_MG_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_mg_test`. Requires MPI to be installed. When enabled, BUILD_CUML_MPI_COMMS will be automatically set to ON. See section about additional requirements.|
| BUILD_PRIMS_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `prims_test`. |
| BUILD_CUML_EXAMPLES | [ON, OFF] | ON | Enable/disable building cuML C++ API usage examples. |
| BUILD_CUML_BENCH | [ON, OFF] | ON | Enable/disable building of cuML C++ benchmark. |
Expand Down Expand Up @@ -66,6 +65,13 @@ $ cmake --build . -j --target ml_mg # Build ml_mg_test multi GPU algorit
$ cmake --build . -j --target prims # Build prims_test ML primitive unit tests binary
```

### Multi-GPU Test Requirements

To build the MultiGPU tests (CMake option `BUILD_CUML_MG_TESTS`), the following dependencies are required:

- MPI (OpenMPI recommended)
- NCCL, version corresponding to [RAFT's requirement](https://github.com/rapidsai/raft/blob/branch-23.02/conda/recipes/raft-dask/meta.yaml#L49).

### Third Party Modules

The external folder contains submodules that cuML depends on.
Expand Down
38 changes: 0 additions & 38 deletions cpp/cmake/thirdparty/get_nccl.cmake

This file was deleted.

5 changes: 5 additions & 0 deletions cpp/cmake/thirdparty/get_raft.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ function(find_and_configure_raft)
string(APPEND RAFT_COMPONENTS " nn")
endif()

# We need RAFT::distributed for MG tests
if(BUILD_CUML_MG_TESTS)
string(APPEND RAFT_COMPONENTS " distributed")
endif()

if(PKG_USE_RAFT_DIST AND PKG_USE_RAFT_NN)
set(RAFT_COMPILE_LIBRARIES ON)
else()
Expand Down
30 changes: 0 additions & 30 deletions cpp/cmake/thirdparty/get_ucx.cmake

This file was deleted.

20 changes: 11 additions & 9 deletions cpp/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

function(ConfigureTest)

set(options OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The more I think about this, I believe we are still going to need to link against NCCL to use raft/comms/mpi_comms.hpp but only for the mg tests and nowhere else. The mpi_comms.hpp are essentially creating a NCCL cluster using MPI and then handing off to NCCL everywhere (think of it like replacing Dask w/ MPI).

Maybe we could use the raft::distributed COMPONENT for that? (or maybe not, since it also brings in UCX...).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Components are neat, added raft::distributed linkage and notes of added dependencies (MPI, NCCL) for C++ MG tests

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are cuml's MG tests using libraft for NCCL in the same way that raft-dask uses libraft for UCX? I'm not quite sure how this code works; does cuml's MG code not directly use NCCL, but the MG tests do?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cuml's MG googletests should be using MPI, however they should still be tied to the comms interface so shouldn't need to know NCCL exists at all (this NCCL can be provided transitively through the raft::distributed component). Note also that wherher the underlying MPI configuration has been configured with UCX underneath is up to the user running the tests (so the raft MPI communicator will use ucx by proxy and not directly)

Cuml's mg pytests all use raft-dask.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right the python tests are fine. I guess the difference is that with the pytests raft-dask is a Python package whose underlying extension modules are linked to NCCL, and then cuml Python only interacts with it at the Python layer, whereas the cuml C++ tests are either linking directly to libraft or building against raft's headers, and in either case the distributed components are not precompiled anyway so there's always NCCL linking required?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we are using PIMPL for the comms, the mg tests shouldnt need nccl during compile time. But yes, the mg tests binary will need to have raft::distributed on the link libraries (which should pass nccl through)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup OK cool. makes sense. Dante has already made the necessary change here by adding the distributed component in get_raft.cmake, so I think we're good here.

set(options OPTIONAL CUMLPRIMS MPI ML_INCLUDE RAFT_DISTRIBUTED)
set(oneValueArgs PREFIX NAME PATH)
set(multiValueArgs TARGETS CONFIGURATIONS)

Expand Down Expand Up @@ -50,9 +50,9 @@ function(ConfigureTest)
GTest::gtest_main
${OpenMP_CXX_LIB_NAMES}
Threads::Threads
$<$<BOOL:${ConfigureTest_NCCL}>:NCCL::NCCL>
$<$<BOOL:${ConfigureTest_CUMLPRIMS}>:cumlprims_mg::cumlprims_mg>
$<$<BOOL:${ConfigureTest_MPI}>:${MPI_CXX_LIBRARIES}>
$<$<BOOL:${ConfigureTest_RAFT_DISTANCE}>:raft::distributed>
${TREELITE_LIBS}
$<TARGET_NAME_IF_EXISTS:conda_env>
)
Expand Down Expand Up @@ -197,16 +197,18 @@ endif()

if(BUILD_CUML_MG_TESTS)

ConfigureTest(PREFIX MG NAME KMEANS_TEST PATH mg/kmeans_test.cu OPTIONAL NCCL CUMLPRIMS ML_INCLUDE)
# This test needs to be rewritten to use the MPI comms, not the std comms, and moved
# to RAFT: https://github.com/rapidsai/cuml/issues/5058
#ConfigureTest(PREFIX MG NAME KMEANS_TEST PATH mg/kmeans_test.cu OPTIONAL NCCL CUMLPRIMS ML_INCLUDE)
if(MPI_CXX_FOUND)
# (please keep the filenames in alphabetical order)
ConfigureTest(PREFIX MG NAME KNN_TEST PATH mg/knn.cu OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
ConfigureTest(PREFIX MG NAME KNN_CLASSIFY_TEST PATH mg/knn_classify.cu OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
ConfigureTest(PREFIX MG NAME KNN_REGRESS_TEST PATH mg/knn_regress.cu OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
ConfigureTest(PREFIX MG NAME MAIN_TEST PATH mg/main.cu OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
ConfigureTest(PREFIX MG NAME PCA_TEST PATH mg/pca.cu OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
ConfigureTest(PREFIX MG NAME KNN_TEST PATH mg/knn.cu OPTIONAL CUMLPRIMS MPI RAFT_DISTRIBUTED ML_INCLUDE)
ConfigureTest(PREFIX MG NAME KNN_CLASSIFY_TEST PATH mg/knn_classify.cu OPTIONAL CUMLPRIMS MPI RAFT_DISTRIBUTED ML_INCLUDE)
ConfigureTest(PREFIX MG NAME KNN_REGRESS_TEST PATH mg/knn_regress.cu OPTIONAL CUMLPRIMS MPI RAFT_DISTRIBUTED ML_INCLUDE)
ConfigureTest(PREFIX MG NAME MAIN_TEST PATH mg/main.cu OPTIONAL CUMLPRIMS MPI RAFT_DISTRIBUTED ML_INCLUDE)
ConfigureTest(PREFIX MG NAME PCA_TEST PATH mg/pca.cu OPTIONAL CUMLPRIMS MPI RAFT_DISTRIBUTED ML_INCLUDE)
else(MPI_CXX_FOUND)
message("OpenMPI not found. Skipping test '${CUML_MG_TEST_TARGET}'")
message("OpenMPI not found. Skipping MultiGPU tests '${CUML_MG_TEST_TARGET}'")
endif()
endif()

Expand Down
4 changes: 0 additions & 4 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ dependencies:
- output_types: conda
packages:
- cudf=23.02.*
- nccl>=2.9.9
- raft-dask=23.02.*
- pylibraft=23.02.*
py_run:
Expand All @@ -117,9 +116,6 @@ dependencies:
- cupy>=7.8.0,<12.0.0a0
- dask-cuda=23.02.*
- dask-cudf=23.02.*
- ucx>=1.13.0
- ucx-py=0.29.*
- ucx-proc=*=gpu
specific:
- output_types: requirements
matrices:
Expand Down