Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove direct UCX and NCCL dependencies #5038

Merged
merged 9 commits into from
Dec 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions conda/environments/all_cuda-115_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ dependencies:
- libraft-distance=23.02.*
- libraft-headers=23.02.*
- libraft-nn=23.02.*
- nccl>=2.9.9
- nltk
- nvcc_linux-64=11.5
- pip
Expand All @@ -42,9 +41,6 @@ dependencies:
- statsmodels
- sysroot_linux-64==2.17
- treelite=3.0.1
- ucx-proc=*=gpu
- ucx-py=0.29.*
- ucx>=1.13.0
- umap-learn
- pip:
- git+https://github.com/dask/dask-glm@main
Expand Down
6 changes: 0 additions & 6 deletions conda/recipes/cuml/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
{% set cuda_version='.'.join(environ.get('CUDA', 'unknown').split('.')[:2]) %}
{% set cuda_major=cuda_version.split('.')[0] %}
{% set py_version=environ.get('CONDA_PY', 36) %}
{% set ucx_py_version=environ.get('UCX_PY_VERSION') %}

package:
name: cuml
Expand Down Expand Up @@ -43,8 +42,6 @@ requirements:
- pylibraft {{ minor_version }}
- raft-dask {{ minor_version }}
- cudatoolkit {{ cuda_version }}.*
- ucx-py {{ ucx_py_version }}
- ucx-proc=*=gpu
- cuda-python >=11.7.1,<12.0
run:
- python x.x
Expand All @@ -56,9 +53,6 @@ requirements:
- raft-dask {{ minor_version }}
- cupy>=7.8.0,<12.0.0a0
- treelite=3.0.1
- nccl>=2.9.9
- ucx-py {{ ucx_py_version }}
- ucx-proc=*=gpu
- dask>=2022.12.0
- distributed>=2022.12.0
- joblib >=0.11
Expand Down
9 changes: 0 additions & 9 deletions conda/recipes/libcuml/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
{% set cuda_version = '.'.join(environ.get('CUDA', '9.2').split('.')[:2]) %}
{% set cuda_major = cuda_version.split('.')[0] %}
{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0
{% set ucx_py_version=environ.get('UCX_PY_VERSION') %}

package:
name: libcuml-split
Expand Down Expand Up @@ -38,12 +37,8 @@ requirements:
- {{ compiler('cuda') }} {{ cuda_version }}
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- nccl {{ nccl_version }}
- cudf {{ minor_version }}
- cudatoolkit {{ cuda_version }}.*
- ucx {{ ucx_version }}
- ucx-py {{ ucx_py_version }}
- ucx-proc=*=gpu
- libcumlprims {{ minor_version }}
- libraft-headers {{ minor_version }}
- libraft-distance {{ minor_version }}
Expand Down Expand Up @@ -74,10 +69,6 @@ outputs:
- libraft-distance {{ minor_version }}
- libraft-nn {{ minor_version }}
- cudf {{ minor_version }}
- nccl {{ nccl_version }}
- ucx >={{ ucx_version }}
- ucx-py {{ ucx_py_version }}
- ucx-proc=*=gpu
- treelite {{ treelite_version }}
- faiss-proc=*=cuda
- libfaiss {{ libfaiss_version }} *_cuda
Expand Down
9 changes: 0 additions & 9 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ option(BUILD_CUML_MG_TESTS "Build cuML multigpu algorithm tests" OFF)
option(BUILD_PRIMS_TESTS "Build ml-prim tests" ON)
option(BUILD_CUML_EXAMPLES "Build C++ API usage examples" ON)
option(BUILD_CUML_BENCH "Build cuML C++ benchmark tests" ON)
option(BUILD_CUML_STD_COMMS "Build the standard NCCL+UCX Communicator" ON)
option(BUILD_CUML_MPI_COMMS "Build the MPI+NCCL Communicator (used for testing)" OFF)
option(CUDA_ENABLE_KERNEL_INFO "Enable kernel resource usage info" OFF)
option(CUDA_ENABLE_LINE_INFO "Enable lineinfo in nvcc" OFF)
Expand Down Expand Up @@ -84,7 +83,6 @@ message(VERBOSE "CUML_CPP: Building cuML multigpu algorithm tests: ${BUILD_CUML_
message(VERBOSE "CUML_CPP: Building ml-prims tests: ${BUILD_PRIMS_TESTS}")
message(VERBOSE "CUML_CPP: Building C++ API usage examples: ${BUILD_CUML_EXAMPLES}")
message(VERBOSE "CUML_CPP: Building cuML C++ benchmark tests: ${BUILD_CUML_BENCH}")
message(VERBOSE "CUML_CPP: Building the standard NCCL+UCX Communicator: ${BUILD_CUML_STD_COMMS}")
message(VERBOSE "CUML_CPP: Building the MPI+NCCL Communicator (used for testing): ${BUILD_CUML_MPI_COMMS}")
message(VERBOSE "CUML_CPP: Enabling detection of conda environment for dependencies: ${DETECT_CONDA_ENV}")
message(VERBOSE "CUML_CPP: Disabling OpenMP: ${DISABLE_OPENMP}")
Expand Down Expand Up @@ -193,7 +191,6 @@ if(SINGLEGPU)
message(STATUS "CUML_CPP: Detected SINGLEGPU build option")
message(STATUS "CUML_CPP: Disabling Multi-GPU components and comms libraries")
set(BUILD_CUML_MG_TESTS OFF)
set(BUILD_CUML_STD_COMMS OFF)
set(BUILD_CUML_MPI_COMMS OFF)
set(ENABLE_CUMLPRIMS_MG OFF)
set(WITH_UCX OFF)
Expand Down Expand Up @@ -236,11 +233,6 @@ if(all_algo OR treeshap_algo)
endif()
endif()

if(NOT SINGLEGPU)
include(cmake/thirdparty/get_nccl.cmake)
include(cmake/thirdparty/get_ucx.cmake)
endif()

if(ENABLE_CUMLPRIMS_MG)
include(cmake/thirdparty/get_cumlprims_mg.cmake)
endif()
Expand Down Expand Up @@ -550,7 +542,6 @@ if(BUILD_CUML_CPP_LIBRARY)
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/metrics>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src_prims>
$<$<OR:$<BOOL:${BUILD_CUML_STD_COMMS}>,$<BOOL:${BUILD_CUML_MPI_COMMS}>>:${NCCL_INCLUDE_DIRS}>
$<$<BOOL:${BUILD_CUML_MPI_COMMS}>:${MPI_CXX_INCLUDE_PATH}>
INTERFACE
$<INSTALL_INTERFACE:include>
Expand Down
10 changes: 8 additions & 2 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ The `test` directory has subdirectories that reflect this distinction between th
2. CUDA (>= 11.0)
3. gcc (>=9.3.0)
4. clang-format (= 11.1.0) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=11` and `clang-tools=11` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager.
5. UCX with CUDA support [optional](>=1.7) - enables point-to-point messaging in the cuML communicator.

### Building cuML:

Expand All @@ -34,7 +33,7 @@ Current cmake offers the following configuration options:
| BUILD_CUML_CPP_LIBRARY | [ON, OFF] | ON | Enable/disable building libcuml++ shared library. Setting this variable to `OFF` sets the variables BUILD_CUML_TESTS, BUILD_CUML_MG_TESTS and BUILD_CUML_EXAMPLES to `OFF` |
| BUILD_CUML_C_LIBRARY | [ON, OFF] | ON | Enable/disable building the libcuml C API shared library. Setting this variable to `OFF` sets the variables BUILD_CUML_TESTS, BUILD_CUML_MG_TESTS and BUILD_CUML_EXAMPLES to `OFF` |
| BUILD_CUML_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_test`. |
| BUILD_CUML_MG_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_mg_test`. Requires MPI to be installed. When enabled, BUILD_CUML_MPI_COMMS will be automatically set to ON. |
| BUILD_CUML_MG_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_mg_test`. Requires MPI to be installed. When enabled, BUILD_CUML_MPI_COMMS will be automatically set to ON. See section about additional requirements.|
| BUILD_PRIMS_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `prims_test`. |
| BUILD_CUML_EXAMPLES | [ON, OFF] | ON | Enable/disable building cuML C++ API usage examples. |
| BUILD_CUML_BENCH | [ON, OFF] | ON | Enable/disable building of cuML C++ benchmark. |
Expand Down Expand Up @@ -66,6 +65,13 @@ $ cmake --build . -j --target ml_mg # Build ml_mg_test multi GPU algorit
$ cmake --build . -j --target prims # Build prims_test ML primitive unit tests binary
```

### Multi-GPU Test Requirements

To build the MultiGPU tests (CMake option `BUILD_CUML_MG_TESTS`), the following dependencies are required:

- MPI (OpenMPI recommended)
- NCCL, version corresponding to [RAFT's requirement](https://github.com/rapidsai/raft/blob/branch-23.02/conda/recipes/raft-dask/meta.yaml#L49).

### Third Party Modules

The external folder contains submodules that cuML depends on.
Expand Down
38 changes: 0 additions & 38 deletions cpp/cmake/thirdparty/get_nccl.cmake

This file was deleted.

5 changes: 5 additions & 0 deletions cpp/cmake/thirdparty/get_raft.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ function(find_and_configure_raft)
string(APPEND RAFT_COMPONENTS " nn")
endif()

# We need RAFT::distributed for MG tests
if(BUILD_CUML_MG_TESTS)
string(APPEND RAFT_COMPONENTS " distributed")
endif()

if(PKG_USE_RAFT_DIST AND PKG_USE_RAFT_NN)
set(RAFT_COMPILE_LIBRARIES ON)
else()
Expand Down
30 changes: 0 additions & 30 deletions cpp/cmake/thirdparty/get_ucx.cmake

This file was deleted.

20 changes: 11 additions & 9 deletions cpp/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

function(ConfigureTest)

set(options OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The more I think about this, I believe we are still going to need to link against NCCL to use raft/comms/mpi_comms.hpp but only for the mg tests and nowhere else. The mpi_comms.hpp are essentially creating a NCCL cluster using MPI and then handing off to NCCL everywhere (think of it like replacing Dask w/ MPI).

Maybe we could use the raft::distributed COMPONENT for that? (or maybe not, since it also brings in UCX...).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Components are neat, added raft::distributed linkage and notes of added dependencies (MPI, NCCL) for C++ MG tests

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are cuml's MG tests using libraft for NCCL in the same way that raft-dask uses libraft for UCX? I'm not quite sure how this code works; does cuml's MG code not directly use NCCL, but the MG tests do?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cuml's MG googletests should be using MPI, however they should still be tied to the comms interface so shouldn't need to know NCCL exists at all (this NCCL can be provided transitively through the raft::distributed component). Note also that wherher the underlying MPI configuration has been configured with UCX underneath is up to the user running the tests (so the raft MPI communicator will use ucx by proxy and not directly)

Cuml's mg pytests all use raft-dask.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right the python tests are fine. I guess the difference is that with the pytests raft-dask is a Python package whose underlying extension modules are linked to NCCL, and then cuml Python only interacts with it at the Python layer, whereas the cuml C++ tests are either linking directly to libraft or building against raft's headers, and in either case the distributed components are not precompiled anyway so there's always NCCL linking required?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we are using PIMPL for the comms, the mg tests shouldnt need nccl during compile time. But yes, the mg tests binary will need to have raft::distributed on the link libraries (which should pass nccl through)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup OK cool. makes sense. Dante has already made the necessary change here by adding the distributed component in get_raft.cmake, so I think we're good here.

set(options OPTIONAL CUMLPRIMS MPI ML_INCLUDE RAFT_DISTRIBUTED)
set(oneValueArgs PREFIX NAME PATH)
set(multiValueArgs TARGETS CONFIGURATIONS)

Expand Down Expand Up @@ -50,9 +50,9 @@ function(ConfigureTest)
GTest::gtest_main
${OpenMP_CXX_LIB_NAMES}
Threads::Threads
$<$<BOOL:${ConfigureTest_NCCL}>:NCCL::NCCL>
$<$<BOOL:${ConfigureTest_CUMLPRIMS}>:cumlprims_mg::cumlprims_mg>
$<$<BOOL:${ConfigureTest_MPI}>:${MPI_CXX_LIBRARIES}>
$<$<BOOL:${ConfigureTest_RAFT_DISTANCE}>:raft::distributed>
${TREELITE_LIBS}
$<TARGET_NAME_IF_EXISTS:conda_env>
)
Expand Down Expand Up @@ -197,16 +197,18 @@ endif()

if(BUILD_CUML_MG_TESTS)

ConfigureTest(PREFIX MG NAME KMEANS_TEST PATH mg/kmeans_test.cu OPTIONAL NCCL CUMLPRIMS ML_INCLUDE)
# This test needs to be rewritten to use the MPI comms, not the std comms, and moved
# to RAFT: https://github.com/rapidsai/cuml/issues/5058
#ConfigureTest(PREFIX MG NAME KMEANS_TEST PATH mg/kmeans_test.cu OPTIONAL NCCL CUMLPRIMS ML_INCLUDE)
if(MPI_CXX_FOUND)
# (please keep the filenames in alphabetical order)
ConfigureTest(PREFIX MG NAME KNN_TEST PATH mg/knn.cu OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
ConfigureTest(PREFIX MG NAME KNN_CLASSIFY_TEST PATH mg/knn_classify.cu OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
ConfigureTest(PREFIX MG NAME KNN_REGRESS_TEST PATH mg/knn_regress.cu OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
ConfigureTest(PREFIX MG NAME MAIN_TEST PATH mg/main.cu OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
ConfigureTest(PREFIX MG NAME PCA_TEST PATH mg/pca.cu OPTIONAL NCCL CUMLPRIMS MPI ML_INCLUDE)
ConfigureTest(PREFIX MG NAME KNN_TEST PATH mg/knn.cu OPTIONAL CUMLPRIMS MPI RAFT_DISTRIBUTED ML_INCLUDE)
ConfigureTest(PREFIX MG NAME KNN_CLASSIFY_TEST PATH mg/knn_classify.cu OPTIONAL CUMLPRIMS MPI RAFT_DISTRIBUTED ML_INCLUDE)
ConfigureTest(PREFIX MG NAME KNN_REGRESS_TEST PATH mg/knn_regress.cu OPTIONAL CUMLPRIMS MPI RAFT_DISTRIBUTED ML_INCLUDE)
ConfigureTest(PREFIX MG NAME MAIN_TEST PATH mg/main.cu OPTIONAL CUMLPRIMS MPI RAFT_DISTRIBUTED ML_INCLUDE)
ConfigureTest(PREFIX MG NAME PCA_TEST PATH mg/pca.cu OPTIONAL CUMLPRIMS MPI RAFT_DISTRIBUTED ML_INCLUDE)
else(MPI_CXX_FOUND)
message("OpenMPI not found. Skipping test '${CUML_MG_TEST_TARGET}'")
message("OpenMPI not found. Skipping MultiGPU tests '${CUML_MG_TEST_TARGET}'")
endif()
endif()

Expand Down
4 changes: 0 additions & 4 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ dependencies:
- output_types: conda
packages:
- cudf=23.02.*
- nccl>=2.9.9
- raft-dask=23.02.*
- pylibraft=23.02.*
py_run:
Expand All @@ -117,9 +116,6 @@ dependencies:
- cupy>=7.8.0,<12.0.0a0
- dask-cuda=23.02.*
- dask-cudf=23.02.*
- ucx>=1.13.0
- ucx-py=0.29.*
- ucx-proc=*=gpu
specific:
- output_types: requirements
matrices:
Expand Down