Skip to content

Commit

Permalink
SST: Add UCX SST Dataplane
Browse files Browse the repository at this point in the history
This commit implements a UCX SST dataplane.

The UCX dataplane was added without support for PRELOAD functionality as discussed previously.
Currently this commit disables PRELOAD functionality by commenting out the portion of the code. We need to decide how we want to ensure that PRELOAD
is disabled when using the UCX dataplane.

Another open topic is the priority of the UCX dataplane (compared to other DPs), if others are available. Currently we are hardcoding the priority to a value of 10.

The dataplane was tested with UCX v1.11 and openmpi v4.1.4 (compiled with UCX support) with the following cases:
  1. SST ctests.
      99% tests passed, 1 tests failed out of 168
          The following tests FAILED:
          1056 - Engine.Staging.TestOnDemandMPI.ADIOS2OnDemandMPI.Sst.MPI (Timeout)
  2. Gray Scott from ADIOS2-EXAMPLES

Signed-off-by: Sameeh Jubran <sameeh.j@gmail.com>
Signed-off-by: Michael Laufer <mlaufer103@gmail.com>
Signed-off-by: Erick Fredj <efredj@toganetworks.com>
Signed-off-by: Mark Wasserman <mark.wasserman@toganetworks.com>
  • Loading branch information
sameehj committed Dec 21, 2022
1 parent c2dd562 commit be1a375
Show file tree
Hide file tree
Showing 11 changed files with 938 additions and 12 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ foreach(opt IN LISTS ADIOS2_CONFIG_OPTS)
endif()
endforeach()

if (ADIOS2_HAVE_SST AND ADIOS2_SST_HAVE_LIBFABRIC)
if (ADIOS2_HAVE_SST AND (ADIOS2_SST_HAVE_LIBFABRIC OR ADIOS2_SST_HAVE_UCX))
message(" RDMA Transport for Staging: Available")
else()
message(" RDMA Transport for Staging: Unconfigured")
Expand Down
4 changes: 4 additions & 0 deletions cmake/DetectOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ if(ADIOS2_USE_SST AND NOT WIN32)
if(ADIOS2_HAVE_MPI)
set(ADIOS2_SST_HAVE_MPI TRUE)
endif()
# Optional probe for UCX (no REQUIRED): when the package is found, record
# it in ADIOS2_SST_HAVE_UCX; otherwise the variable is left untouched.
find_package(UCX)
if(UCX_FOUND)
set(ADIOS2_SST_HAVE_UCX TRUE)
endif()
endif()

# DAOS
Expand Down
79 changes: 79 additions & 0 deletions cmake/FindUCX.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#------------------------------------------------------------------------------#
# Distributed under the OSI-approved Apache License, Version 2.0. See
# accompanying file Copyright.txt for details.
#------------------------------------------------------------------------------#
#
# FindUCX
# -----------
#
# Try to find the UCX library
#
# This module defines the following variables:
#
# UCX_FOUND - System has UCX
# UCX_INCLUDE_DIRS - The UCX include directories
# UCX_LIBRARIES - The libraries needed to use UCX
#
# and the following imported targets:
# ucx::ucx - The UCX library target
#
# You can also set the following variable to help guide the search:
# UCX_ROOT - The install prefix for UCX containing the
# include and lib folders
# Note: this can be set as a CMake variable or an
# environment variable. If specified as a CMake
# variable, it will override any setting specified
# as an environment variable.

# This is a bit of a weird pattern, but it allows callers to bypass pkg-config
# and specify the library information manually: set PC_UCX_FOUND to "IGNORE"
# and provide UCX_LIBRARIES (and, optionally, UCX_INCLUDE_DIRS) directly.
if(NOT (PC_UCX_FOUND STREQUAL "IGNORE"))
  find_package(PkgConfig)
  if(PKG_CONFIG_FOUND)
    # Temporarily put the user-supplied UCX prefix at the front of
    # CMAKE_PREFIX_PATH so that pkg-config searches it first.  The CMake
    # variable UCX_ROOT takes precedence over the environment variable.
    set(_UCX_CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH})
    if(UCX_ROOT)
      list(INSERT CMAKE_PREFIX_PATH 0 "${UCX_ROOT}")
    elseif(NOT "$ENV{UCX_ROOT}" STREQUAL "")
      # The environment variable must be expanded with $ENV{...}; a bare
      # ENV{...} is just a literal string and would never compare equal to "".
      list(INSERT CMAKE_PREFIX_PATH 0 "$ENV{UCX_ROOT}")
    endif()

    # Ask pkg_check_modules() to honor CMAKE_PREFIX_PATH, remembering any
    # value the caller had so that it can be put back afterwards.
    if(DEFINED PKG_CONFIG_USE_CMAKE_PREFIX_PATH)
      set(_UCX_PKG_CONFIG_USE_PREFIX "${PKG_CONFIG_USE_CMAKE_PREFIX_PATH}")
    endif()
    set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH ON)

    pkg_check_modules(PC_UCX ucx)

    # Restore the caller's search settings.
    if(DEFINED _UCX_PKG_CONFIG_USE_PREFIX)
      set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH "${_UCX_PKG_CONFIG_USE_PREFIX}")
      unset(_UCX_PKG_CONFIG_USE_PREFIX)
    else()
      unset(PKG_CONFIG_USE_CMAKE_PREFIX_PATH)
    endif()
    set(CMAKE_PREFIX_PATH ${_UCX_CMAKE_PREFIX_PATH})
    unset(_UCX_CMAKE_PREFIX_PATH)

    if(PC_UCX_FOUND)
      # Static builds use the PC_UCX_STATIC_* result variables; shared
      # builds use the plain PC_UCX_* ones.
      if(BUILD_SHARED_LIBS)
        set(_PC_TYPE)
      else()
        set(_PC_TYPE _STATIC)
      endif()
      set(UCX_INCLUDE_DIRS ${PC_UCX${_PC_TYPE}_INCLUDE_DIRS})
      set(UCX_LIBRARIES ${PC_UCX${_PC_TYPE}_LINK_LIBRARIES})
      unset(_PC_TYPE)
    endif()
  endif()
endif()

# Standard find-module epilogue: handles the QUIET and REQUIRED arguments
# given to find_package(UCX) and sets UCX_FOUND to TRUE only if every
# required variable listed below is set.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(UCX
  REQUIRED_VARS UCX_LIBRARIES
)

# Expose the result as the ucx::ucx INTERFACE IMPORTED target, carrying the
# include directories and link libraries discovered above.
if(UCX_FOUND)
  # Define the target only if it does not exist yet.
  if(NOT TARGET ucx::ucx)
    add_library(ucx::ucx INTERFACE IMPORTED)
    if(UCX_INCLUDE_DIRS)
      set_target_properties(ucx::ucx PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES "${UCX_INCLUDE_DIRS}"
      )
      # Report at STATUS level and stay silent for find_package(UCX QUIET);
      # the "Found UCX" line itself is already printed by
      # find_package_handle_standard_args().
      if(NOT UCX_FIND_QUIETLY)
        message(STATUS "UCX include directories: '${UCX_INCLUDE_DIRS}'")
      endif()
    endif()
    if(UCX_LIBRARIES)
      set_target_properties(ucx::ucx PROPERTIES
        INTERFACE_LINK_LIBRARIES "${UCX_LIBRARIES}"
      )
    endif()
  endif()
endif()
5 changes: 5 additions & 0 deletions cmake/adios2-config-common.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,11 @@ if(NOT @BUILD_SHARED_LIBS@)
find_dependency(LIBFABRIC)
endif()

# The value on the right-hand side is substituted when this template is
# configured.  When it is true, UCX is looked up as a dependency of the
# package as well.
set(ADIOS2_SST_HAVE_UCX @ADIOS2_SST_HAVE_UCX@)
if(ADIOS2_SST_HAVE_UCX)
find_dependency(UCX)
endif()

find_dependency(EVPath)
add_library(adios2::thirdparty::EVPath INTERFACE IMPORTED)
set_target_properties(adios2::thirdparty::EVPath PROPERTIES
Expand Down
10 changes: 5 additions & 5 deletions docs/user_guide/source/engines/sst.rst
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ the underlying network communication mechanism to use for exchanging
data in SST. Generally this is chosen by SST based upon what is
available on the current platform. However, specifying this engine
parameter allows overriding SST's choice. Current allowed values are
**"MPI"**, **"RDMA"**, and **"WAN"**. (**ib** and **fabric** are accepted as
**"UCX"**, **"MPI"**, **"RDMA"**, and **"WAN"**. (**ib** and **fabric** are accepted as
equivalent to **RDMA** and **evpath** is equivalent to **WAN**.)
Generally both the reader and writer should be using the same network
transport, and the network transport chosen may be dictated by the
Expand Down Expand Up @@ -280,15 +280,15 @@ single reader, but only upon request (with a request being initiated
by the reader doing BeginStep()). Normal reader-side rules (like
BeginStep timeouts) and writer-side rules (like queue limit behavior) apply.

============================= ===================== ================================================
============================= ===================== ====================================================
**Key** **Value Format** **Default** and Examples
============================= ===================== ================================================
============================= ===================== ====================================================
RendezvousReaderCount integer **1**
RegistrationMethod string **File**, Screen
QueueLimit integer **0** (no queue limits)
QueueFullPolicy string **Block**, Discard
ReserveQueueLimit integer **0** (no queue limits)
DataTransport string **default varies by platform**, MPI, RDMA, WAN
DataTransport string **default varies by platform**, UCX, MPI, RDMA, WAN
WANDataTransport string **sockets**, enet, ib
ControlTransport string **TCP**, Scalable
NetworkInterface string **NULL**
Expand All @@ -299,4 +299,4 @@ BeginStep timeouts) and writer-side rules (like queue limit behavior) apply.
OpenTimeoutSecs integer **60**
SpeculativePreloadMode string **AUTO**, ON, OFF
SpecAutoNodeThreshold integer **1**
============================= ===================== ================================================
============================= ===================== ====================================================
2 changes: 1 addition & 1 deletion docs/user_guide/source/setting_up/source/cmake.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ VAR VALUE Description
``ADIOS2_USE_HDF5`` **ON**/OFF `HDF5 <https://www.hdfgroup.org>`_ engine. If HDF5 is not on the syspath, it can be set using ``-DHDF5_ROOT=/path/to/hdf5``
``ADIOS2_USE_Python`` **ON**/OFF Python bindings. Python 3 will be used if found. If you want to specify a particular python version use ``-DPYTHON_EXECUTABLE=/path/to/interpreter/python``
``ADIOS2_USE_Fortran`` **ON**/OFF Bindings for Fortran 90 or above.
``ADIOS2_USE_SST`` **ON**/OFF Simplified Staging Engine (SST) and its dependencies, requires MPI. Can optionally use LibFabric for RDMA transport. Specify the LibFabric install manually with the -DLIBFABRIC_ROOT=... option.
``ADIOS2_USE_SST`` **ON**/OFF Simplified Staging Engine (SST) and its dependencies, requires MPI. Can optionally use LibFabric/UCX for RDMA transport. You can specify the LibFabric/UCX path manually with the -DLIBFABRIC_ROOT=... or -DUCX_ROOT=... option.
``ADIOS2_USE_BZip2`` **ON**/OFF `BZIP2 <http://www.bzip.org>`_ compression.
``ADIOS2_USE_ZFP`` **ON**/OFF `ZFP <https://github.com/LLNL/zfp>`_ compression (experimental).
``ADIOS2_USE_SZ`` **ON**/OFF `SZ <https://github.com/disheng222/SZ>`_ compression (experimental).
Expand Down
8 changes: 8 additions & 0 deletions source/adios2/toolkit/sst/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ if(ADIOS2_SST_HAVE_LIBFABRIC)
endif()
endif()

# When ADIOS2_SST_HAVE_UCX is set, compile the UCX data plane source into the
# sst target and link it privately against the ucx::ucx imported target.
if(ADIOS2_SST_HAVE_UCX)
target_sources(sst PRIVATE dp/ucx_dp.c)
target_link_libraries(sst
PRIVATE ucx::ucx)
# NOTE(review): this overwrites CMAKE_REQUIRED_INCLUDES, yet no check_*() call
# follows inside this block -- confirm whether a later check depends on it or
# whether the assignment can be dropped.
set(CMAKE_REQUIRED_INCLUDES ${UCX_INCLUDE_DIRS})
endif()

if(ADIOS2_HAVE_DAOS)
target_sources(sst PRIVATE dp/daos_dp.c)
target_link_libraries(sst PRIVATE DAOS::DAOS)
Expand Down Expand Up @@ -56,6 +63,7 @@ set_target_properties(sst PROPERTIES
#------------------------------------------------------------------------------#
set(SST_CONFIG_OPTS
LIBFABRIC
UCX
FI_GNI
CRAY_DRC
NVStream
Expand Down
4 changes: 4 additions & 0 deletions source/adios2/toolkit/sst/cp/cp_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ void CP_validateParams(SstStream Stream, SstParams Params, int Writer)
{
Params->DataTransport = strdup("rdma");
}
else if (strcmp(SelectedTransport, "ucx") == 0)
{
Params->DataTransport = strdup("ucx");
}
else
{
Params->DataTransport = strdup(SelectedTransport);
Expand Down
18 changes: 13 additions & 5 deletions source/adios2/toolkit/sst/cp/cp_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -872,11 +872,19 @@ WS_ReaderInfo WriterParticipateInReaderOpen(SstStream Stream)
CP_WSR_Stream->ReaderStatus = Opening;
if (ReturnData->SpecPreload == SpecPreloadOn)
{

CP_WSR_Stream->PreloadMode = SstPreloadSpeculative;
CP_WSR_Stream->PreloadModeActiveTimestep = 0;
CP_verbose(Stream, PerStepVerbose,
"Setting SpeculativePreload ON for new reader\n");
if (strcmp(Stream->DP_Interface->DPName, "ucx") != 0)
{
CP_WSR_Stream->PreloadMode = SstPreloadSpeculative;
CP_WSR_Stream->PreloadModeActiveTimestep = 0;
CP_verbose(Stream, PerStepVerbose,
"Setting SpeculativePreload ON for new reader\n");
}
else
{
CP_verbose(Stream, CriticalVerbose,
"Cannot run ucx dataplane with Speculative Preload on, forcing Speculative Preload Off\n");
ReturnData->SpecPreload = SpecPreloadOff;
}
}

int MySuccess = initWSReader(CP_WSR_Stream, ReturnData->ReaderCohortSize,
Expand Down
8 changes: 8 additions & 0 deletions source/adios2/toolkit/sst/dp/dp.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
#ifdef SST_HAVE_LIBFABRIC
extern CP_DP_Interface LoadRdmaDP();
#endif /* SST_HAVE_LIBFABRIC */
#ifdef SST_HAVE_UCX
extern CP_DP_Interface LoadUcxDP();
#endif /* SST_HAVE_UCX */
#ifdef SST_HAVE_DAOS
extern CP_DP_Interface LoadDaosDP();
#endif /* SST_HAVE_DAOS */
Expand Down Expand Up @@ -66,6 +69,11 @@ CP_DP_Interface SelectDP(CP_Services Svcs, void *CP_Stream,
AddDPPossibility(Svcs, CP_Stream, List, LoadRdmaDP(), "rdma", Params);
#endif /* SST_HAVE_LIBFABRIC */

#ifdef SST_HAVE_UCX
List =
AddDPPossibility(Svcs, CP_Stream, List, LoadUcxDP(), "ucx", Params);
#endif /* SST_HAVE_UCX */

#ifdef SST_HAVE_DAOS
List =
AddDPPossibility(Svcs, CP_Stream, List, LoadDaosDP(), "daos", Params);
Expand Down
Loading

0 comments on commit be1a375

Please sign in to comment.