Skip to content

Commit

Permalink
Enable detection of undesired stream usage (#12089)
Browse files Browse the repository at this point in the history
This PR builds on #11875 and partially addresses #11943. This PR allows us to run all tests on a precise stream (the newly introduced `cudf::test::get_default_stream()`) and then verify that all CUDA APIs end up invoked on that stream. This implements the feature required in #11943, but to apply it universally across libcudf will require the API changes that will expose streams so I plan to make those changes incrementally after this PR is merged.

The preload library is now compiled twice, once to overload `cudf::get_default_stream` and once to overload `cudf::test::get_default_stream`. For now there is still some manual coordination associated with determining which one should be used with a given test, but once #12451 is merged and we start running all tests via ctest instead of direct invocation of the test executables we can start encoding this information in the CMake configuration of the tests by associating the require environment variables directly with the test executable using `set_tests_properties`.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - Ray Douglass (https://github.com/raydouglass)
  - Nghia Truong (https://github.com/ttnghia)

URL: #12089
  • Loading branch information
vyasr authored Mar 14, 2023
1 parent 3584739 commit 55ed347
Show file tree
Hide file tree
Showing 10 changed files with 264 additions and 85 deletions.
9 changes: 5 additions & 4 deletions ci/test_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ trap "EXITCODE=1" ERR
set +e

# Get library for finding incorrect default stream usage.
STREAM_IDENTIFY_LIB="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage.so"
STREAM_IDENTIFY_LIB_MODE_CUDF="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage_mode_cudf.so"
STREAM_IDENTIFY_LIB_MODE_TESTING="${CONDA_PREFIX}/lib/libcudf_identify_stream_usage_mode_testing.so"

echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB}"
echo "STREAM_IDENTIFY_LIB=${STREAM_IDENTIFY_LIB_MODE_CUDF}"

# Run libcudf and libcudf_kafka gtests from libcudf-tests package
rapids-logger "Run gtests"
Expand All @@ -31,10 +32,10 @@ for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do
# This one test is specifically designed to test using a thrust device
# vector, so we expect and allow it to include default stream usage.
gtest_filter="SpanTest.CanConstructFromDeviceContainers"
GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="-${gtest_filter}" && \
GTEST_CUDF_STREAM_MODE="new_cudf_default" LD_PRELOAD=${STREAM_IDENTIFY_LIB_MODE_CUDF} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="-${gtest_filter}" && \
${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} --gtest_filter="${gtest_filter}"
else
GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR}
GTEST_CUDF_STREAM_MODE="new_cudf_default" LD_PRELOAD=${STREAM_IDENTIFY_LIB_MODE_CUDF} ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR}
fi
done

Expand Down
7 changes: 5 additions & 2 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ outputs:
test:
commands:
- test -f $PREFIX/lib/libcudf.so
- test -f $PREFIX/lib/libcudftestutil.a
- test -f $PREFIX/lib/libcudftestutil.so
- test -f $PREFIX/lib/libcudf_identify_stream_usage_mode_cudf.so
- test -f $PREFIX/lib/libcudf_identify_stream_usage_mode_testing.so
- test -f $PREFIX/include/cudf/aggregation.hpp
- test -f $PREFIX/include/cudf/ast/detail/expression_parser.hpp
- test -f $PREFIX/include/cudf/ast/detail/operators.hpp
Expand Down Expand Up @@ -294,11 +296,12 @@ outputs:
- test -f $PREFIX/include/cudf_test/column_wrapper.hpp
- test -f $PREFIX/include/cudf_test/cudf_gtest.hpp
- test -f $PREFIX/include/cudf_test/cxxopts.hpp
- test -f $PREFIX/include/cudf_test/default_stream.hpp
- test -f $PREFIX/include/cudf_test/detail/column_utilities.hpp
- test -f $PREFIX/include/cudf_test/file_utilities.hpp
- test -f $PREFIX/include/cudf_test/io_metadata_utilities.hpp
- test -f $PREFIX/include/cudf_test/iterator_utilities.hpp
- test -f $PREFIX/include/cudf_test/stream_checking_resource_adapter.hpp
- test -f $PREFIX/include/cudf_test/stream_checking_resource_adaptor.hpp
- test -f $PREFIX/include/cudf_test/table_utilities.hpp
- test -f $PREFIX/include/cudf_test/timestamp_utilities.cuh
- test -f $PREFIX/include/cudf_test/type_list_utilities.hpp
Expand Down
41 changes: 28 additions & 13 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -740,10 +740,15 @@ add_library(cudf::cudf ALIAS cudf)

if(CUDF_BUILD_TESTUTIL)
add_library(
cudftestutil STATIC
# This library must also be compiled as a dynamic library to support
# LD_PRELOAD injection of symbols. We currently leverage this for
# stream-related library validation and may make use of it for other
# similar features in the future.
cudftestutil SHARED
tests/io/metadata_utilities.cpp
tests/utilities/base_fixture.cpp
tests/utilities/column_utilities.cu
tests/utilities/default_stream.cpp
tests/utilities/table_utilities.cu
tests/utilities/tdigest_utilities.cu
)
Expand Down Expand Up @@ -790,18 +795,27 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL)
)
endif()

# Libraries for stream-related testing.
add_library(cudf_identify_stream_usage SHARED tests/utilities/identify_stream_usage.cpp)
# Libraries for stream-related testing. We build the library twice, one with STREAM_MODE_TESTING
# on and one with it set to off. Each test will then be configured to use the appropriate library
# depending via ctest and whether it has been updated to expose public stream APIs.
foreach(_mode cudf testing)
set(_tgt "cudf_identify_stream_usage_mode_${_mode}")
add_library(${_tgt} SHARED tests/utilities/identify_stream_usage.cpp)

set_target_properties(
${_tgt}
PROPERTIES # set target compile options
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON
)
target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm)
add_library(cudf::${_tgt} ALIAS ${_tgt})

set_target_properties(
cudf_identify_stream_usage
PROPERTIES # set target compile options
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON
)
target_link_libraries(cudf_identify_stream_usage PUBLIC CUDA::cudart rmm::rmm)
add_library(cudf::cudf_identify_stream_usage ALIAS cudf_identify_stream_usage)
if("${_mode}" STREQUAL "testing")
target_compile_definitions(${_tgt} PUBLIC STREAM_MODE_TESTING)
endif()
endforeach()
endif()

# ##################################################################################################
Expand Down Expand Up @@ -877,7 +891,8 @@ if(CUDF_BUILD_TESTUTIL)
endif()

if(CUDF_BUILD_STREAMS_TEST_UTIL)
install(TARGETS cudf_identify_stream_usage DESTINATION ${lib_dir})
install(TARGETS cudf_identify_stream_usage_mode_cudf DESTINATION ${lib_dir})
install(TARGETS cudf_identify_stream_usage_mode_testing DESTINATION ${lib_dir})
endif()

set(doc_string
Expand Down
63 changes: 42 additions & 21 deletions cpp/include/cudf_test/base_fixture.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,8 +23,9 @@
#include <cudf/utilities/traits.hpp>
#include <cudf_test/cudf_gtest.hpp>
#include <cudf_test/cxxopts.hpp>
#include <cudf_test/default_stream.hpp>
#include <cudf_test/file_utilities.hpp>
#include <cudf_test/stream_checking_resource_adapter.hpp>
#include <cudf_test/stream_checking_resource_adaptor.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
Expand Down Expand Up @@ -308,16 +309,33 @@ inline auto parse_cudf_test_opts(int argc, char** argv)
const char* env_rmm_mode = std::getenv("GTEST_CUDF_RMM_MODE"); // Overridden by CLI options
const char* env_stream_mode =
std::getenv("GTEST_CUDF_STREAM_MODE"); // Overridden by CLI options
auto default_rmm_mode = env_rmm_mode ? env_rmm_mode : "pool";
auto default_stream_mode = env_stream_mode ? env_stream_mode : "default";
const char* env_stream_error_mode =
std::getenv("GTEST_CUDF_STREAM_ERROR_MODE"); // Overridden by CLI options
auto default_rmm_mode = env_rmm_mode ? env_rmm_mode : "pool";
auto default_stream_mode = env_stream_mode ? env_stream_mode : "default";
auto default_stream_error_mode = env_stream_error_mode ? env_stream_error_mode : "error";
options.allow_unrecognised_options().add_options()(
"rmm_mode",
"RMM allocation mode",
cxxopts::value<std::string>()->default_value(default_rmm_mode));
// `new_cudf_default` means that cudf::get_default_stream has been patched,
// so we raise errors anywhere that a CUDA default stream is observed
// instead of cudf::get_default_stream(). This corresponds to compiling
// identify_stream_usage with STREAM_MODE_TESTING=OFF (must do both at the
// same time).
// `new_testing_default` means that cudf::test::get_default_stream has been
// patched, so we raise errors anywhere that _any_ other stream is
// observed. This corresponds to compiling identify_stream_usage with
// STREAM_MODE_TESTING=ON (must do both at the same time).
options.allow_unrecognised_options().add_options()(
"stream_mode",
"Whether to use a non-default stream",
cxxopts::value<std::string>()->default_value(default_stream_mode));
options.allow_unrecognised_options().add_options()(
"stream_error_mode",
"Whether to error or print to stdout when a non-default stream is observed and stream_mode "
"is not \"default\"",
cxxopts::value<std::string>()->default_value(default_stream_error_mode));
return options.parse(argc, argv);
} catch (const cxxopts::OptionException& e) {
CUDF_FAIL("Error parsing command line options");
Expand All @@ -334,21 +352,24 @@ inline auto parse_cudf_test_opts(int argc, char** argv)
* function parses the command line to customize test behavior, like the
* allocation mode used for creating the default memory resource.
*/
#define CUDF_TEST_PROGRAM_MAIN() \
int main(int argc, char** argv) \
{ \
::testing::InitGoogleTest(&argc, argv); \
auto const cmd_opts = parse_cudf_test_opts(argc, argv); \
auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>(); \
auto resource = cudf::test::create_memory_resource(rmm_mode); \
rmm::mr::set_current_device_resource(resource.get()); \
\
auto const stream_mode = cmd_opts["stream_mode"].as<std::string>(); \
rmm::cuda_stream const new_default_stream{}; \
if (stream_mode == "custom") { \
auto adapter = make_stream_checking_resource_adaptor(resource.get()); \
rmm::mr::set_current_device_resource(&adapter); \
} \
\
return RUN_ALL_TESTS(); \
#define CUDF_TEST_PROGRAM_MAIN() \
int main(int argc, char** argv) \
{ \
::testing::InitGoogleTest(&argc, argv); \
auto const cmd_opts = parse_cudf_test_opts(argc, argv); \
auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>(); \
auto resource = cudf::test::create_memory_resource(rmm_mode); \
rmm::mr::set_current_device_resource(resource.get()); \
\
auto const stream_mode = cmd_opts["stream_mode"].as<std::string>(); \
if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { \
auto const stream_error_mode = cmd_opts["stream_error_mode"].as<std::string>(); \
auto const error_on_invalid_stream = (stream_error_mode == "error"); \
auto const check_default_stream = (stream_mode == "new_cudf_default"); \
auto adaptor = make_stream_checking_resource_adaptor( \
resource.get(), error_on_invalid_stream, check_default_stream); \
rmm::mr::set_current_device_resource(&adaptor); \
} \
\
return RUN_ALL_TESTS(); \
}
41 changes: 41 additions & 0 deletions cpp/include/cudf_test/default_stream.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace test {

/**
* @brief Get the default stream to use for tests.
*
* The standard behavior of this function is to return cudf's default stream
* (cudf::get_default_stream). This function is primarily provided as an
* overload target for preload libraries (via LD_PRELOAD) so that the default
* stream used for tests may be modified for tracking purposes. All tests of
* public APIs that accept streams should pass `cudf::test::get_default_stream`
* as the stream argument so that a preload library changing the behavior of
* this function will trigger those tests to run on a different stream than
* `cudf::get_default_stream`.
*
* @return The default stream to use for tests.
*/
rmm::cuda_stream_view const get_default_stream();

} // namespace test
} // namespace cudf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,6 +15,8 @@
*/
#pragma once

#include <cudf_test/default_stream.hpp>

#include <rmm/mr/device/device_memory_resource.hpp>

/**
Expand All @@ -33,7 +35,12 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res
*
* @param upstream The resource used for allocating/deallocating device memory
*/
stream_checking_resource_adaptor(Upstream* upstream) : upstream_{upstream}
stream_checking_resource_adaptor(Upstream* upstream,
bool error_on_invalid_stream,
bool check_default_stream)
: upstream_{upstream},
error_on_invalid_stream_{error_on_invalid_stream},
check_default_stream_{check_default_stream}
{
CUDF_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
}
Expand Down Expand Up @@ -87,7 +94,7 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
{
verify_non_default_stream(stream);
verify_stream(stream);
return upstream_->allocate(bytes, stream);
}

Expand All @@ -102,7 +109,7 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res
*/
void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override
{
verify_non_default_stream(stream);
verify_stream(stream);
upstream_->deallocate(ptr, bytes, stream);
}

Expand Down Expand Up @@ -131,25 +138,44 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res
*/
std::pair<std::size_t, std::size_t> do_get_mem_info(rmm::cuda_stream_view stream) const override
{
verify_non_default_stream(stream);
verify_stream(stream);
return upstream_->get_mem_info(stream);
}

/**
* @brief Throw an error if given one of CUDA's default stream specifiers.
* @brief Throw an error if the provided stream is invalid.
*
* A stream is invalid if:
* - check_default_stream_ is true and this function is passed one of CUDA's
* default stream specifiers, or
* - check_default_stream_ is false and this function is passed any stream
* other than the result of cudf::test::get_default_stream().
*
* @throws `std::runtime_error` if provided a default stream
* @throws `std::runtime_error` if provided an invalid stream
*/
void verify_non_default_stream(rmm::cuda_stream_view const stream) const
void verify_stream(rmm::cuda_stream_view const stream) const
{
auto cstream{stream.value()};
if (cstream == cudaStreamDefault || (cstream == cudaStreamLegacy) ||
(cstream == cudaStreamPerThread)) {
throw std::runtime_error("Attempted to perform an operation on a default stream!");
auto const invalid_stream =
check_default_stream_ ? ((cstream == cudaStreamDefault) || (cstream == cudaStreamLegacy) ||
(cstream == cudaStreamPerThread))
: (cstream != cudf::test::get_default_stream().value());

if (invalid_stream) {
if (error_on_invalid_stream_) {
throw std::runtime_error("Attempted to perform an operation on an unexpected stream!");
} else {
std::cout << "Attempted to perform an operation on an unexpected stream!" << std::endl;
}
}
}

Upstream* upstream_; // the upstream resource used for satisfying allocation requests
Upstream* upstream_; // the upstream resource used for satisfying allocation requests
bool error_on_invalid_stream_; // If true, throw an exception when the wrong stream is detected.
// If false, simply print to stdout.
bool check_default_stream_; // If true, throw an exception when the default stream is observed.
// If false, throw an exception when anything other than
// cudf::test::get_default_stream() is observed.
};

/**
Expand All @@ -160,7 +186,9 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res
* @param upstream Pointer to the upstream resource
*/
template <typename Upstream>
stream_checking_resource_adaptor<Upstream> make_stream_checking_resource_adaptor(Upstream* upstream)
stream_checking_resource_adaptor<Upstream> make_stream_checking_resource_adaptor(
Upstream* upstream, bool error_on_invalid_stream, bool check_default_stream)
{
return stream_checking_resource_adaptor<Upstream>{upstream};
return stream_checking_resource_adaptor<Upstream>{
upstream, error_on_invalid_stream, check_default_stream};
}
4 changes: 2 additions & 2 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,8 @@ ConfigureTest(
# tests by manually invoking the executable, so we'll have to manually pass this environment
# variable in that setup.
set_tests_properties(
STREAM_IDENTIFICATION_TEST PROPERTIES ENVIRONMENT
LD_PRELOAD=$<TARGET_FILE:cudf_identify_stream_usage>
STREAM_IDENTIFICATION_TEST
PROPERTIES ENVIRONMENT LD_PRELOAD=$<TARGET_FILE:cudf_identify_stream_usage_mode_cudf>
)

# ##################################################################################################
Expand Down
Loading

0 comments on commit 55ed347

Please sign in to comment.