From b0bce46c9e6f145b5a5e4423a63e49191f56d744 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 22 Feb 2021 17:22:30 +0100 Subject: [PATCH 1/5] Switch tests over to compute-sanitizer. --- .buildkite/pipeline.yml | 4 +-- Artifacts.toml | 57 ++++++++++++++++++----------------- deps/bindeps.jl | 17 +++++------ test/codegen.jl | 8 ++--- test/cudadrv/module.jl | 6 ++-- test/cudadrv/pool.jl | 2 +- test/cutensor/contractions.jl | 5 +++ test/examples.jl | 6 ++++ test/exceptions.jl | 6 ++++ test/execution.jl | 8 ++--- test/initialization.jl | 2 +- test/pool.jl | 2 +- test/runtests.jl | 27 ++++++++--------- test/setup.jl | 8 ++--- 14 files changed, 87 insertions(+), 71 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 39a277a44d..6c57c79913 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -27,7 +27,7 @@ steps: version: 1.6-nightly - JuliaCI/julia-test#v1: julia_args: "-g2" - test_args: "--memcheck" + test_args: "--sanitize" - JuliaCI/julia-coverage#v1: codecov: true dirs: @@ -38,8 +38,6 @@ steps: queue: "juliagpu" cuda: "11.2" # older versions of CUDA have issues cap: "recent" # test as much as possible - env: - JULIA_CUDA_MEMORY_POOL: 'none' # CUDA's memory pool requires compute-sanitizer if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 120 diff --git a/Artifacts.toml b/Artifacts.toml index d518be731a..dce3ddc503 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -85,99 +85,102 @@ lazy = true [[CUDA110]] arch = "powerpc64le" -git-tree-sha1 = "b22672705ca4f00c784a3f9d58619408d4af9de0" +git-tree-sha1 = "25a70e995c5457a9b3c7dd7ff8a62d14acc2abc5" libc = "glibc" os = "linux" lazy = true [[CUDA110.download]] - sha256 = "e86a67aa8b1b2cd73d78572401efa75f9bb26f6a259f12d0471c64b74fbe204f" - url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+3/CUDA.v11.0.3.powerpc64le-linux-gnu.tar.gz" + sha256 = "120ee6f20fc3c3c59611cf3c5b1584ed14658bb5d1bf9fd1b25a14182247d262" + url = 
"https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+4/CUDA.v11.0.3.powerpc64le-linux-gnu.tar.gz" [[CUDA110]] arch = "x86_64" -git-tree-sha1 = "6b1a60793e5e98abdcfc3724cfa22b2a5348dc09" +git-tree-sha1 = "74e3e04bdbf56ccf276cd8dd896ad07033846fae" libc = "glibc" os = "linux" lazy = true [[CUDA110.download]] - sha256 = "520e690529f67afe6aabdd8d18dc34d18acf5020cb3dc1fd4e904998d9e17aba" - url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+3/CUDA.v11.0.3.x86_64-linux-gnu.tar.gz" + sha256 = "291e84f0d598ecbcbe438b1d42022583d061ad5f4eece2b1c06d600332b0367e" + url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+4/CUDA.v11.0.3.x86_64-linux-gnu.tar.gz" [[CUDA110]] arch = "x86_64" -git-tree-sha1 = "2d09da4d71a0762750dee0861e28029d38b08d1e" +git-tree-sha1 = "1ab27f582deafbc99077d540a01141e620620177" os = "windows" lazy = true [[CUDA110.download]] - sha256 = "d11ca219e9b91725c6677f36b339459d149ffdcfa3f5e51928fb133158caa15a" - url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+3/CUDA.v11.0.3.x86_64-w64-mingw32.tar.gz" + sha256 = "0ea0100ee7fa6d67c8d63ea44e719d76f6f70ce1ab5f657d7c97f30fae173af5" + url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+4/CUDA.v11.0.3.x86_64-w64-mingw32.tar.gz" + [[CUDA111]] arch = "powerpc64le" -git-tree-sha1 = "44dba03dc848a148c9d2430354bf7e52e216364c" +git-tree-sha1 = "8837163c5563af77039b4a04a49b6e2c3f123ab4" libc = "glibc" os = "linux" lazy = true [[CUDA111.download]] - sha256 = "ac85a364080ea8b97e77fb83967046c54099f7c63769577fa39a1311b68add81" - url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+2/CUDA.v11.1.1.powerpc64le-linux-gnu.tar.gz" + sha256 = "847f43a4f68c2b08c6275c988ff7c7e5414ad477a625ac78f6e4970969fccc48" + url = 
"https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+3/CUDA.v11.1.1.powerpc64le-linux-gnu.tar.gz" [[CUDA111]] arch = "x86_64" -git-tree-sha1 = "48c41dccb8db0c9aa9483267cb33719207abe4c1" +git-tree-sha1 = "4670dd02df5210bd53199f14ec9f8cc027d889e0" libc = "glibc" os = "linux" lazy = true [[CUDA111.download]] - sha256 = "b7242ce10b3fb06d886725209d5b19d565c15c7e244eb84b50262f281a04291c" - url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+2/CUDA.v11.1.1.x86_64-linux-gnu.tar.gz" + sha256 = "84a9574db7bfb0a59dd03ef1a85874d3f33a7686507d89312700f5c519307cba" + url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+3/CUDA.v11.1.1.x86_64-linux-gnu.tar.gz" [[CUDA111]] arch = "x86_64" -git-tree-sha1 = "ad4cf0816c2c327477c512f476649bfde7ada206" +git-tree-sha1 = "86505c4367204e1769e6341380841f7f589a2f4d" os = "windows" lazy = true [[CUDA111.download]] - sha256 = "026a92bcb8d7a5ff6f2e6e262ed8d8387164314941f0dc1b3228e383e04a60a0" - url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+2/CUDA.v11.1.1.x86_64-w64-mingw32.tar.gz" + sha256 = "a56db28c70e9736f9ea024f3afa7fdedf899b7c998808db7d8a368e0a1208ed9" + url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+3/CUDA.v11.1.1.x86_64-w64-mingw32.tar.gz" + [[CUDA112]] arch = "powerpc64le" -git-tree-sha1 = "3141108f3144f5170dacc12749a61c14101b42b5" +git-tree-sha1 = "ef3928da3f9b68a5213a93f91da0d27e32c01e50" libc = "glibc" os = "linux" lazy = true [[CUDA112.download]] - sha256 = "d7d6c399c77cabc75f1387869ca8bbef93cb6a745004993b34306e0b23d5bd18" - url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+0/CUDA.v11.2.1.powerpc64le-linux-gnu.tar.gz" + sha256 = "770235b69868b88e6db4efc30a8659e9708f3b432028e2032ba589cf2c3efaf8" + url = 
"https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+1/CUDA.v11.2.1.powerpc64le-linux-gnu.tar.gz" [[CUDA112]] arch = "x86_64" -git-tree-sha1 = "43b02b66f55952515d3cc933404d027fb904cd8b" +git-tree-sha1 = "18f4e83091aec02d8229c2b009a45a5c22b47664" libc = "glibc" os = "linux" lazy = true [[CUDA112.download]] - sha256 = "70089c452bf923c4951048d336ac32ed28ee3672f8667bc7595fdc6190bf1990" - url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+0/CUDA.v11.2.1.x86_64-linux-gnu.tar.gz" + sha256 = "6da495c82fae19e0aae8691addc72829376547543324358f39e16835cb208e6e" + url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+1/CUDA.v11.2.1.x86_64-linux-gnu.tar.gz" [[CUDA112]] arch = "x86_64" -git-tree-sha1 = "8b7275b36a973e6345a76b2931ddf397228e34ca" +git-tree-sha1 = "4765905e93e1e93ca8d2eb52a1e8cec5de4627b1" os = "windows" lazy = true [[CUDA112.download]] - sha256 = "ed69a6b9630fc83e75856486fd157903c6e93e1d70e0fc7e6c67ca0dacea2b15" - url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+0/CUDA.v11.2.1.x86_64-w64-mingw32.tar.gz" + sha256 = "6dc0ae6aab8b878864bf926fd9446c71f92f689e6115d6dcedc54ac492d30ea3" + url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+1/CUDA.v11.2.1.x86_64-w64-mingw32.tar.gz" + # CUDNN diff --git a/deps/bindeps.jl b/deps/bindeps.jl index d1e5218aa6..3e4ffcc5dc 100644 --- a/deps/bindeps.jl +++ b/deps/bindeps.jl @@ -33,7 +33,7 @@ Returns the CUDA release part of the version as returned by [`version`](@ref). 
toolkit_release() = @after_init(VersionNumber(__toolkit_version[].major, __toolkit_version[].minor)) const __nvdisasm = Ref{String}() -const __memcheck = Ref{Union{Nothing,String}}() +const __compute_sanitizer = Ref{Union{Nothing,String}}() const __libdevice = Ref{String}() const __libcudadevrt = Ref{String}() const __libcupti = Ref{Union{Nothing,String}}() @@ -47,10 +47,10 @@ const __libcudnn = Ref{Union{Nothing,String}}(nothing) const __libcutensor = Ref{Union{Nothing,String}}(nothing) nvdisasm() = @after_init(__nvdisasm[]) -function memcheck() +function compute_sanitizer() @after_init begin - @assert has_memcheck() "This functionality is unavailabe as CUDA-MEMCHECK is missing." - __memcheck[] + @assert has_compute_sanitizer() "This functionality is unavailabe as compute-sanitizer is missing." + __compute_sanitizer[] end end libdevice() = @after_init(__libdevice[]) @@ -68,8 +68,8 @@ function libnvtx() end end -export has_memcheck, has_cupti, has_nvtx -has_memcheck() = @after_init(__memcheck[]) !== nothing +export has_compute_sanitizer, has_cupti, has_nvtx +has_compute_sanitizer() = @after_init(__compute_sanitizer[]) !== nothing has_cupti() = @after_init(__libcupti[]) !== nothing has_nvtx() = @after_init(__libnvtx[]) !== nothing @@ -173,8 +173,7 @@ function use_artifact_cuda() __nvdisasm[] = artifact_binary(artifact.dir, "nvdisasm") @assert isfile(__nvdisasm[]) - __memcheck[] = artifact_binary(artifact.dir, "cuda-memcheck") - @assert isfile(__memcheck[]) + __compute_sanitizer[] = artifact_binary(artifact.dir, "compute-sanitizer") __libcupti[] = artifact_cuda_library(artifact.dir, "cupti", artifact.version) @assert isfile(__libcupti[]) @@ -221,7 +220,7 @@ function use_local_cuda() __nvdisasm[] = path end - __memcheck[] = find_cuda_binary("cuda-memcheck", cuda_dirs) + __compute_sanitizer[] = find_cuda_binary("compute-sanitizer", cuda_dirs) cuda_version = parse_toolkit_version("nvdisasm", __nvdisasm[]) if cuda_version === nothing diff --git a/test/codegen.jl 
b/test/codegen.jl index 5f7d42daed..e0acbf142d 100644 --- a/test/codegen.jl +++ b/test/codegen.jl @@ -140,8 +140,8 @@ end valid_kernel() = return invalid_kernel() = 1 - @not_if_memcheck @test CUDA.code_sass(devnull, valid_kernel, Tuple{}) == nothing - @not_if_memcheck @test_throws CUDA.KernelError CUDA.code_sass(devnull, invalid_kernel, Tuple{}) + @not_if_sanitize @test CUDA.code_sass(devnull, valid_kernel, Tuple{}) == nothing + @not_if_sanitize @test_throws CUDA.KernelError CUDA.code_sass(devnull, invalid_kernel, Tuple{}) end @testset "function name mangling" begin @@ -149,13 +149,13 @@ end @eval kernel_341(ptr) = (@inbounds unsafe_store!(ptr, $(Symbol("dummy_^"))(unsafe_load(ptr))); nothing) - @not_if_memcheck CUDA.code_sass(devnull, kernel_341, Tuple{Ptr{Int}}) + @not_if_sanitize CUDA.code_sass(devnull, kernel_341, Tuple{Ptr{Int}}) end @testset "device runtime" begin kernel() = (CUDA.cudaGetLastError(); return) - @not_if_memcheck CUDA.code_sass(devnull, kernel, Tuple{}) + @not_if_sanitize CUDA.code_sass(devnull, kernel, Tuple{}) end end diff --git a/test/cudadrv/module.jl b/test/cudadrv/module.jl index 351555e6f8..def2d81fc9 100644 --- a/test/cudadrv/module.jl +++ b/test/cudadrv/module.jl @@ -24,7 +24,7 @@ let @test md != md2 end -@not_if_memcheck @test_throws_cuerror CUDA.ERROR_INVALID_IMAGE CuModule("foobar") +@not_if_sanitize @test_throws_cuerror CUDA.ERROR_INVALID_IMAGE CuModule("foobar") @testset "globals" begin md = CuModuleFile(joinpath(@__DIR__, "ptx/global.ptx")) @@ -54,11 +54,11 @@ end # TODO: test with valid object code # NOTE: apparently, on Windows cuLinkAddData! 
_does_ accept object data containing \0 if !Sys.iswindows() - @not_if_memcheck @test_throws_cuerror CUDA.ERROR_UNKNOWN add_data!(link, "vadd_parent", UInt8[0]) + @not_if_sanitize @test_throws_cuerror CUDA.ERROR_UNKNOWN add_data!(link, "vadd_parent", UInt8[0]) end end -@not_if_memcheck @testset "error log" begin +@not_if_sanitize @testset "error log" begin @test_throws_message contains("ptxas fatal") CuError CuModule(".version 3.1") link = CuLink() diff --git a/test/cudadrv/pool.jl b/test/cudadrv/pool.jl index c4594f9f61..d3c9c744d0 100644 --- a/test/cudadrv/pool.jl +++ b/test/cudadrv/pool.jl @@ -1,4 +1,4 @@ -@not_if_memcheck let +@not_if_sanitize let dev = device() pool = memory_pool(dev) diff --git a/test/cutensor/contractions.jl b/test/cutensor/contractions.jl index 84a7a200bb..6928b61ce3 100644 --- a/test/cutensor/contractions.jl +++ b/test/cutensor/contractions.jl @@ -2,6 +2,9 @@ using CUDA.CUTENSOR using CUDA using LinearAlgebra +# these tests perform a lot of harmless-but-invalid API calls, polluting sanitizer logs +@not_if_sanitize begin + eltypes = ( (Float32, Float32, Float32, Float32), (Float32, Float32, Float32, Float16), (ComplexF32, ComplexF32, ComplexF32, ComplexF32), @@ -196,3 +199,5 @@ can_pin = !Sys.iswindows() end end end + +end diff --git a/test/examples.jl b/test/examples.jl index d6902071e6..fa03f93a8a 100644 --- a/test/examples.jl +++ b/test/examples.jl @@ -1,3 +1,7 @@ +# NVIDIA bug 3263616: compute-sanitizer crashes when generating host backtraces, +# but --show-backtrace=no does not survive execve. 
+@not_if_sanitize begin + # these tests spawn subprocesses, so reset the current context to conserve memory CUDA.release() == v"11.2" || CUDA.device_reset!() @@ -28,3 +32,5 @@ cd(examples_dir) do @test success(pipeline(`$cmd $example`, stderr=stderr)) end end + +end diff --git a/test/exceptions.jl b/test/exceptions.jl index 1141879117..5f7aaad6ce 100644 --- a/test/exceptions.jl +++ b/test/exceptions.jl @@ -1,3 +1,7 @@ +# NVIDIA bug 3263616: compute-sanitizer crashes when generating host backtraces, +# but --show-backtrace=no does not survive execve. +@not_if_sanitize begin + # these tests spawn subprocesses, so reset the current context to conserve memory CUDA.release() == v"11.2" || CUDA.device_reset!() @@ -83,3 +87,5 @@ let (code, out, err) = julia_script(script, `-g2`) end end + +end diff --git a/test/execution.jl b/test/execution.jl index 96afc68509..26ed2e4e8f 100644 --- a/test/execution.jl +++ b/test/execution.jl @@ -36,7 +36,7 @@ end @testset "compilation params" begin @cuda dummy() - @not_if_memcheck @test_throws CuError @cuda threads=2 maxthreads=1 dummy() + @not_if_sanitize @test_throws CuError @cuda threads=2 maxthreads=1 dummy() @cuda threads=2 dummy() end @@ -58,14 +58,14 @@ end CUDA.code_warntype(devnull, dummy, Tuple{}) CUDA.code_llvm(devnull, dummy, Tuple{}) CUDA.code_ptx(devnull, dummy, Tuple{}) - @not_if_memcheck CUDA.code_sass(devnull, dummy, Tuple{}) + @not_if_sanitize CUDA.code_sass(devnull, dummy, Tuple{}) @device_code_lowered @cuda dummy() @device_code_typed @cuda dummy() @device_code_warntype io=devnull @cuda dummy() @device_code_llvm io=devnull @cuda dummy() @device_code_ptx io=devnull @cuda dummy() - @not_if_memcheck @device_code_sass io=devnull @cuda dummy() + @not_if_sanitize @device_code_sass io=devnull @cuda dummy() mktempdir() do dir @device_code dir=dir @cuda dummy() @@ -77,7 +77,7 @@ end @test occursin("julia_dummy", sprint(io->(@device_code_llvm io=io optimize=false @cuda dummy()))) @test occursin("julia_dummy", 
sprint(io->(@device_code_llvm io=io @cuda dummy()))) @test occursin("julia_dummy", sprint(io->(@device_code_ptx io=io @cuda dummy()))) - @not_if_memcheck @test occursin("julia_dummy", sprint(io->(@device_code_sass io=io @cuda dummy()))) + @not_if_sanitize @test occursin("julia_dummy", sprint(io->(@device_code_sass io=io @cuda dummy()))) # make sure invalid kernels can be partially reflected upon let diff --git a/test/initialization.jl b/test/initialization.jl index 34eb7952dc..c1629b6d4d 100644 --- a/test/initialization.jl +++ b/test/initialization.jl @@ -3,7 +3,7 @@ # the API shouldn't have been initialized @test CuCurrentContext() == nothing -@not_if_memcheck @test CuCurrentDevice() == nothing +@not_if_sanitize @test CuCurrentDevice() == nothing task_cb = Any[nothing for tid in 1:Threads.nthreads()] CUDA.attaskswitch() do diff --git a/test/pool.jl b/test/pool.jl index 6a69679663..3652109b01 100644 --- a/test/pool.jl +++ b/test/pool.jl @@ -1,6 +1,6 @@ CUDA.alloc(0) -@not_if_memcheck @test_throws OutOfGPUMemoryError CuArray{Int}(undef, 10^20) +@not_if_sanitize @test_throws OutOfGPUMemoryError CuArray{Int}(undef, 10^20) @testset "@allocated" begin @test (CUDA.@allocated CuArray{Int32}(undef,1)) == 4 diff --git a/test/runtests.jl b/test/runtests.jl index da4ab05783..0bb05debd7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -35,7 +35,7 @@ if do_help --quickfail Fail the entire run as soon as a single test errored. --jobs=N Launch `N` processes to perform tests (default: Threads.nthreads()). --gpus=N Expose `N` GPUs to test processes (default: 1). - --memcheck[=tool] Run the tests under `cuda-memcheck`. + --sanitize[=tool] Run the tests under `compute-sanitizer`. --snoop=FILE Snoop on compiled methods and save to `FILE`. 
Remaining arguments filter the tests that will be executed.""") @@ -43,7 +43,7 @@ if do_help end _, jobs = extract_flag!(ARGS, "--jobs", Threads.nthreads()) _, gpus = extract_flag!(ARGS, "--gpus", 1) -do_memcheck, memcheck_tool = extract_flag!(ARGS, "--memcheck", "memcheck") +do_sanitize, sanitize_tool = extract_flag!(ARGS, "--sanitize", "memcheck") do_snoop, snoop_path = extract_flag!(ARGS, "--snoop") do_thorough, _ = extract_flag!(ARGS, "--thorough") do_quickfail, _ = extract_flag!(ARGS, "--quickfail") @@ -96,6 +96,11 @@ if do_list end exit(0) end +## no options should remain +optlike_args = filter(startswith("-"), ARGS) +if !isempty(optlike_args) + error("Unknown test options `$(join(optlike_args, " "))` (try `--help` for usage instructions)") +end ## the remaining args filter tests if !isempty(ARGS) filter!(tests) do test @@ -161,13 +166,6 @@ is_debug = ccall(:jl_is_debugbuild, Cint, ()) != 0 if VERSION < v"1.5-" || first(picks).cap < v"7.0" push!(skip_tests, "device/wmma") end -if do_memcheck - # CUFFT causes internal failures in cuda-memcheck - push!(skip_tests, "cufft") - # CUTENSOR tests result in illegal memory accesses unregistering memory - push!(skip_tests, "cutensor") - # there's also a bunch of `memcheck || ...` expressions in the tests themselves -end if Sys.ARCH == :aarch64 # CUFFT segfaults on ARM push!(skip_tests, "cufft") @@ -206,10 +204,11 @@ if Base.JLOptions().project != C_NULL end const test_exename = popfirst!(test_exeflags.exec) function addworker(X; kwargs...) 
- exename = if do_memcheck - memcheck = CUDA.memcheck() - @info "Running under $(readchomp(`$memcheck --version`))" - `$memcheck --tool $memcheck_tool $test_exename` + exename = if do_sanitize + sanitizer = CUDA.compute_sanitizer() + @info "Running under $(readchomp(`$sanitizer --version`))" + # NVIDIA bug 3263616: compute-sanitizer crashes when generating host backtraces + `$sanitizer --tool $sanitize_tool --launch-timeout=0 --show-backtrace=no --target-processes=all $test_exename` else test_exename end @@ -283,7 +282,7 @@ function print_testworker_stats(test, wrkr, resp) end end global print_testworker_started = (name, wrkr)->begin - if do_memcheck + if do_sanitize lock(print_lock) try printstyled(name, color=:white) diff --git a/test/setup.jl b/test/setup.jl index b32ca68be4..5d75d14a21 100644 --- a/test/setup.jl +++ b/test/setup.jl @@ -10,10 +10,10 @@ testf(f, xs...; kwargs...) = TestSuite.compare(f, CuArray, xs...; kwargs...) using Random -# detect cuda-memcheck, to disable testts that are known to fail under cuda-memcheck -# (e.g. those using CUPTI) or result in verbose output (deliberate API errors) -macro not_if_memcheck(ex) - haskey(ENV, "CUDA_MEMCHECK") || return esc(ex) +# detect compute-sanitizer, to disable incompatible tests (e.g. using CUPTI), +# and to skip tests that are known to generate innocuous API errors +macro not_if_sanitize(ex) + any(contains("NV_SANITIZER"), keys(ENV)) || return esc(ex) quote @test_skip $ex end From dc810b53dfd22f56674f886163bc26a47a1a4b40 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 9 Mar 2021 09:00:52 +0100 Subject: [PATCH 2/5] Don't use artifacts so that we really use CUDA 11.2 for debug tests. 
--- .buildkite/pipeline.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6c57c79913..dce4ec858a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -38,6 +38,9 @@ steps: queue: "juliagpu" cuda: "11.2" # older versions of CUDA have issues cap: "recent" # test as much as possible + env: + JULIA_CUDA_VERSION: '11.2' + JULIA_CUDA_USE_BINARYBUILDER: 'true' if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 120 From 6b20aa469056df59c613e12849d4ca6967e1eb68 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 9 Mar 2021 11:21:04 +0100 Subject: [PATCH 3/5] Don't use the stream-ordered allocator with compute-sanitizer. Running under the sanitizer uses a lot of device memory, so we need to be able to reset the device after every testset. --- .buildkite/pipeline.yml | 3 ++- src/pool.jl | 3 +++ src/state.jl | 3 +-- test/examples.jl | 2 +- test/exceptions.jl | 2 +- test/initialization.jl | 2 +- test/setup.jl | 4 +--- 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index dce4ec858a..1330d6fc33 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -27,7 +27,7 @@ steps: version: 1.6-nightly - JuliaCI/julia-test#v1: julia_args: "-g2" - test_args: "--sanitize" + test_args: "--sanitize --quickfail" - JuliaCI/julia-coverage#v1: codecov: true dirs: @@ -39,6 +39,7 @@ steps: cuda: "11.2" # older versions of CUDA have issues cap: "recent" # test as much as possible env: + JULIA_CUDA_MEMORY_POOL: 'none' # compute-sanitizer uses a lot of memory, so we need device_reset! 
JULIA_CUDA_VERSION: '11.2' JULIA_CUDA_USE_BINARYBUILDER: 'true' if: build.message !~ /\[skip tests\]/ diff --git a/src/pool.jl b/src/pool.jl index 4ace097716..1810c60ae4 100644 --- a/src/pool.jl +++ b/src/pool.jl @@ -223,6 +223,9 @@ const pools = PerDevice{AbstractPool}(dev->begin pool end) +# NVIDIA bug #3240770 +@memoize any_stream_ordered() = any(dev->pools[dev].stream_ordered, devices()) + ## interface diff --git a/src/state.jl b/src/state.jl index 46ef1f535f..16f1dc6f23 100644 --- a/src/state.jl +++ b/src/state.jl @@ -344,8 +344,7 @@ so it is generally not needed to subscribe to the reset hook specifically. this package. """ function device_reset!(dev::CuDevice=device()) - stream_ordered = any(dev->pools[dev].stream_ordered, devices()) - if stream_ordered # NVIDIA bug #3240770 + if any_stream_ordered() @error """Due to a bug in CUDA, resetting the device is not possible on CUDA 11.2 when using the stream-ordered memory allocator. If you are calling this function to free memory, that may not be required anymore diff --git a/test/examples.jl b/test/examples.jl index fa03f93a8a..e9859322fe 100644 --- a/test/examples.jl +++ b/test/examples.jl @@ -3,7 +3,7 @@ @not_if_sanitize begin # these tests spawn subprocesses, so reset the current context to conserve memory -CUDA.release() == v"11.2" || CUDA.device_reset!() +CUDA.any_stream_ordered() || CUDA.device_reset!() function find_sources(path::String, sources=String[]) if isdir(path) diff --git a/test/exceptions.jl b/test/exceptions.jl index 5f7aaad6ce..5198c93478 100644 --- a/test/exceptions.jl +++ b/test/exceptions.jl @@ -3,7 +3,7 @@ @not_if_sanitize begin # these tests spawn subprocesses, so reset the current context to conserve memory -CUDA.release() == v"11.2" || CUDA.device_reset!() +CUDA.any_stream_ordered() || CUDA.device_reset!() @testset "stack traces at different debug levels" begin diff --git a/test/initialization.jl b/test/initialization.jl index c1629b6d4d..f9bf3f5bcb 100644 --- a/test/initialization.jl 
+++ b/test/initialization.jl @@ -68,7 +68,7 @@ end reset_cb() -if CUDA.release() != v"11.2" +if !CUDA.any_stream_ordered() # NVIDIA bug #3240770 device_reset!() diff --git a/test/setup.jl b/test/setup.jl index 5d75d14a21..a1d2257c7a 100644 --- a/test/setup.jl +++ b/test/setup.jl @@ -102,9 +102,7 @@ function runtests(f, name, time_source=:cuda, snoop=nothing) end res = vcat(collect(data), cpu_rss, gpu_rss) - if CUDA.release() != v"11.2" # NVIDIA bug #3240770 - device_reset!() - end + CUDA.any_stream_ordered() || device_reset!() res finally if snoop !== nothing From 392ac92d254117b27e24f3c66b88be5b1cbc611a Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 9 Mar 2021 15:58:45 +0100 Subject: [PATCH 4/5] Don't use multiple jobs for the test suite under compute-sanitizer. --- .buildkite/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 1330d6fc33..002730e63e 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -27,7 +27,7 @@ steps: version: 1.6-nightly - JuliaCI/julia-test#v1: julia_args: "-g2" - test_args: "--sanitize --quickfail" + test_args: "--sanitize --quickfail --jobs=1" - JuliaCI/julia-coverage#v1: codecov: true dirs: From 86a6dc09a8dbb94a1b2bea409962d6b7e089ba57 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 10 Mar 2021 08:46:09 +0100 Subject: [PATCH 5/5] Disable a hanging test. --- test/sorting.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/sorting.jl b/test/sorting.jl index 68eaa7679a..b7155183f6 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -214,7 +214,8 @@ function test_sort(T, N, f=identity; kwargs...) end -@testset "interface" begin +# FIXME: these tests hang when running under compute-sanitizer on CUDA 11.2 with -g2 +@not_if_sanitize @testset "interface" begin # pre-sorted test_sort!(Int, 1000000) test_sort!(Int32, 1000000)