Merge pull request #730 from JuliaGPU/tb/compute_sanitizer
Switch tests over to compute-sanitizer.
maleadt authored Mar 10, 2021
2 parents eaa47ed + 86a6dc0 commit fe62319
Showing 17 changed files with 100 additions and 79 deletions.
6 changes: 4 additions & 2 deletions .buildkite/pipeline.yml
@@ -27,7 +27,7 @@ steps:
version: 1.6-nightly
- JuliaCI/julia-test#v1:
julia_args: "-g2"
test_args: "--memcheck"
test_args: "--sanitize --quickfail --jobs=1"
- JuliaCI/julia-coverage#v1:
codecov: true
dirs:
@@ -39,7 +39,9 @@ steps:
cuda: "11.2" # older versions of CUDA have issues
cap: "recent" # test as much as possible
env:
-JULIA_CUDA_MEMORY_POOL: 'none' # CUDA's memory pool requires compute-sanitizer
+JULIA_CUDA_MEMORY_POOL: 'none' # compute-sanitizer uses a lot of memory, so we need device_reset!
+JULIA_CUDA_VERSION: '11.2'
+JULIA_CUDA_USE_BINARYBUILDER: 'true'
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 120
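To reproduce this CI configuration locally, the same flags can be forwarded through Pkg; a rough sketch (Pkg.test does accept julia_args and test_args, but the exact local setup is an assumption):

    using Pkg
    # Debug-level device code (-g2) plus the sanitizer flags from the pipeline above.
    Pkg.test("CUDA"; julia_args=`-g2`, test_args=`--sanitize --quickfail --jobs=1`)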

57 changes: 30 additions & 27 deletions Artifacts.toml
@@ -85,99 +85,102 @@ lazy = true

[[CUDA110]]
arch = "powerpc64le"
git-tree-sha1 = "b22672705ca4f00c784a3f9d58619408d4af9de0"
git-tree-sha1 = "25a70e995c5457a9b3c7dd7ff8a62d14acc2abc5"
libc = "glibc"
os = "linux"
lazy = true

[[CUDA110.download]]
sha256 = "e86a67aa8b1b2cd73d78572401efa75f9bb26f6a259f12d0471c64b74fbe204f"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+3/CUDA.v11.0.3.powerpc64le-linux-gnu.tar.gz"
sha256 = "120ee6f20fc3c3c59611cf3c5b1584ed14658bb5d1bf9fd1b25a14182247d262"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+4/CUDA.v11.0.3.powerpc64le-linux-gnu.tar.gz"

[[CUDA110]]
arch = "x86_64"
git-tree-sha1 = "6b1a60793e5e98abdcfc3724cfa22b2a5348dc09"
git-tree-sha1 = "74e3e04bdbf56ccf276cd8dd896ad07033846fae"
libc = "glibc"
os = "linux"
lazy = true

[[CUDA110.download]]
sha256 = "520e690529f67afe6aabdd8d18dc34d18acf5020cb3dc1fd4e904998d9e17aba"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+3/CUDA.v11.0.3.x86_64-linux-gnu.tar.gz"
sha256 = "291e84f0d598ecbcbe438b1d42022583d061ad5f4eece2b1c06d600332b0367e"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+4/CUDA.v11.0.3.x86_64-linux-gnu.tar.gz"

[[CUDA110]]
arch = "x86_64"
git-tree-sha1 = "2d09da4d71a0762750dee0861e28029d38b08d1e"
git-tree-sha1 = "1ab27f582deafbc99077d540a01141e620620177"
os = "windows"
lazy = true

[[CUDA110.download]]
sha256 = "d11ca219e9b91725c6677f36b339459d149ffdcfa3f5e51928fb133158caa15a"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+3/CUDA.v11.0.3.x86_64-w64-mingw32.tar.gz"
sha256 = "0ea0100ee7fa6d67c8d63ea44e719d76f6f70ce1ab5f657d7c97f30fae173af5"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.0.3+4/CUDA.v11.0.3.x86_64-w64-mingw32.tar.gz"


[[CUDA111]]
arch = "powerpc64le"
git-tree-sha1 = "44dba03dc848a148c9d2430354bf7e52e216364c"
git-tree-sha1 = "8837163c5563af77039b4a04a49b6e2c3f123ab4"
libc = "glibc"
os = "linux"
lazy = true

[[CUDA111.download]]
sha256 = "ac85a364080ea8b97e77fb83967046c54099f7c63769577fa39a1311b68add81"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+2/CUDA.v11.1.1.powerpc64le-linux-gnu.tar.gz"
sha256 = "847f43a4f68c2b08c6275c988ff7c7e5414ad477a625ac78f6e4970969fccc48"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+3/CUDA.v11.1.1.powerpc64le-linux-gnu.tar.gz"

[[CUDA111]]
arch = "x86_64"
git-tree-sha1 = "48c41dccb8db0c9aa9483267cb33719207abe4c1"
git-tree-sha1 = "4670dd02df5210bd53199f14ec9f8cc027d889e0"
libc = "glibc"
os = "linux"
lazy = true

[[CUDA111.download]]
sha256 = "b7242ce10b3fb06d886725209d5b19d565c15c7e244eb84b50262f281a04291c"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+2/CUDA.v11.1.1.x86_64-linux-gnu.tar.gz"
sha256 = "84a9574db7bfb0a59dd03ef1a85874d3f33a7686507d89312700f5c519307cba"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+3/CUDA.v11.1.1.x86_64-linux-gnu.tar.gz"

[[CUDA111]]
arch = "x86_64"
git-tree-sha1 = "ad4cf0816c2c327477c512f476649bfde7ada206"
git-tree-sha1 = "86505c4367204e1769e6341380841f7f589a2f4d"
os = "windows"
lazy = true

[[CUDA111.download]]
sha256 = "026a92bcb8d7a5ff6f2e6e262ed8d8387164314941f0dc1b3228e383e04a60a0"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+2/CUDA.v11.1.1.x86_64-w64-mingw32.tar.gz"
sha256 = "a56db28c70e9736f9ea024f3afa7fdedf899b7c998808db7d8a368e0a1208ed9"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.1.1+3/CUDA.v11.1.1.x86_64-w64-mingw32.tar.gz"


[[CUDA112]]
arch = "powerpc64le"
git-tree-sha1 = "3141108f3144f5170dacc12749a61c14101b42b5"
git-tree-sha1 = "ef3928da3f9b68a5213a93f91da0d27e32c01e50"
libc = "glibc"
os = "linux"
lazy = true

[[CUDA112.download]]
sha256 = "d7d6c399c77cabc75f1387869ca8bbef93cb6a745004993b34306e0b23d5bd18"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+0/CUDA.v11.2.1.powerpc64le-linux-gnu.tar.gz"
sha256 = "770235b69868b88e6db4efc30a8659e9708f3b432028e2032ba589cf2c3efaf8"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+1/CUDA.v11.2.1.powerpc64le-linux-gnu.tar.gz"

[[CUDA112]]
arch = "x86_64"
git-tree-sha1 = "43b02b66f55952515d3cc933404d027fb904cd8b"
git-tree-sha1 = "18f4e83091aec02d8229c2b009a45a5c22b47664"
libc = "glibc"
os = "linux"
lazy = true

[[CUDA112.download]]
sha256 = "70089c452bf923c4951048d336ac32ed28ee3672f8667bc7595fdc6190bf1990"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+0/CUDA.v11.2.1.x86_64-linux-gnu.tar.gz"
sha256 = "6da495c82fae19e0aae8691addc72829376547543324358f39e16835cb208e6e"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+1/CUDA.v11.2.1.x86_64-linux-gnu.tar.gz"

[[CUDA112]]
arch = "x86_64"
git-tree-sha1 = "8b7275b36a973e6345a76b2931ddf397228e34ca"
git-tree-sha1 = "4765905e93e1e93ca8d2eb52a1e8cec5de4627b1"
os = "windows"
lazy = true

[[CUDA112.download]]
sha256 = "ed69a6b9630fc83e75856486fd157903c6e93e1d70e0fc7e6c67ca0dacea2b15"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+0/CUDA.v11.2.1.x86_64-w64-mingw32.tar.gz"
sha256 = "6dc0ae6aab8b878864bf926fd9446c71f92f689e6115d6dcedc54ac492d30ea3"
url = "https://github.com/JuliaBinaryWrappers/CUDA_jll.jl/releases/download/CUDA-v11.2.1+1/CUDA.v11.2.1.x86_64-w64-mingw32.tar.gz"



# CUDNN
17 changes: 8 additions & 9 deletions deps/bindeps.jl
@@ -33,7 +33,7 @@ Returns the CUDA release part of the version as returned by [`version`](@ref).
toolkit_release() = @after_init(VersionNumber(__toolkit_version[].major, __toolkit_version[].minor))

const __nvdisasm = Ref{String}()
-const __memcheck = Ref{Union{Nothing,String}}()
+const __compute_sanitizer = Ref{Union{Nothing,String}}()
const __libdevice = Ref{String}()
const __libcudadevrt = Ref{String}()
const __libcupti = Ref{Union{Nothing,String}}()
@@ -47,10 +47,10 @@ const __libcudnn = Ref{Union{Nothing,String}}(nothing)
const __libcutensor = Ref{Union{Nothing,String}}(nothing)

nvdisasm() = @after_init(__nvdisasm[])
-function memcheck()
+function compute_sanitizer()
@after_init begin
-@assert has_memcheck() "This functionality is unavailable as CUDA-MEMCHECK is missing."
-__memcheck[]
+@assert has_compute_sanitizer() "This functionality is unavailable as compute-sanitizer is missing."
+__compute_sanitizer[]
end
end
libdevice() = @after_init(__libdevice[])
@@ -68,8 +68,8 @@ function libnvtx()
end
end

-export has_memcheck, has_cupti, has_nvtx
-has_memcheck() = @after_init(__memcheck[]) !== nothing
+export has_compute_sanitizer, has_cupti, has_nvtx
+has_compute_sanitizer() = @after_init(__compute_sanitizer[]) !== nothing
has_cupti() = @after_init(__libcupti[]) !== nothing
has_nvtx() = @after_init(__libnvtx[]) !== nothing

@@ -173,8 +173,7 @@ function use_artifact_cuda()

__nvdisasm[] = artifact_binary(artifact.dir, "nvdisasm")
@assert isfile(__nvdisasm[])
-__memcheck[] = artifact_binary(artifact.dir, "cuda-memcheck")
-@assert isfile(__memcheck[])
+__compute_sanitizer[] = artifact_binary(artifact.dir, "compute-sanitizer")

__libcupti[] = artifact_cuda_library(artifact.dir, "cupti", artifact.version)
@assert isfile(__libcupti[])
@@ -221,7 +220,7 @@ function use_local_cuda()
__nvdisasm[] = path
end

-__memcheck[] = find_cuda_binary("cuda-memcheck", cuda_dirs)
+__compute_sanitizer[] = find_cuda_binary("compute-sanitizer", cuda_dirs)

cuda_version = parse_toolkit_version("nvdisasm", __nvdisasm[])
if cuda_version === nothing
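A short usage sketch of the renamed accessors (has_compute_sanitizer is exported by the diff above, compute_sanitizer() stays unexported; --version is a standard compute-sanitizer flag):

    using CUDA
    if has_compute_sanitizer()
        run(`$(CUDA.compute_sanitizer()) --version`)   # print the sanitizer's build info
    else
        @warn "compute-sanitizer is missing from this CUDA installation"
    end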
3 changes: 3 additions & 0 deletions src/pool.jl
@@ -223,6 +223,9 @@ const pools = PerDevice{AbstractPool}(dev->begin
pool
end)

+# NVIDIA bug #3240770
+@memoize any_stream_ordered() = any(dev->pools[dev].stream_ordered, devices())


## interface

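The new any_stream_ordered helper caches its answer, so the per-device pools are only inspected once. A minimal sketch of that caching pattern, assuming Memoize.jl-style @memoize semantics (the body below is an illustrative stand-in, not the real check):

    using Memoize  # assumption: the package providing @memoize
    @memoize function any_feature_enabled()
        @info "scanning devices"   # printed only on the first call
        return rand(Bool)          # stand-in for `any(dev -> ..., devices())`
    end
    any_feature_enabled()   # computes the value and caches it
    any_feature_enabled()   # served from the cache; no rescan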
3 changes: 1 addition & 2 deletions src/state.jl
@@ -344,8 +344,7 @@ so it is generally not needed to subscribe to the reset hook specifically.
this package.
"""
function device_reset!(dev::CuDevice=device())
-stream_ordered = any(dev->pools[dev].stream_ordered, devices())
-if stream_ordered # NVIDIA bug #3240770
+if any_stream_ordered()
@error """Due to a bug in CUDA, resetting the device is not possible on CUDA 11.2 when using the stream-ordered memory allocator.
If you are calling this function to free memory, that may not be required anymore
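Callers can apply the same guard before attempting a reset; a minimal sketch using the unexported helper introduced in src/pool.jl above:

    using CUDA
    if CUDA.any_stream_ordered()
        @warn "not resetting the device: stream-ordered pools are active (NVIDIA bug #3240770)"
    else
        device_reset!()   # safe when no device uses the stream-ordered allocator
    end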
8 changes: 4 additions & 4 deletions test/codegen.jl
@@ -140,22 +140,22 @@ end
valid_kernel() = return
invalid_kernel() = 1

-@not_if_memcheck @test CUDA.code_sass(devnull, valid_kernel, Tuple{}) == nothing
-@not_if_memcheck @test_throws CUDA.KernelError CUDA.code_sass(devnull, invalid_kernel, Tuple{})
+@not_if_sanitize @test CUDA.code_sass(devnull, valid_kernel, Tuple{}) == nothing
+@not_if_sanitize @test_throws CUDA.KernelError CUDA.code_sass(devnull, invalid_kernel, Tuple{})
end

@testset "function name mangling" begin
@eval @noinline $(Symbol("dummy_^"))(x) = x

@eval kernel_341(ptr) = (@inbounds unsafe_store!(ptr, $(Symbol("dummy_^"))(unsafe_load(ptr))); nothing)

-@not_if_memcheck CUDA.code_sass(devnull, kernel_341, Tuple{Ptr{Int}})
+@not_if_sanitize CUDA.code_sass(devnull, kernel_341, Tuple{Ptr{Int}})
end

@testset "device runtime" begin
kernel() = (CUDA.cudaGetLastError(); return)

-@not_if_memcheck CUDA.code_sass(devnull, kernel, Tuple{})
+@not_if_sanitize CUDA.code_sass(devnull, kernel, Tuple{})
end

end
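The @not_if_sanitize guard used throughout these tests lives in the test harness, which this diff does not show. A hypothetical sketch of how such a macro could work, assuming the runner forwards the --sanitize flag through ARGS:

    # Hypothetical: skip an expression entirely when running under compute-sanitizer.
    const sanitize_run = "--sanitize" in ARGS   # assumption: flag forwarded by the runner
    macro not_if_sanitize(ex)
        sanitize_run ? :(nothing) : esc(ex)
    end

    @not_if_sanitize @assert 1 + 1 == 2   # evaluated only in non-sanitizer runs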
6 changes: 3 additions & 3 deletions test/cudadrv/module.jl
@@ -24,7 +24,7 @@ let
@test md != md2
end

-@not_if_memcheck @test_throws_cuerror CUDA.ERROR_INVALID_IMAGE CuModule("foobar")
+@not_if_sanitize @test_throws_cuerror CUDA.ERROR_INVALID_IMAGE CuModule("foobar")

@testset "globals" begin
md = CuModuleFile(joinpath(@__DIR__, "ptx/global.ptx"))
@@ -54,11 +54,11 @@ end
# TODO: test with valid object code
# NOTE: apparently, on Windows cuLinkAddData! _does_ accept object data containing \0
if !Sys.iswindows()
-@not_if_memcheck @test_throws_cuerror CUDA.ERROR_UNKNOWN add_data!(link, "vadd_parent", UInt8[0])
+@not_if_sanitize @test_throws_cuerror CUDA.ERROR_UNKNOWN add_data!(link, "vadd_parent", UInt8[0])
end
end

-@not_if_memcheck @testset "error log" begin
+@not_if_sanitize @testset "error log" begin
@test_throws_message contains("ptxas fatal") CuError CuModule(".version 3.1")

link = CuLink()
2 changes: 1 addition & 1 deletion test/cudadrv/pool.jl
@@ -1,4 +1,4 @@
-@not_if_memcheck let
+@not_if_sanitize let
dev = device()

pool = memory_pool(dev)
5 changes: 5 additions & 0 deletions test/cutensor/contractions.jl
@@ -2,6 +2,9 @@ using CUDA.CUTENSOR
using CUDA
using LinearAlgebra

+# these tests perform a lot of harmless-but-invalid API calls, polluting sanitizer logs
+@not_if_sanitize begin

eltypes = ( (Float32, Float32, Float32, Float32),
(Float32, Float32, Float32, Float16),
(ComplexF32, ComplexF32, ComplexF32, ComplexF32),
@@ -196,3 +199,5 @@ can_pin = !Sys.iswindows()
end
end
end

+end
8 changes: 7 additions & 1 deletion test/examples.jl
@@ -1,5 +1,9 @@
+# NVIDIA bug 3263616: compute-sanitizer crashes when generating host backtraces,
+# but --show-backtrace=no does not survive execve.
+@not_if_sanitize begin

# these tests spawn subprocesses, so reset the current context to conserve memory
-CUDA.release() == v"11.2" || CUDA.device_reset!()
+CUDA.any_stream_ordered() || CUDA.device_reset!()

function find_sources(path::String, sources=String[])
if isdir(path)
@@ -28,3 +32,5 @@ cd(examples_dir) do
@test success(pipeline(`$cmd $example`, stderr=stderr))
end
end

+end
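The --show-backtrace=no flag mentioned in the comment disables the host backtraces that trigger NVIDIA bug 3263616. A hedged sketch of wrapping a Julia script with the sanitizer (my_script.jl is illustrative):

    using CUDA
    sanitizer = CUDA.compute_sanitizer()   # the bundled binary located by deps/bindeps.jl
    run(`$sanitizer --show-backtrace=no $(Base.julia_cmd()) my_script.jl`)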
8 changes: 7 additions & 1 deletion test/exceptions.jl
@@ -1,5 +1,9 @@
+# NVIDIA bug 3263616: compute-sanitizer crashes when generating host backtraces,
+# but --show-backtrace=no does not survive execve.
+@not_if_sanitize begin

# these tests spawn subprocesses, so reset the current context to conserve memory
-CUDA.release() == v"11.2" || CUDA.device_reset!()
+CUDA.any_stream_ordered() || CUDA.device_reset!()

@testset "stack traces at different debug levels" begin

@@ -83,3 +87,5 @@ let (code, out, err) = julia_script(script, `-g2`)
end

end

+end
8 changes: 4 additions & 4 deletions test/execution.jl
@@ -36,7 +36,7 @@ end
@testset "compilation params" begin
@cuda dummy()

-@not_if_memcheck @test_throws CuError @cuda threads=2 maxthreads=1 dummy()
+@not_if_sanitize @test_throws CuError @cuda threads=2 maxthreads=1 dummy()
@cuda threads=2 dummy()
end

@@ -58,14 +58,14 @@ end
CUDA.code_warntype(devnull, dummy, Tuple{})
CUDA.code_llvm(devnull, dummy, Tuple{})
CUDA.code_ptx(devnull, dummy, Tuple{})
-@not_if_memcheck CUDA.code_sass(devnull, dummy, Tuple{})
+@not_if_sanitize CUDA.code_sass(devnull, dummy, Tuple{})

@device_code_lowered @cuda dummy()
@device_code_typed @cuda dummy()
@device_code_warntype io=devnull @cuda dummy()
@device_code_llvm io=devnull @cuda dummy()
@device_code_ptx io=devnull @cuda dummy()
-@not_if_memcheck @device_code_sass io=devnull @cuda dummy()
+@not_if_sanitize @device_code_sass io=devnull @cuda dummy()

mktempdir() do dir
@device_code dir=dir @cuda dummy()
Expand All @@ -77,7 +77,7 @@ end
@test occursin("julia_dummy", sprint(io->(@device_code_llvm io=io optimize=false @cuda dummy())))
@test occursin("julia_dummy", sprint(io->(@device_code_llvm io=io @cuda dummy())))
@test occursin("julia_dummy", sprint(io->(@device_code_ptx io=io @cuda dummy())))
-@not_if_memcheck @test occursin("julia_dummy", sprint(io->(@device_code_sass io=io @cuda dummy())))
+@not_if_sanitize @test occursin("julia_dummy", sprint(io->(@device_code_sass io=io @cuda dummy())))

# make sure invalid kernels can be partially reflected upon
let
4 changes: 2 additions & 2 deletions test/initialization.jl
@@ -3,7 +3,7 @@

# the API shouldn't have been initialized
@test CuCurrentContext() == nothing
-@not_if_memcheck @test CuCurrentDevice() == nothing
+@not_if_sanitize @test CuCurrentDevice() == nothing

task_cb = Any[nothing for tid in 1:Threads.nthreads()]
CUDA.attaskswitch() do
@@ -68,7 +68,7 @@ end

reset_cb()

-if CUDA.release() != v"11.2"
+if !CUDA.any_stream_ordered()
# NVIDIA bug #3240770
device_reset!()

2 changes: 1 addition & 1 deletion test/pool.jl
@@ -1,6 +1,6 @@
CUDA.alloc(0)

-@not_if_memcheck @test_throws OutOfGPUMemoryError CuArray{Int}(undef, 10^20)
+@not_if_sanitize @test_throws OutOfGPUMemoryError CuArray{Int}(undef, 10^20)

@testset "@allocated" begin
@test (CUDA.@allocated CuArray{Int32}(undef,1)) == 4