Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 29 additions & 25 deletions src/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,34 +80,38 @@ After a slower first call to load the library and look up the function, no additional
overhead is expected compared to regular `ccall`.
"""
macro runtime_ccall(target, args...)
    # `target` is the `(function_name, library)` tuple expression naming the symbol to call;
    # `args` are the remaining `ccall` arguments (return type, argument types, values) and
    # are spliced, escaped, into the generated `ccall` unchanged.

    # From this Julia version on, expand to a plain `ccall` on the escaped target: no
    # function-pointer cache is created and `target` is not validated here.
    # NOTE(review): presumably `ccall` itself resolves the library lazily on these
    # versions -- confirm against the Julia 1.6 release notes.
    if VERSION >= v"1.6.0-DEV.819"
        return quote
            ccall($(esc(target)), $(map(esc, args)...))
        end
    end

    # Legacy path: look the function pointer up at run time and cache it.

    # decode ccall function/library target
    Meta.isexpr(target, :tuple) || error("Expected (function_name, library) tuple")
    function_name, library = target.args

    # global const ref to hold the function pointer; it is defined in the calling module
    # at macro-expansion time, one per call site (the name is gensym'd)
    @gensym fptr_cache
    @eval __module__ begin
        # uses atomics (release store, acquire load) for thread safety.
        # see https://github.com/JuliaGPU/CUDAapi.jl/issues/106 for details
        const $fptr_cache = Threads.Atomic{UInt}(0)
    end

    return quote
        # use a closure to hold the lookup and avoid code bloat in the caller
        @noinline function cache_fptr!()
            library = Libdl.dlopen($(esc(library)))
            $(esc(fptr_cache))[] = Libdl.dlsym(library, $(esc(function_name)))

            $(esc(fptr_cache))[]
        end

        # a cached value of 0 means the symbol has not been looked up yet
        fptr = $(esc(fptr_cache))[]
        if fptr == 0        # folded into the null check performed by ccall
            fptr = cache_fptr!()
        end

        ccall(reinterpret(Ptr{Cvoid}, fptr), $(map(esc, args)...))
    end
end