Description
Hi!
I'm trying to use RuntimeGeneratedFunctions.jl (via MTK) on CUDA GPUs and I'm encountering the following issue.
MWE:

```julia
using RuntimeGeneratedFunctions
RuntimeGeneratedFunctions.init(@__MODULE__)

function foo()
    expression = :((du, u, p, t) -> du[1] = p[1] * u[1])
    f = @RuntimeGeneratedFunction(expression)
end

du = [0]
f = foo()
f(du, [1], [1], 0)  # works on the CPU

# GPU version via KernelAbstractions
using KernelAbstractions
using CUDAKernels
using CUDA

@kernel function gpu_kernel(f, du, @Const(u), @Const(p), @Const(t))
    i = @index(Global, Linear)
    @views @inbounds f(du[:, i], u[:, i], p[:, i], t)
end

_f = let f = f, kernel = gpu_kernel
    function (du, u, p, t)
        version = u isa CuArray ? CUDADevice() : CPU()
        wgs = size(u, 2)
        wait(version,
             kernel(version)(f, du, u, p, t; ndrange=size(u, 2),
                             dependencies=Event(version),
                             workgroupsize=wgs))
    end
end

_f(cu(du), cu([1f0]), cu([1f0]), 0f0)
```
which gives:
```
ERROR: GPU compilation of kernel #gpu_gpu_kernel(KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicSize, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}}}, RuntimeGeneratedFunction{(:du, :u, :p, :t), var"#_RGF_ModTag", var"#_RGF_ModTag", Expr}, CuDeviceVector{Int64, 1}, CuDeviceVector{Float32, 1}, CuDeviceVector{Float32, 1}, Float32) failed
KernelError: passing and using non-bitstype argument
Argument 3 to your kernel function is of type RuntimeGeneratedFunction{(:du, :u, :p, :t), var"#_RGF_ModTag", var"#_RGF_ModTag", Expr}, which is not isbits:
  .body is of type Expr which is not isbits.
    .head is of type Symbol which is not isbits.
    .args is of type Vector{Any} which is not isbits.
```
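The error itself is clear about the cause: the RuntimeGeneratedFunction carries its Expr body on the heap, so the object is not isbits and cannot be passed into a GPU kernel, whereas a plain non-capturing anonymous function is a singleton and therefore isbits. A quick check (using the f from the MWE above):

```julia
isbits(f)                                     # false: the RGF stores an Expr (heap data)
isbits((du, u, p, t) -> du[1] = p[1] * u[1])  # true: empty closure, singleton type
```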
One workaround is to use eval:
```julia
# Rebuild an anonymous-function Expr from the argument names stored in the
# RGF's type parameters and its stored body
function extract_expression(f::RuntimeGeneratedFunction{argnames}) where {argnames}
    Expr(:->, Expr(:tuple, argnames...), f.body)
end

ex = extract_expression(f)

# eval at top level yields a plain anonymous function, which is isbits
_f = let f = eval(ex), kernel = gpu_kernel
    function (du, u, p, t)
        version = u isa CuArray ? CUDADevice() : CPU()
        wgs = size(u, 2)
        wait(version,
             kernel(version)(f, du, u, p, t; ndrange=size(u, 2),
                             dependencies=Event(version),
                             workgroupsize=wgs))
    end
end

_f(cu(du), cu([1f0]), cu([1f0]), 0f0)  # now works
```
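As a sanity check, the eval'ed function is a plain non-capturing anonymous function, so it is isbits and acceptable to the kernel compiler (using the ex from above):

```julia
g = eval(ex)   # ordinary anonymous function, no Expr payload
isbits(g)      # true
```

Note that this relies on the eval happening at top level; eval'ing the expression inside a function and calling the result right away would run into Julia's world-age rules.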
Is there a better solution here?
I'm using:

```
⌅ [052768ef] CUDA v3.13.1
  [72cfdca4] CUDAKernels v0.4.7
  [63c18a36] KernelAbstractions v0.8.6
  [7e49a35a] RuntimeGeneratedFunctions v0.5.5
```

on Julia v1.9.0-beta3.