Closed as not planned
Description
I am on Enzyme main (e8ede63), StaticArrays 1.9.2, Julia 1.10.1 and Linux. The following works when n_atoms = 200
but gives a segfault for higher values like 1000
. It could be an OOM error but ideally it wouldn't segfault. Sometimes to trigger the segfault I had to run with julia -i
and it would crash when I tried to type something into the REPL.
using Enzyme, StaticArrays, LinearAlgebra
const T = Float64
struct StillingerWeber{T}
A::T
B::T
p::Int
q::Int
a::T
λ::T
γ::T
σ::T
ϵ::T
end
Base.zero(::StillingerWeber) = StillingerWeber{T}(0.0, 0.0, 0, 0, 0.0, 0.0, 0.0, 0.0, 0.0)
function Base.:+(sw1::StillingerWeber, sw2::StillingerWeber)
StillingerWeber(
sw1.A + sw2.A,
sw1.B + sw2.B,
sw1.p + sw2.p,
sw1.q + sw2.q,
sw1.a + sw2.a,
sw1.λ + sw2.λ,
sw1.γ + sw2.γ,
sw1.σ + sw2.σ,
sw1.ϵ + sw2.ϵ,
)
end
sw = StillingerWeber{T}(7.049556277, 0.6022245584, 4, 0, 1.80, 21.0, 1.20, 0.14, 200.0)
function forces!(fs_chunks, sw, coords, neighbors, n_threads)
A, B, p, q, a, λ, γ, σ, ϵ = sw.A, sw.B, sw.p, sw.q, sw.a, sw.λ, sw.γ, sw.σ, sw.ϵ
n_atoms = length(coords)
n_chunks = n_threads
Threads.@threads for chunk_i in 1:n_chunks
@inbounds for ni in chunk_i:n_chunks:length(neighbors)
i, j = neighbors[ni]
dr = coords[i] - coords[j]
rij = norm(dr)
r = rij / σ
if r < a
df_dr = -A * exp(inv(r - a)) * (B * p * inv(r^(p+1)) + (B * inv(r^p) - 1) * inv((r - a)^2))
f = -ϵ * df_dr * inv(σ)
fdr = f * dr / rij
fs_chunks[chunk_i][i] -= fdr
fs_chunks[chunk_i][j] += fdr
end
trip_cutoff = a * σ
trip_cutoff_2 = 2 * trip_cutoff
for k in (j + 1):n_atoms
dr_ik = coords[i] - coords[k]
norm_dr_ik = norm(dr_ik)
(norm_dr_ik > trip_cutoff_2) && continue
dr_jk = coords[j] - coords[k]
norm_dr_jk = norm(dr_jk)
((norm_dr_ik < trip_cutoff) || (norm_dr_jk < trip_cutoff)) || continue
dr_ij, norm_dr_ij = dr, rij
ndr_ij, ndr_ik, ndr_jk = dr_ij / norm_dr_ij, dr_ik / norm_dr_ik, dr_jk / norm_dr_jk
dot_ij_ik, dot_ji_jk, dot_ki_kj = dot(dr_ij, dr_ik), dot(-dr_ij, dr_jk), dot(dr_ik, dr_jk)
rij_trip = r
rik_trip = norm_dr_ik / σ
rjk_trip = norm_dr_jk / σ
if rij_trip < a && rik_trip < a
cos_θ_jik = dot_ij_ik / (norm_dr_ij * norm_dr_ik)
exp_term = exp(γ * inv(rij_trip - a) + γ * inv(rik_trip - a))
cos_term = cos_θ_jik + T(1/3)
dh_term = λ * cos_term^2 * -γ * exp_term
dh_drij = inv((rij_trip - a)^2) * dh_term
dh_drik = inv((rik_trip - a)^2) * dh_term
dh_dcosθjik = 2 * λ * exp_term * cos_term
dcosθ_drj = (dr_ik * norm_dr_ij^2 - dr_ij * dot_ij_ik) / (norm_dr_ij^3 * norm_dr_ik)
dcosθ_drk = (dr_ij * norm_dr_ik^2 - dr_ik * dot_ij_ik) / (norm_dr_ik^3 * norm_dr_ij)
fj = -ϵ * (dh_drij * ndr_ij / σ + dh_dcosθjik * dcosθ_drj)
fk = -ϵ * (dh_drik * ndr_ik / σ + dh_dcosθjik * dcosθ_drk)
fs_chunks[chunk_i][i] -= fj + fk
fs_chunks[chunk_i][j] += fj
fs_chunks[chunk_i][k] += fk
end
if rij_trip < a && rjk_trip < a
cos_θ_ijk = dot_ji_jk / (norm_dr_ij * norm_dr_jk)
exp_term = exp(γ * inv(rij_trip - a) + γ * inv(rjk_trip - a))
cos_term = cos_θ_ijk + T(1/3)
dh_term = λ * cos_term^2 * -γ * exp_term
dh_drij = inv((rij_trip - a)^2) * dh_term
dh_drjk = inv((rjk_trip - a)^2) * dh_term
dh_dcosθijk = 2 * λ * exp_term * cos_term
dcosθ_dri = ( dr_jk * norm_dr_ij^2 + dr_ij * dot_ji_jk) / (norm_dr_ij^3 * norm_dr_jk)
dcosθ_drk = (-dr_ij * norm_dr_jk^2 - dr_jk * dot_ji_jk) / (norm_dr_jk^3 * norm_dr_ij)
fi = -ϵ * (dh_drij * -ndr_ij / σ + dh_dcosθijk * dcosθ_dri)
fk = -ϵ * (dh_drjk * ndr_jk / σ + dh_dcosθijk * dcosθ_drk)
fs_chunks[chunk_i][j] -= fi + fk
fs_chunks[chunk_i][i] += fi
fs_chunks[chunk_i][k] += fk
end
if rik_trip < a && rjk_trip < a
cos_θ_ikj = dot_ki_kj / (norm_dr_ik * norm_dr_jk)
exp_term = exp(γ * inv(rik_trip - a) + γ * inv(rjk_trip - a))
cos_term = cos_θ_ikj + T(1/3)
dh_term = λ * cos_term^2 * -γ * exp_term
dh_drik = inv((rik_trip - a)^2) * dh_term
dh_drjk = inv((rjk_trip - a)^2) * dh_term
dh_dcosθikj = 2 * λ * exp_term * cos_term
dcosθ_dri = (-dr_jk * norm_dr_ik^2 + dr_ik * dot_ki_kj) / (norm_dr_ik^3 * norm_dr_jk)
dcosθ_drj = (-dr_ik * norm_dr_jk^2 + dr_jk * dot_ki_kj) / (norm_dr_jk^3 * norm_dr_ik)
fi = -ϵ * (dh_drik * -ndr_ik / σ + dh_dcosθikj * dcosθ_dri)
fj = -ϵ * (dh_drjk * -ndr_jk / σ + dh_dcosθikj * dcosθ_drj)
fs_chunks[chunk_i][k] -= fi + fj
fs_chunks[chunk_i][i] += fi
fs_chunks[chunk_i][j] += fj
end
end
end
end
return nothing
end
n_atoms = 1000
coords = rand(SVector{3, T}, n_atoms) .* T(1.784)
neighbors = Tuple{Int, Int}[]
for i in 1:n_atoms, j in (i+1):n_atoms
if norm(coords[i] - coords[j]) < T(0.6)
push!(neighbors, (i, j))
end
end
function forces(sw, coords, neighbors, n_threads)
fs_chunks = [zero(coords) for _ in 1:n_threads]
forces!(fs_chunks, sw, coords, neighbors, n_threads)
return sum(fs_chunks)
end
n_threads = Threads.nthreads()
println(forces(sw, coords, neighbors, n_threads))
println("Got here")
fs_chunks = [zero(coords) for _ in 1:n_threads]
d_fs = rand(SVector{3, T}, length(coords))
d_fs_chunks = [deepcopy(d_fs) for _ in 1:n_threads]
d_coords = zero(coords)
grads = autodiff(
Enzyme.Reverse,
forces!,
Const,
Duplicated(fs_chunks, d_fs_chunks),
Active(sw),
Duplicated(coords, d_coords),
Const(neighbors),
Const(n_threads),
)[1]
[2836832] signal (11.1): Segmentation fault
in expression starting at /home/jgreener/dms/molly_dev/enzyme_err33.jl:154
Allocations: 58379556 (Pool: 58232441; Big: 147115); GC: 69
Segmentation fault (core dumped)
The printall_error.txt is attached.