Skip to content

Segfault with iteration #1310

Closed as not planned
Closed as not planned
@jgreener64

Description

@jgreener64

I am on Enzyme main (e8ede63), StaticArrays 1.9.2, Julia 1.10.1 and Linux. The following works when n_atoms = 200 but gives a segfault for higher values like 1000. It could be an OOM error but ideally it wouldn't segfault. Sometimes to trigger the segfault I had to run with julia -i and it would crash when I tried to type something into the REPL.

using Enzyme, StaticArrays, LinearAlgebra

const T = Float64

struct StillingerWeber{T}
    A::T
    B::T
    p::Int
    q::Int
    a::T
    λ::T
    γ::T
    σ::T
    ϵ::T
end

Base.zero(::StillingerWeber) = StillingerWeber{T}(0.0, 0.0, 0, 0, 0.0, 0.0, 0.0, 0.0, 0.0)

function Base.:+(sw1::StillingerWeber, sw2::StillingerWeber)
    StillingerWeber(
        sw1.A + sw2.A,
        sw1.B + sw2.B,
        sw1.p + sw2.p,
        sw1.q + sw2.q,
        sw1.a + sw2.a,
        sw1.λ + sw2.λ,
        sw1.γ + sw2.γ,
        sw1.σ + sw2.σ,
        sw1.ϵ + sw2.ϵ,
    )
end

sw = StillingerWeber{T}(7.049556277, 0.6022245584, 4, 0, 1.80, 21.0, 1.20, 0.14, 200.0)

function forces!(fs_chunks, sw, coords, neighbors, n_threads)
    A, B, p, q, a, λ, γ, σ, ϵ = sw.A, sw.B, sw.p, sw.q, sw.a, sw.λ, sw.γ, sw.σ, sw.ϵ
    n_atoms = length(coords)

    n_chunks = n_threads
    Threads.@threads for chunk_i in 1:n_chunks
        @inbounds for ni in chunk_i:n_chunks:length(neighbors)
            i, j = neighbors[ni]
            dr = coords[i] - coords[j]
            rij = norm(dr)
            r = rij / σ
            if r < a
                df_dr = -A * exp(inv(r - a)) * (B * p * inv(r^(p+1)) + (B * inv(r^p) - 1) * inv((r - a)^2))
                f = -ϵ * df_dr * inv(σ)
                fdr = f * dr / rij
                fs_chunks[chunk_i][i] -= fdr
                fs_chunks[chunk_i][j] += fdr
            end

            trip_cutoff = a * σ
            trip_cutoff_2 = 2 * trip_cutoff
            for k in (j + 1):n_atoms
                dr_ik = coords[i] - coords[k]
                norm_dr_ik = norm(dr_ik)
                (norm_dr_ik > trip_cutoff_2) && continue
                dr_jk = coords[j] - coords[k]
                norm_dr_jk = norm(dr_jk)
                ((norm_dr_ik < trip_cutoff) || (norm_dr_jk < trip_cutoff)) || continue
                dr_ij, norm_dr_ij = dr, rij
                ndr_ij, ndr_ik, ndr_jk = dr_ij / norm_dr_ij, dr_ik / norm_dr_ik, dr_jk / norm_dr_jk
                dot_ij_ik, dot_ji_jk, dot_ki_kj = dot(dr_ij, dr_ik), dot(-dr_ij, dr_jk), dot(dr_ik, dr_jk)
                rij_trip = r
                rik_trip = norm_dr_ik / σ
                rjk_trip = norm_dr_jk / σ

                if rij_trip < a && rik_trip < a
                    cos_θ_jik = dot_ij_ik / (norm_dr_ij * norm_dr_ik)
                    exp_term = exp* inv(rij_trip - a) + γ * inv(rik_trip - a))
                    cos_term = cos_θ_jik + T(1/3)
                    dh_term = λ * cos_term^2 * -γ * exp_term
                    dh_drij = inv((rij_trip - a)^2) * dh_term
                    dh_drik = inv((rik_trip - a)^2) * dh_term
                    dh_dcosθjik = 2 * λ * exp_term * cos_term
                    dcosθ_drj = (dr_ik * norm_dr_ij^2 - dr_ij * dot_ij_ik) / (norm_dr_ij^3 * norm_dr_ik)
                    dcosθ_drk = (dr_ij * norm_dr_ik^2 - dr_ik * dot_ij_ik) / (norm_dr_ik^3 * norm_dr_ij)
                    fj = -ϵ * (dh_drij * ndr_ij / σ + dh_dcosθjik * dcosθ_drj)
                    fk = -ϵ * (dh_drik * ndr_ik / σ + dh_dcosθjik * dcosθ_drk)
                    fs_chunks[chunk_i][i] -= fj + fk
                    fs_chunks[chunk_i][j] += fj
                    fs_chunks[chunk_i][k] += fk
                end
                if rij_trip < a && rjk_trip < a
                    cos_θ_ijk = dot_ji_jk / (norm_dr_ij * norm_dr_jk)
                    exp_term = exp* inv(rij_trip - a) + γ * inv(rjk_trip - a))
                    cos_term = cos_θ_ijk + T(1/3)
                    dh_term = λ * cos_term^2 * -γ * exp_term
                    dh_drij = inv((rij_trip - a)^2) * dh_term
                    dh_drjk = inv((rjk_trip - a)^2) * dh_term
                    dh_dcosθijk = 2 * λ * exp_term * cos_term
                    dcosθ_dri = ( dr_jk * norm_dr_ij^2 + dr_ij * dot_ji_jk) / (norm_dr_ij^3 * norm_dr_jk)
                    dcosθ_drk = (-dr_ij * norm_dr_jk^2 - dr_jk * dot_ji_jk) / (norm_dr_jk^3 * norm_dr_ij)
                    fi = -ϵ * (dh_drij * -ndr_ij / σ + dh_dcosθijk * dcosθ_dri)
                    fk = -ϵ * (dh_drjk *  ndr_jk / σ + dh_dcosθijk * dcosθ_drk)
                    fs_chunks[chunk_i][j] -= fi + fk
                    fs_chunks[chunk_i][i] += fi
                    fs_chunks[chunk_i][k] += fk
                end
                if rik_trip < a && rjk_trip < a
                    cos_θ_ikj = dot_ki_kj / (norm_dr_ik * norm_dr_jk)
                    exp_term = exp* inv(rik_trip - a) + γ * inv(rjk_trip - a))
                    cos_term = cos_θ_ikj + T(1/3)
                    dh_term = λ * cos_term^2 * -γ * exp_term
                    dh_drik = inv((rik_trip - a)^2) * dh_term
                    dh_drjk = inv((rjk_trip - a)^2) * dh_term
                    dh_dcosθikj = 2 * λ * exp_term * cos_term
                    dcosθ_dri = (-dr_jk * norm_dr_ik^2 + dr_ik * dot_ki_kj) / (norm_dr_ik^3 * norm_dr_jk)
                    dcosθ_drj = (-dr_ik * norm_dr_jk^2 + dr_jk * dot_ki_kj) / (norm_dr_jk^3 * norm_dr_ik)
                    fi = -ϵ * (dh_drik * -ndr_ik / σ + dh_dcosθikj * dcosθ_dri)
                    fj = -ϵ * (dh_drjk * -ndr_jk / σ + dh_dcosθikj * dcosθ_drj)
                    fs_chunks[chunk_i][k] -= fi + fj
                    fs_chunks[chunk_i][i] += fi
                    fs_chunks[chunk_i][j] += fj
                end
            end
        end
    end

    return nothing
end

n_atoms = 1000
coords = rand(SVector{3, T}, n_atoms) .* T(1.784)
neighbors = Tuple{Int, Int}[]
for i in 1:n_atoms, j in (i+1):n_atoms
    if norm(coords[i] - coords[j]) < T(0.6)
        push!(neighbors, (i, j))
    end
end

function forces(sw, coords, neighbors, n_threads)
    fs_chunks = [zero(coords) for _ in 1:n_threads]
    forces!(fs_chunks, sw, coords, neighbors, n_threads)
    return sum(fs_chunks)
end

n_threads = Threads.nthreads()
println(forces(sw, coords, neighbors, n_threads))
println("Got here")

fs_chunks = [zero(coords) for _ in 1:n_threads]
d_fs = rand(SVector{3, T}, length(coords))
d_fs_chunks = [deepcopy(d_fs) for _ in 1:n_threads]
d_coords = zero(coords)

grads = autodiff(
    Enzyme.Reverse,
    forces!,
    Const,
    Duplicated(fs_chunks, d_fs_chunks),
    Active(sw),
    Duplicated(coords, d_coords),
    Const(neighbors),
    Const(n_threads),
)[1]
[2836832] signal (11.1): Segmentation fault
in expression starting at /home/jgreener/dms/molly_dev/enzyme_err33.jl:154
Allocations: 58379556 (Pool: 58232441; Big: 147115); GC: 69
Segmentation fault (core dumped)

The printall_error.txt is attached.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions