-
Notifications
You must be signed in to change notification settings - Fork 102
Greedy modularity optimization community detection algorithm #314
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
c752c02
322cf70
fee65af
f7c1652
d2d92e8
58fcc8c
40a381b
5412d41
ee65d63
6200ecf
4fe505d
bb85ea6
353c6a4
4615d67
cf80259
888ae41
019612a
0515202
e777f07
8d42973
ac77ee5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
function community_detection_greedy_modularity(g::AbstractGraph; weights::AbstractMatrix=weights(g)) | ||
if is_directed(g) | ||
throw(ArgumentError("The graph must not be directed")) | ||
end | ||
n = nv(g) | ||
c = Vector{Int}(1:n) | ||
cs = Vector{Vector{Int}}(undef, n) | ||
T = float(eltype(weights)) | ||
qs = Vector{T}(undef, n) | ||
olegfafurin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Q, e, a = compute_modularity(g, c, weights) | ||
m = sum(a) | ||
cs[1] = copy(c) | ||
qs[1] = Q | ||
for i in 2:n | ||
Q = modularity_greedy_step!(g, Q, e, a, c, m) | ||
cs[i] = copy(c) | ||
qs[i] = Q | ||
end | ||
imax = argmax(qs) | ||
return rewrite_class_ids(cs[imax]), qs | ||
end | ||
|
||
function modularity_greedy_step!( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Test that this loop does not allocate |
||
g::AbstractGraph, | ||
Q::T, | ||
e::AbstractMatrix{T}, | ||
a::AbstractVector{T}, | ||
c::AbstractVector{<:Integer}, | ||
m::T | ||
) where {T} | ||
n = nv(g) | ||
dq_max::typeof(Q) = typemin(Q) | ||
to_merge::Tuple{Int,Int} = (0, 0) | ||
olegfafurin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for edge in edges(g) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. investigate the case of self-loops There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Self-loops indeed appear in modularity computation (in a correct way) |
||
u, v = src(edge), dst(edge) | ||
if c[u] != c[v] | ||
dq = (e[c[u], c[v]] / m - a[c[u]] * a[c[v]] / m^2) | ||
if dq > dq_max | ||
dq_max = dq | ||
to_merge = (c[u], c[v]) | ||
end | ||
end | ||
end | ||
if to_merge == (0,0) | ||
for i in vertices(g) | ||
if c[i] != c[1] | ||
to_merge = (c[1], c[i]) | ||
break | ||
end | ||
end | ||
end | ||
c1, c2 = to_merge | ||
for i in 1:n | ||
e[c1, i] += e[c2, i] | ||
end | ||
for i in 1:n | ||
if i == c2 | ||
continue | ||
end | ||
e[i, c1] += e[i, c2] | ||
end | ||
a[c1] = a[c1] + a[c2] | ||
for i in 1:n | ||
if c[i] == c2 | ||
c[i] = c1 | ||
end | ||
end | ||
return Q + 2 * dq_max | ||
end | ||
|
||
function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray) | ||
modularity_type = float(eltype(w)) | ||
Q = zero(modularity_type) | ||
m = sum(w[src(e), dst(e)] for e in edges(g); init=Q) * 2 | ||
n_groups = maximum(c) | ||
a = zeros(modularity_type, n_groups) | ||
e = zeros(modularity_type, n_groups, n_groups) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we try a dict or sparse matrix here? |
||
m == 0 && return 0.0, e, a | ||
for u in vertices(g) | ||
for v in neighbors(g, u) | ||
if c[u] == c[v] | ||
Q += w[u, v] | ||
end | ||
e[c[u], c[v]] += w[u, v] | ||
a[c[u]] += w[u, v] | ||
end | ||
end | ||
Q *= m | ||
for i in 1:n_groups | ||
Q -= a[i]^2 | ||
end | ||
Q /= m^2 | ||
return Q, e, a | ||
end | ||
|
||
function rewrite_class_ids(v::AbstractVector{<:Integer}) | ||
d = Dict{Int, Int}() | ||
vn = zeros(Int64, length(v)) | ||
for i in eachindex(v) | ||
if !(v[i] in keys(d)) | ||
d[v[i]] = length(d) + 1 | ||
end | ||
vn[i] = d[v[i]] | ||
end | ||
return vn | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
function community_detection_greedy_modularity_fast(g::AbstractGraph; weights::AbstractMatrix=weights(g)) | ||
if is_directed(g) | ||
throw(ArgumentError("The graph must not be directed")) | ||
end | ||
n = nv(g) | ||
c = Vector{Int}(1:n) | ||
dq_dict, dq_heap, dq_global_heap, a, m = compute_dq(g, c, weights) | ||
modularity_type = float(eltype(weights)) | ||
empty_row_heap = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse) # placeholder, lasts empty forever | ||
while length(dq_global_heap) > 1 | ||
(u,v), (dq, _) = dequeue_pair!(dq_global_heap) | ||
if dq <= zero(modularity_type) | ||
return rewrite_class_ids(c) | ||
end | ||
dequeue!(dq_heap[u]) | ||
if !isempty(dq_heap[u]) | ||
enqueue!(dq_global_heap, peek(dq_heap[u])) | ||
end | ||
if peek(dq_heap[v])[1] == (v,u) | ||
dequeue!(dq_heap[v]) | ||
delete!(dq_global_heap, (v,u)) | ||
if !isempty(dq_heap[v]) | ||
enqueue!(dq_global_heap, peek(dq_heap[v])) | ||
end | ||
else | ||
delete!(dq_heap[v], (v,u)) | ||
end | ||
|
||
c[c .== u] .= v | ||
|
||
neighbors_u = setdiff(keys(dq_dict[u]), v) | ||
neighbors_v = setdiff(keys(dq_dict[v]), u) | ||
neighbors_all = union(neighbors_u, neighbors_v) | ||
neighbors_common = intersect(neighbors_u, neighbors_v) | ||
|
||
for w in neighbors_all | ||
if w in neighbors_common | ||
dq_w = dq_dict[v][w] + dq_dict[u][w] | ||
elseif w in neighbors_v | ||
dq_w = dq_dict[v][w] - a[u] * a[w] / m^2 | ||
else | ||
dq_w = dq_dict[u][w] - a[v] * a[w] / m^2 | ||
end | ||
for (row, column) in ((v, w), (w, v)) | ||
dq_heap_row = dq_heap[row] | ||
dq_dict[row][column] = dq_w | ||
if !isempty(dq_heap_row) | ||
oldmax = peek(dq_heap_row) | ||
else | ||
oldmax = nothing | ||
end | ||
dq_heap_row[(row,column)] = (dq_w, (-row, -column)) # update or insert | ||
if isnothing(oldmax) | ||
dq_global_heap[(row, column)] = (dq_w, (-row, -column)) | ||
else | ||
newmax = peek(dq_heap_row) | ||
if newmax != oldmax | ||
delete!(dq_global_heap, oldmax[1]) ## is it still there? | ||
enqueue!(dq_global_heap, newmax) | ||
end | ||
end | ||
end | ||
end | ||
|
||
for (w, _) in dq_dict[u] | ||
delete!(dq_dict[w], u) | ||
if w != v | ||
for (row, column) in ((w,u), (u,w)) | ||
dq_heap_row = dq_heap[row] | ||
if peek(dq_heap_row)[1] == (row, column) | ||
dequeue!(dq_heap_row) | ||
delete!(dq_global_heap, (row, column)) | ||
if !isempty(dq_heap_row) | ||
enqueue!(dq_global_heap, peek(dq_heap_row)) | ||
end | ||
else | ||
delete!(dq_heap_row, (row, column)) | ||
end | ||
end | ||
end | ||
end | ||
delete!(dq_dict, u) | ||
dq_heap[u] = empty_row_heap | ||
a[v] += a[u] | ||
a[u] = 0 | ||
end | ||
return rewrite_class_ids(c) | ||
end | ||
|
||
function compute_dq( | ||
g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray | ||
) | ||
modularity_type = float(eltype(w)) | ||
Q_zero = zero(modularity_type) | ||
m = sum(w[src(e), dst(e)] for e in edges(g); init=Q_zero) * 2 | ||
n_groups = maximum(c) | ||
a = zeros(modularity_type, n_groups) | ||
|
||
typical_dict = DefaultDict{Int, modularity_type}(Q_zero) | ||
dq_dict = Dict{Int,typeof(typical_dict)}() | ||
for v in vertices(g) | ||
dq_dict[v] = DefaultDict{Int, modularity_type}(Q_zero) | ||
end | ||
|
||
for u in vertices(g) | ||
for v in neighbors(g, u) | ||
dq_dict[u][v] += w[u,v] | ||
a[c[u]] += w[u, v] | ||
end | ||
end | ||
|
||
for (u, dct) in dq_dict | ||
for (v, w) in dct | ||
dq_dict[u][v] = w / m - a[c[u]] * a[c[v]] / m^2 | ||
end | ||
end | ||
|
||
typical_queue = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse) | ||
dq_heap = Dict{Int,typeof(typical_queue)}() | ||
for u in vertices(g) | ||
dq_heap[u] = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse, (u, v) => (dq, (-u, -v)) for (v, dq) in dq_dict[u]) | ||
end | ||
|
||
v_connected = filter(v -> !isempty(dq_heap[v]), vertices(g)) | ||
global_heap = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse, peek(dq_heap[v]) for v in v_connected) | ||
return dq_dict, dq_heap, global_heap, a, m | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
@testset "Greedy modularity: karate club" begin | ||
g = smallgraph(:karate) | ||
|
||
olegfafurin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
expected_c_w = [1, 2, 2, 2, 1, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 3, 1, 2, 3, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] | ||
expected_q_w = 0.3806706114398422 | ||
|
||
c_w, _ = community_detection_greedy_modularity(g) | ||
|
||
@test c_w == expected_c_w | ||
|
||
@test modularity(g, c_w) ≈ expected_q_w | ||
end | ||
|
||
@testset "Greedy modularity: weighted karate club" begin | ||
g = smallgraph(:karate) | ||
w = [ | ||
0 4 5 3 3 3 3 2 2 0 2 3 1 3 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 2 0 0; | ||
4 0 6 3 0 0 0 4 0 0 0 0 0 5 0 0 0 1 0 2 0 2 0 0 0 0 0 0 0 0 2 0 0 0; | ||
5 6 0 3 0 0 0 4 5 1 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 2 0; | ||
3 3 3 0 0 0 0 3 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
3 0 0 0 0 0 2 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
3 0 0 0 0 0 5 0 0 0 3 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
3 0 0 0 2 5 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
2 4 4 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
2 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 3 4; | ||
0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2; | ||
2 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
1 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
3 5 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3; | ||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 2; | ||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4; | ||
0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2; | ||
2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1; | ||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1; | ||
2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; | ||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3; | ||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 4 0 3 0 0 5 4; | ||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 3 0 0 0 2 0 0; | ||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 2 0 0 0 0 0 0 7 0 0; | ||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 2; | ||
0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 3 0 0 0 0 0 0 0 0 4; | ||
0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2; | ||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 4 0 0 0 0 0 4 2; | ||
0 2 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3; | ||
2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 7 0 0 2 0 0 0 4 4; | ||
0 0 2 0 0 0 0 0 3 0 0 0 0 0 3 3 0 0 1 0 3 0 2 5 0 0 0 0 0 4 3 4 0 5; | ||
0 0 0 0 0 0 0 0 4 2 0 0 0 3 2 4 0 0 2 1 1 0 3 4 0 0 2 4 2 2 3 4 5 0 | ||
] | ||
expected_c_w = [1, 1, 1, 1, 2, 2, 2, 1, 3, 3, 2, 1, 1, 1, 3, 3, 2, 1, 3, 1, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] | ||
expected_q_w = 0.4345214669889994 | ||
|
||
c_w, _ = community_detection_greedy_modularity(g, weights=w) | ||
@test c_w == expected_c_w | ||
@test modularity(g,c_w, distmx=w) ≈ expected_q_w | ||
end | ||
|
||
|
||
@testset "Greedy modularity: disconnected graph" begin | ||
olegfafurin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
g = SimpleGraph(10) | ||
for i=1:5 | ||
add_edge!(g, 2*i - 1, 2*i) | ||
end | ||
c, _ = community_detection_greedy_modularity(g) | ||
q = modularity(g, c) | ||
|
||
expected_c = [1,1,2,2,3,3,4,4,5,5] | ||
expected_q = 0.8 | ||
|
||
@test c == expected_c | ||
@test q ≈ expected_q | ||
end | ||
|
||
|
||
@testset "Greedy modularity: complete graph" begin | ||
g = complete_graph(10) | ||
c, _ = community_detection_greedy_modularity(g) | ||
q = modularity(g, c) | ||
|
||
expected_c = ones(Int, 10) | ||
expected_q = 0.0 | ||
|
||
@test c == expected_c | ||
@test q ≈ expected_q | ||
end | ||
|
||
|
||
@testset "Greedy modularity: empty graph" begin | ||
g = SimpleGraph(10) | ||
c, _ = community_detection_greedy_modularity(g) | ||
q = modularity(g, c) | ||
|
||
expected_c = Vector(1:10) | ||
expected_q = 0.0 | ||
|
||
@test c == expected_c | ||
@test q ≈ expected_q | ||
end | ||
|
||
|
||
@testset "Greedy modularity: random stochastic block model graph" begin | ||
g_sbm = stochastic_block_model(99,1,[500,1000]) | ||
expected_c = [i > 500 ? 2 : 1 for i=1:1500] | ||
|
||
c, _ = community_detection_greedy_modularity(g_sbm) | ||
|
||
matches1 = sum(c .== expected_c) | ||
matches2 = sum((3 .- c) .== expected_c) # complementary cluster numbers assignment | ||
|
||
@test matches1 ≥ 0.95 * nv(g_sbm) || matches2 ≥ 0.95 * nv(g_sbm) | ||
end | ||
|
Uh oh!
There was an error while loading. Please reload this page.