From d918430c94af7c88cc4902ce5257e99385180f66 Mon Sep 17 00:00:00 2001 From: Subhajit Sahu Date: Thu, 30 Nov 2023 01:01:40 +0530 Subject: [PATCH] :bug: update louvain --- inc/louvain.hxx | 665 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 532 insertions(+), 133 deletions(-) diff --git a/inc/louvain.hxx b/inc/louvain.hxx index 6cb6234..09ab99d 100644 --- a/inc/louvain.hxx +++ b/inc/louvain.hxx @@ -1,79 +1,155 @@ #pragma once #include -#include +#include #include +#include #include "_main.hxx" #include "Graph.hxx" -#include "duplicate.hxx" #include "properties.hxx" #include "csr.hxx" -#include "modularity.hxx" - #ifdef OPENMP #include #endif -using std::pair; +using std::tuple; using std::vector; using std::make_pair; using std::move; using std::swap; +using std::get; using std::min; using std::max; -// LOUVAIN OPTIONS -// --------------- - +#pragma region TYPES +/** + * Options for Louvain algorithm. + */ struct LouvainOptions { - int repeat; + #pragma region DATA + /** Number of times to repeat the algorithm [1]. */ + int repeat; + /** Resolution parameter for modularity [1]. */ double resolution; + /** Tolerance for convergence [1e-2]. */ double tolerance; + /** Tolerance for aggregation [0.8]. */ double aggregationTolerance; + /** Tolerance drop factor after each pass [10]. */ double toleranceDrop; - int maxIterations; - int maxPasses; - + /** Maximum number of iterations per pass [20]. */ + int maxIterations; + /** Maximum number of passes [10]. */ + int maxPasses; + #pragma endregion + + + #pragma region CONSTRUCTORS + /** + * Define options for Louvain algorithm. + * @param repeat number of times to repeat the algorithm [1] + * @param resolution resolution parameter for modularity [1] + * @param tolerance tolerance for convergence [1e-2] + * @param aggregationTolerance tolerance for aggregation [0.8] + * @param toleranceDrop tolerance drop factor after each pass [10] + * @param maxIterations maximum number of iterations per pass [20] + * @param maxPasses maximum number of passes [10] + */ LouvainOptions(int repeat=1, double resolution=1, double tolerance=1e-2, double aggregationTolerance=0.8, double toleranceDrop=10, int maxIterations=20, int maxPasses=10) : repeat(repeat), resolution(resolution), tolerance(tolerance), aggregationTolerance(aggregationTolerance), toleranceDrop(toleranceDrop), maxIterations(maxIterations), maxPasses(maxPasses) {} + #pragma endregion }; -// Weight to be using in hashtable. -#define LOUVAIN_WEIGHT_TYPE double +/** Weight to be used in hashtable. */ +#define LOUVAIN_WEIGHT_TYPE double -// LOUVAIN RESULT -// -------------- -template +/** + * Result of Louvain algorithm. + * @tparam K key type (vertex-id) + * @tparam W weight type + */ +template struct LouvainResult { + #pragma region DATA + /** Community membership each vertex belongs to. */ vector membership; - int iterations; - int passes; + /** Total edge weight of each vertex. */ + vector vertexWeight; + /** Total edge weight of each community. */ + vector communityWeight; + /** Number of iterations performed. */ + int iterations; + /** Number of passes performed. */ + int passes; + /** Time spent in milliseconds. */ float time; - float preprocessingTime; + /** Time spent in milliseconds for initial marking of affected vertices. */ + float markingTime; + /** Time spent in initializing community memberships and total vertex/community weights. */ + float initializationTime; + /** Time spent in milliseconds in first pass. */ float firstPassTime; + /** Time spent in milliseconds in local-moving phase. */ float localMoveTime; + /** Time spent in milliseconds in aggregation phase. */ float aggregationTime; + /** Number of vertices initially marked as affected. */ size_t affectedVertices; - - LouvainResult(vector&& membership, int iterations=0, int passes=0, float time=0, float preprocessingTime=0, float firstPassTime=0, float localMoveTime=0, float aggregationTime=0, size_t affectedVertices=0) : - membership(membership), iterations(iterations), passes(passes), time(time), preprocessingTime(preprocessingTime), firstPassTime(firstPassTime), localMoveTime(localMoveTime), aggregationTime(aggregationTime), affectedVertices(affectedVertices) {} - - LouvainResult(vector& membership, int iterations=0, int passes=0, float time=0, float preprocessingTime=0, float firstPassTime=0, float localMoveTime=0, float aggregationTime=0, size_t affectedVertices=0) : - membership(move(membership)), iterations(iterations), passes(passes), time(time), preprocessingTime(preprocessingTime), firstPassTime(firstPassTime), localMoveTime(localMoveTime), aggregationTime(aggregationTime), affectedVertices(affectedVertices) {} + #pragma endregion + + + #pragma region CONSTRUCTORS + /** + * Result of Louvain algorithm. + * @param membership community membership each vertex belongs to + * @param vertexWeight total edge weight of each vertex + * @param communityWeight total edge weight of each community + * @param iterations number of iterations performed + * @param passes number of passes performed + * @param time time spent in milliseconds + * @param markingTime time spent in milliseconds for initial marking of affected vertices + * @param initializationTime time spent in initializing community memberships and total vertex/community weights + * @param firstPassTime time spent in milliseconds in first pass + * @param localMoveTime time spent in milliseconds in local-moving phase + * @param aggregationTime time spent in milliseconds in aggregation phase + * @param affectedVertices number of vertices initially marked as affected + */ + LouvainResult(vector&& membership, vector&& vertexWeight, vector&& communityWeight, int iterations=0, int passes=0, float time=0, float markingTime=0, float initializationTime=0, float firstPassTime=0, float localMoveTime=0, float aggregationTime=0, size_t affectedVertices=0) : + membership(membership), vertexWeight(vertexWeight), communityWeight(communityWeight), iterations(iterations), passes(passes), time(time), markingTime(markingTime), initializationTime(initializationTime), firstPassTime(firstPassTime), localMoveTime(localMoveTime), aggregationTime(aggregationTime), affectedVertices(affectedVertices) {} + + + /** + * Result of Louvain algorithm. + * @param membership community membership each vertex belongs to (moved) + * @param vertexWeight total edge weight of each vertex (moved) + * @param communityWeight total edge weight of each community (moved) + * @param iterations number of iterations performed + * @param passes number of passes performed + * @param time time spent in milliseconds + * @param markingTime time spent in milliseconds for initial marking of affected vertices + * @param initializationTime time spent in initializing community memberships and total vertex/community weights + * @param firstPassTime time spent in milliseconds in first pass + * @param localMoveTime time spent in milliseconds in local-moving phase + * @param aggregationTime time spent in milliseconds in aggregation phase + * @param affectedVertices number of vertices initially marked as affected + */ + LouvainResult(vector& membership, vector& vertexWeight, vector& communityWeight, int iterations=0, int passes=0, float time=0, float markingTime=0, float initializationTime=0, float firstPassTime=0, float localMoveTime=0, float aggregationTime=0, size_t affectedVertices=0) : + membership(move(membership)), vertexWeight(move(vertexWeight)), communityWeight(move(communityWeight)), iterations(iterations), passes(passes), time(time), markingTime(markingTime), initializationTime(initializationTime), firstPassTime(firstPassTime), localMoveTime(localMoveTime), aggregationTime(aggregationTime), affectedVertices(affectedVertices) {} + #pragma endregion }; +#pragma endregion -// LOUVAIN HASHTABLES -// ------------------ - +#pragma region METHODS +#pragma region HASHTABLES /** * Allocate a number of hashtables. * @param vcs communities vertex u is linked to (updated) @@ -103,13 +179,12 @@ inline void louvainFreeHashtablesW(vector*>& vcs, vector*>& delete vcout[i]; } } +#pragma endregion -// LOUVAIN INITIALIZE -// ------------------ - +#pragma region INITIALIZE /** * Find the total edge weight of each vertex. * @param vtot total edge weight of each vertex (updated, must be initialized) @@ -124,7 +199,13 @@ inline void louvainVertexWeightsW(vector& vtot, const G& x) { }); } + #ifdef OPENMP +/** + * Find the total edge weight of each vertex. + * @param vtot total edge weight of each vertex (updated, must be initialized) + * @param x original graph + */ template inline void louvainVertexWeightsOmpW(vector& vtot, const G& x) { using K = typename G::key_type; @@ -153,7 +234,15 @@ inline void louvainCommunityWeightsW(vector& ctot, const G& x, const vector inline void louvainCommunityWeightsOmpW(vector& ctot, const G& x, const vector& vcom, const vector& vtot) { size_t S = x.span(); @@ -183,7 +272,15 @@ inline void louvainInitializeW(vector& vcom, vector& ctot, const G& x, con }); } + #ifdef OPENMP +/** + * Initialize communities such that each vertex is its own community. + * @param vcom community each vertex belongs to (updated, must be initialized) + * @param ctot total edge weight of each community (updated, must be initialized) + * @param x original graph + * @param vtot total edge weight of each vertex + */ template inline void louvainInitializeOmpW(vector& vcom, vector& ctot, const G& x, const vector& vtot) { size_t S = x.span(); @@ -201,26 +298,35 @@ inline void louvainInitializeOmpW(vector& vcom, vector& ctot, const G& x, * Initialize communities from given initial communities. * @param vcom community each vertex belongs to (updated, must be initialized) * @param ctot total edge weight of each community (updated, must be initialized) - * @param x original graph + * @param y updated graph * @param vtot total edge weight of each vertex * @param q initial community each vertex belongs to */ template -inline void louvainInitializeFromW(vector& vcom, vector& ctot, const G& x, const vector& vtot, const vector& q) { - x.forEachVertexKey([&](auto u) { +inline void louvainInitializeFromW(vector& vcom, vector& ctot, const G& y, const vector& vtot, const vector& q) { + y.forEachVertexKey([&](auto u) { K c = q[u]; vcom[u] = c; ctot[c] += vtot[u]; }); } + #ifdef OPENMP +/** + * Initialize communities from given initial communities. + * @param vcom community each vertex belongs to (updated, must be initialized) + * @param ctot total edge weight of each community (updated, must be initialized) + * @param y updated graph + * @param vtot total edge weight of each vertex + * @param q initial community each vertex belongs to + */ template -inline void louvainInitializeFromOmpW(vector& vcom, vector& ctot, const G& x, const vector& vtot, const vector& q) { - size_t S = x.span(); +inline void louvainInitializeFromOmpW(vector& vcom, vector& ctot, const G& y, const vector& vtot, const vector& q) { + size_t S = y.span(); #pragma omp parallel for schedule(static, 2048) for (K u=0; u& vcom, vector& ctot, const G& #endif +/** + * Update weights using given edge deletions and insertions. + * @param vtot total edge weight of each vertex (updated) + * @param ctot total edge weight of each community (updated) + * @param y updated graph + * @param deletions edge deletions in batch update + * @param insertions edge insertions in batch update + * @param vcom community each vertex belongs to + */ +template +inline void louvainUpdateWeightsFromU(vector& vtot, vector& ctot, const G& y, const vector>& deletions, const vector>& insertions, const vector& vcom) { + for (auto [u, v, w] : deletions) { + K c = vcom[u]; + vtot[u] -= w; + ctot[c] -= w; + } + for (auto [u, v, w] : insertions) { + K c = vcom[u]; + vtot[u] += w; + ctot[c] += w; + } +} + + +#ifdef OPENMP +/** + * Update weights using given edge deletions and insertions. + * @param vtot total edge weight of each vertex (updated) + * @param ctot total edge weight of each community (updated) + * @param y updated graph + * @param deletions edge deletions in batch update + * @param insertions edge insertions in batch update + * @param vcom community each vertex belongs to + */ +template +inline void louvainUpdateWeightsFromOmpU(vector& vtot, vector& ctot, const G& y, const vector>& deletions, const vector>& insertions, const vector& vcom) { + #pragma omp parallel + { + for (auto [u, v, w] : deletions) { + K c = vcom[u]; + if (belongsOmp(u)) vtot[u] -= w; + if (belongsOmp(c)) ctot[c] -= w; + } + for (auto [u, v, w] : insertions) { + K c = vcom[u]; + if (belongsOmp(u)) vtot[u] += w; + if (belongsOmp(c)) ctot[c] += w; + } + } +} +#endif +#pragma endregion + -// LOUVAIN CHANGE COMMUNITY -// ------------------------ +#pragma region CHANGE COMMUNITY /** * Scan an edge community connected to a vertex. * @param vcs communities vertex u is linked to (updated) @@ -323,7 +481,17 @@ inline void louvainChangeCommunityW(vector& vcom, vector& ctot, const G& x vcom[u] = c; } + #ifdef OPENMP +/** + * Move vertex to another community C. + * @param vcom community each vertex belongs to (updated) + * @param ctot total edge weight of each community (updated) + * @param x original graph + * @param u given vertex + * @param c community to move to + * @param vtot total edge weight of each vertex + */ template inline void louvainChangeCommunityOmpW(vector& vcom, vector& ctot, const G& x, K u, K c, const vector& vtot) { K d = vcom[u]; @@ -334,13 +502,12 @@ inline void louvainChangeCommunityOmpW(vector& vcom, vector& ctot, const G vcom[u] = c; } #endif +#pragma endregion -// LOUVAIN MOVE -// ------------ - +#pragma region LOCAL-MOVING PHASE /** * Louvain algorithm's local moving phase. * @param vcom community each vertex belongs to (initial, updated) @@ -354,16 +521,17 @@ inline void louvainChangeCommunityOmpW(vector& vcom, vector& ctot, const G * @param R resolution (0, 1] * @param L max iterations * @param fc has local moving phase converged? + * @param fa is vertex allowed to be updated? * @returns iterations performed (0 if converged already) */ -template -inline int louvainMoveW(vector& vcom, vector& ctot, vector& vaff, vector& vcs, vector& vcout, const G& x, const vector& vtot, double M, double R, int L, FC fc) { +template +inline int louvainMoveW(vector& vcom, vector& ctot, vector& vaff, vector& vcs, vector& vcout, const G& x, const vector& vtot, double M, double R, int L, FC fc, FA fa) { int l = 0; W el = W(); for (; l& vcom, vector& ctot, vector& vaff, vecto return l>1 || el? l : 0; } -#ifdef OPENMP + +/** + * Louvain algorithm's local moving phase. + * @param vcom community each vertex belongs to (initial, updated) + * @param ctot total edge weight of each community (precalculated, updated) + * @param vaff is vertex affected flag (updated) + * @param vcs communities vertex u is linked to (temporary buffer, updated) + * @param vcout total edge weight from vertex u to community C (temporary buffer, updated) + * @param x original graph + * @param vtot total edge weight of each vertex + * @param M total weight of "undirected" graph (1/2 of directed graph) + * @param R resolution (0, 1] + * @param L max iterations + * @param fc has local moving phase converged? + * @returns iterations performed (0 if converged already) + */ template -inline int louvainMoveOmpW(vector& vcom, vector& ctot, vector& vaff, vector*>& vcs, vector*>& vcout, const G& x, const vector& vtot, double M, double R, int L, FC fc) { +inline int louvainMoveW(vector& vcom, vector& ctot, vector& vaff, vector& vcs, vector& vcout, const G& x, const vector& vtot, double M, double R, int L, FC fc) { + auto fa = [](auto u) { return true; }; + return louvainMoveW(vcom, ctot, vaff, vcs, vcout, x, vtot, M, R, L, fc, fa); +} + + +#ifdef OPENMP +/** + * Louvain algorithm's local moving phase. + * @param vcom community each vertex belongs to (initial, updated) + * @param ctot total edge weight of each community (precalculated, updated) + * @param vaff is vertex affected flag (updated) + * @param vcs communities vertex u is linked to (temporary buffer, updated) + * @param vcout total edge weight from vertex u to community C (temporary buffer, updated) + * @param x original graph + * @param vtot total edge weight of each vertex + * @param M total weight of "undirected" graph (1/2 of directed graph) + * @param R resolution (0, 1] + * @param L max iterations + * @param fc has local moving phase converged? + * @param fa is vertex allowed to be updated? + * @returns iterations performed (0 if converged already) + */ +template +inline int louvainMoveOmpW(vector& vcom, vector& ctot, vector& vaff, vector*>& vcs, vector*>& vcout, const G& x, const vector& vtot, double M, double R, int L, FC fc, FA fa) { size_t S = x.span(); int l = 0; W el = W(); @@ -388,7 +595,7 @@ inline int louvainMoveOmpW(vector& vcom, vector& ctot, vector& vaff, ve for (K u=0; u& vcom, vector& ctot, vector& vaff, ve } return l>1 || el? l : 0; } -#endif +/** + * Louvain algorithm's local moving phase. + * @param vcom community each vertex belongs to (initial, updated) + * @param ctot total edge weight of each community (precalculated, updated) + * @param vaff is vertex affected flag (updated) + * @param vcs communities vertex u is linked to (temporary buffer, updated) + * @param vcout total edge weight from vertex u to community C (temporary buffer, updated) + * @param x original graph + * @param vtot total edge weight of each vertex + * @param M total weight of "undirected" graph (1/2 of directed graph) + * @param R resolution (0, 1] + * @param L max iterations + * @param fc has local moving phase converged? + * @returns iterations performed (0 if converged already) + */ +template +inline int louvainMoveOmpW(vector& vcom, vector& ctot, vector& vaff, vector*>& vcs, vector*>& vcout, const G& x, const vector& vtot, double M, double R, int L, FC fc) { + auto fa = [](auto u) { return true; }; + return louvainMoveOmpW(vcom, ctot, vaff, vcs, vcout, x, vtot, M, R, L, fc, fa); +} +#endif +#pragma endregion + -// LOUVAIN COMMUNITY PROPERTIES -// ---------------------------- +#pragma region COMMUNITY PROPERTIES /** * Examine if each community exists. * @param a does each community exist (updated) @@ -427,7 +655,15 @@ inline size_t louvainCommunityExistsW(vector& a, const G& x, const vector& return C; } + #ifdef OPENMP +/** + * Examine if each community exists. + * @param a does each community exist (updated) + * @param x original graph + * @param vcom community each vertex belongs to + * @returns number of communities + */ template inline size_t louvainCommunityExistsOmpW(vector& a, const G& x, const vector& vcom) { size_t S = x.span(); @@ -464,7 +700,14 @@ inline void louvainCommunityTotalDegreeW(vector& a, const G& x, const vector< }); } + #ifdef OPENMP +/** + * Find the total degree of each community. + * @param a total degree of each community (updated) + * @param x original graph + * @param vcom community each vertex belongs to + */ template inline void louvainCommunityTotalDegreeOmpW(vector& a, const G& x, const vector& vcom) { size_t S = x.span(); @@ -497,7 +740,14 @@ inline void louvainCountCommunityVerticesW(vector& a, const G& x, const vecto }); } + #ifdef OPENMP +/** + * Find the number of vertices in each community. + * @param a number of vertices belonging to each community (updated) + * @param x original graph + * @param vcom community each vertex belongs to + */ template inline void louvainCountCommunityVerticesOmpW(vector& a, const G& x, const vector& vcom) { size_t S = x.span(); @@ -535,7 +785,17 @@ inline void louvainCommunityVerticesW(vector& coff, vector& cdeg, vector inline void louvainCommunityVerticesOmpW(vector& coff, vector& cdeg, vector& cedg, vector& bufk, const G& x, const vector& vcom) { size_t S = x.span(); @@ -551,13 +811,12 @@ inline void louvainCommunityVerticesOmpW(vector& coff, vector& cdeg, vecto } } #endif +#pragma endregion -// LOUVAIN LOOKUP COMMUNITIES -// -------------------------- - +#pragma region LOOKUP COMMUNITIES /** * Update community membership in a tree-like fashion (to handle aggregation). * @param a output community each vertex belongs to (updated) @@ -569,7 +828,13 @@ inline void louvainLookupCommunitiesU(vector& a, const vector& vcom) { v = vcom[v]; } + #ifdef OPENMP +/** + * Update community membership in a tree-like fashion (to handle aggregation). + * @param a output community each vertex belongs to (updated) + * @param vcom community each vertex belongs to (at this aggregation level) + */ template inline void louvainLookupCommunitiesOmpU(vector& a, const vector& vcom) { size_t S = a.size(); @@ -578,13 +843,12 @@ inline void louvainLookupCommunitiesOmpU(vector& a, const vector& vcom) { a[u] = vcom[a[u]]; } #endif +#pragma endregion -// LOUVAIN AGGREGATE -// ----------------- - +#pragma region AGGREGATION PHASE /** * Aggregate outgoing edges of each community. * @param ydeg degree of each community (updated) @@ -614,7 +878,21 @@ inline void louvainAggregateEdgesW(vector& ydeg, vector& yedg, vector& } } + #ifdef OPENMP +/** + * Aggregate outgoing edges of each community. + * @param ydeg degree of each community (updated) + * @param yedg vertex ids of outgoing edges of each community (updated) + * @param ywei weights of outgoing edges of each community (updated) + * @param vcs communities vertex u is linked to (temporary buffer, updated) + * @param vcout total edge weight from vertex u to community C (temporary buffer, updated) + * @param x original graph + * @param vcom community each vertex belongs to + * @param coff offsets for vertices belonging to each community + * @param cedg vertices belonging to each community + * @param yoff offsets for vertices belonging to each community + */ template inline void louvainAggregateEdgesOmpW(vector& ydeg, vector& yedg, vector& ywei, vector*>& vcs, vector*>& vcout, const G& x, const vector& vcom, const vector& coff, const vector& cedg, const vector& yoff) { size_t C = coff.size() - 1; @@ -649,7 +927,16 @@ inline size_t louvainRenumberCommunitiesW(vector& vcom, vector& cext, cons return C; } + #ifdef OPENMP +/** + * Re-number communities such that they are numbered 0, 1, 2, ... + * @param vcom community each vertex belongs to (updated) + * @param cext does each community exist (updated) + * @param bufk buffer for exclusive scan of size |threads| (scratch) + * @param x original graph + * @returns number of communities + */ template inline size_t louvainRenumberCommunitiesOmpW(vector& vcom, vector& cext, vector& bufk, const G& x) { size_t C = exclusiveScanOmpW(cext, bufk, cext); @@ -680,7 +967,22 @@ inline void louvainAggregateW(vector& yoff, vector& ydeg, vector& louvainAggregateEdgesW(ydeg, yedg, ywei, vcs, vcout, x, vcom, coff, cedg, yoff); } + #ifdef OPENMP +/** + * Louvain algorithm's community aggregation phase. + * @param yoff offsets for vertices belonging to each community (updated) + * @param ydeg degree of each community (updated) + * @param yedg vertex ids of outgoing edges of each community (updated) + * @param ywei weights of outgoing edges of each community (updated) + * @param bufs buffer for exclusive scan of size |threads| (scratch) + * @param vcs communities vertex u is linked to (temporary buffer, updated) + * @param vcout total edge weight from vertex u to community C (temporary buffer, updated) + * @param x original graph + * @param vcom community each vertex belongs to + * @param coff offsets for vertices belonging to each community + * @param cedg vertices belonging to each community + */ template inline void louvainAggregateOmpW(vector& yoff, vector& ydeg, vector& yedg, vector& ywei, vector& bufs, vector*>& vcs, vector*>& vcout, const G& x, const vector& vcom, vector& coff, vector& cedg) { size_t C = coff.size() - 1; @@ -689,65 +991,81 @@ inline void louvainAggregateOmpW(vector& yoff, vector& ydeg, vector -auto louvainSeq(const G& x, const vector *q, const LouvainOptions& o, FM fm) { +template +inline auto louvainInvoke(const G& x, const LouvainOptions& o, FI fi, FM fm, FA fa) { + using K = typename G::key_type; using W = LOUVAIN_WEIGHT_TYPE; using B = FLAG; + // Options. double R = o.resolution; int L = o.maxIterations, l = 0; int P = o.maxPasses, p = 0; + // Get graph properties. size_t X = x.size(); - size_t S = x.span(), naff = 0; + size_t S = x.span(); double M = edgeWeight(x)/2; - vector vaff(S); - vector vcom(S), a(S); - vector vtot(S), ctot(S); - vector vcs; - vector vcout(S); + // Allocate buffers. + vector vaff(S); // Affected vertex flag (any pass) + vector ucom, vcom(S); // Community membership (first pass, current pass) + vector utot, vtot(S); // Total vertex weights (first pass, current pass) + vector ctot; // Total community weights (any pass) + vector vcs; // Hashtable keys + vector vcout(S); // Hashtable values + if (!DYNAMIC) ucom.resize(S); + if (!DYNAMIC) utot.resize(S); + if (!DYNAMIC) ctot.resize(S); size_t Z = max(size_t(o.aggregationTolerance * X), X); size_t Y = max(size_t(o.aggregationTolerance * Z), Z); - DiGraphCsr cv(S, S); - DiGraphCsr y(S, Y); // y(S, X) - DiGraphCsr z(S, Z); // z(S, X) - float tm = 0, tp = 0, tl = 0, ta = 0; + DiGraphCsr cv(S, S); // CSR for community vertices + DiGraphCsr y(S, Y); // CSR for aggregated graph (input); y(S, X) + DiGraphCsr z(S, Z); // CSR for aggregated graph (output); z(S, X) + // Perform Louvain algorithm. + float tm = 0, ti = 0, tp = 0, tl = 0, ta = 0; // Time spent in different phases float t = measureDurationMarked([&](auto mark) { double E = o.tolerance; auto fc = [&](double el, int l) { return el<=E; }; + // Reset buffers, in case of multiple runs. + fillValueU(vaff, B()); + fillValueU(ucom, K()); fillValueU(vcom, K()); + fillValueU(utot, W()); fillValueU(vtot, W()); fillValueU(ctot, W()); - fillValueU(a, K()); cv.respan(S); y .respan(S); z .respan(S); + // Time the algorithm. mark([&]() { - tm += measureDuration([&]() { fm(vaff); }); - naff = sumValues(vaff, size_t()); + // Initialize community membership and total vertex/community weights. + ti += measureDuration([&]() { fi(ucom, utot, ctot); }); + // Mark affected vertices. + tm += measureDuration([&]() { fm(vaff, vcs, vcout, ucom, utot, ctot); }); + // Start timing first pass. auto t0 = timeNow(), t1 = t0; - louvainVertexWeightsW(vtot, x); - if (q) louvainInitializeFromW(vcom, ctot, x, vtot, *q); - else louvainInitializeW(vcom, ctot, x, vtot); + // Start local-moving, aggregation phases. + // NOTE: In first pass, the input graph is a DiGraph. + // NOTE: For subsequent passes, the input graph is a DiGraphCsr (optimization). for (l=0, p=0; M>0 && P>0;) { if (p==1) t1 = timeNow(); bool isFirst = p==0; int m = 0; tl += measureDuration([&]() { - if (isFirst) m = louvainMoveW(vcom, ctot, vaff, vcs, vcout, x, vtot, M, R, L, fc); + if (isFirst) m = louvainMoveW(ucom, ctot, vaff, vcs, vcout, x, utot, M, R, L, fc, fa); else m = louvainMoveW(vcom, ctot, vaff, vcs, vcout, y, vtot, M, R, L, fc); }); l += max(m, 1); ++p; @@ -755,18 +1073,18 @@ auto louvainSeq(const G& x, const vector *q, const LouvainOptions& o, FM fm) size_t GN = isFirst? x.order() : y.order(); size_t GS = isFirst? x.span() : y.span(); size_t CN = 0; - if (isFirst) CN = louvainCommunityExistsW(cv.degrees, x, vcom); + if (isFirst) CN = louvainCommunityExistsW(cv.degrees, x, ucom); else CN = louvainCommunityExistsW(cv.degrees, y, vcom); if (double(CN)/GN >= o.aggregationTolerance) break; - if (isFirst) louvainRenumberCommunitiesW(vcom, cv.degrees, x); + if (isFirst) louvainRenumberCommunitiesW(ucom, cv.degrees, x); else louvainRenumberCommunitiesW(vcom, cv.degrees, y); - if (isFirst) copyValuesW(a, vcom); - else louvainLookupCommunitiesU(a, vcom); + if (isFirst) {} + else louvainLookupCommunitiesU(ucom, vcom); cv.respan(CN); z.respan(CN); - if (isFirst) louvainCommunityVerticesW(cv.offsets, cv.degrees, cv.edgeKeys, x, vcom); + if (isFirst) louvainCommunityVerticesW(cv.offsets, cv.degrees, cv.edgeKeys, x, ucom); else louvainCommunityVerticesW(cv.offsets, cv.degrees, cv.edgeKeys, y, vcom); ta += measureDuration([&]() { - if (isFirst) louvainAggregateW(z.offsets, z.degrees, z.edgeKeys, z.edgeValues, vcs, vcout, x, vcom, cv.offsets, cv.edgeKeys); + if (isFirst) louvainAggregateW(z.offsets, z.degrees, z.edgeKeys, z.edgeValues, vcs, vcout, x, ucom, cv.offsets, cv.edgeKeys); else louvainAggregateW(z.offsets, z.degrees, z.edgeKeys, z.edgeValues, vcs, vcout, y, vcom, cv.offsets, cv.edgeKeys); }); swap(y, z); @@ -778,64 +1096,90 @@ auto louvainSeq(const G& x, const vector *q, const LouvainOptions& o, FM fm) louvainInitializeW(vcom, ctot, y, vtot); E /= o.toleranceDrop; } - if (p<=1) copyValuesW(a, vcom); - else louvainLookupCommunitiesU(a, vcom); + if (p<=1) {} + else louvainLookupCommunitiesU(ucom, vcom); if (p<=1) t1 = timeNow(); tp += duration(t0, t1); }); }, o.repeat); - return LouvainResult(a, l, p, t, tm/o.repeat, tp/o.repeat, tl/o.repeat, ta/o.repeat, naff); + return LouvainResult(ucom, utot, ctot, l, p, t, tm/o.repeat, ti/o.repeat, tp/o.repeat, tl/o.repeat, ta/o.repeat, countValue(vaff, B(1))); } + #ifdef OPENMP -template -auto louvainOmp(const G& x, const vector *q, const LouvainOptions& o, FM fm) { +/** + * Setup and perform the Louvain algorithm. + * @param x original graph + * @param o louvain options + * @param fi initializing community membership and total vertex/community weights (vcom, vtot, ctot) + * @param fm marking affected vertices (vaff, vcs, vcout, vcom, vtot, ctot) + * @param fa is vertex allowed to be updated? (u) + * @returns louvain result + */ +template +inline auto louvainInvokeOmp(const G& x, const LouvainOptions& o, FI fi, FM fm, FA fa) { + using K = typename G::key_type; using W = LOUVAIN_WEIGHT_TYPE; using B = FLAG; + // Options. double R = o.resolution; int L = o.maxIterations, l = 0; int P = o.maxPasses, p = 0; + // Get graph properties. size_t X = x.size(); - size_t S = x.span(), naff = 0; + size_t S = x.span(); double M = edgeWeightOmp(x)/2; + // Allocate buffers. int T = omp_get_max_threads(); - vector vaff(S); - vector vcom(S), a(S); - vector vtot(S), ctot(S); - vector bufk(T); - vector bufs(T); - vector*> vcs(T); - vector*> vcout(T); + vector vaff(S); // Affected vertex flag (any pass) + vector ucom, vcom(S); // Community membership (first pass, current pass) + vector utot, vtot(S); // Total vertex weights (first pass, current pass) + vector ctot; // Total community weights (any pass) + vector bufk(T); // Buffer for exclusive scan + vector bufs(T); // Buffer for exclusive scan + vector*> vcs(T); // Hashtable keys + vector*> vcout(T); // Hashtable values + if (!DYNAMIC) ucom.resize(S); + if (!DYNAMIC) utot.resize(S); + if (!DYNAMIC) ctot.resize(S); louvainAllocateHashtablesW(vcs, vcout, S); size_t Z = max(size_t(o.aggregationTolerance * X), X); size_t Y = max(size_t(o.aggregationTolerance * Z), Z); - DiGraphCsr cv(S, S); - DiGraphCsr y(S, Y); // y(S, X) - DiGraphCsr z(S, Z); // z(S, X) - float tm = 0, tp = 0, tl = 0, ta = 0; + DiGraphCsr cv(S, S); // CSR for community vertices + DiGraphCsr y(S, Y); // CSR for aggregated graph (input); y(S, X) + DiGraphCsr z(S, Z); // CSR for aggregated graph (output); z(S, X) + // Perform Louvain algorithm. + float tm = 0, ti = 0, tp = 0, tl = 0, ta = 0; // Time spent in different phases float t = measureDurationMarked([&](auto mark) { double E = o.tolerance; auto fc = [&](double el, int l) { return el<=E; }; + // Reset buffers, in case of multiple runs. + fillValueOmpU(vaff, B()); + fillValueOmpU(ucom, K()); fillValueOmpU(vcom, K()); + fillValueOmpU(utot, W()); fillValueOmpU(vtot, W()); fillValueOmpU(ctot, W()); - fillValueOmpU(a, K()); cv.respan(S); y .respan(S); z .respan(S); + // Time the algorithm. mark([&]() { - tm += measureDuration([&]() { fm(vaff); }); - naff = sumValuesOmp(vaff, size_t()); + // Initialize community membership and total vertex/community weights. + ti += measureDuration([&]() { fi(ucom, utot, ctot); }); + // Mark affected vertices. + tm += measureDuration([&]() { fm(vaff, vcs, vcout, ucom, utot, ctot); }); + // Start timing first pass. auto t0 = timeNow(), t1 = t0; - louvainVertexWeightsOmpW(vtot, x); - if (q) louvainInitializeFromOmpW(vcom, ctot, x, vtot, *q); - else louvainInitializeOmpW(vcom, ctot, x, vtot); + // Start local-moving, aggregation phases. + // NOTE: In first pass, the input graph is a DiGraph. + // NOTE: For subsequent passes, the input graph is a DiGraphCsr (optimization). for (l=0, p=0; M>0 && P>0;) { if (p==1) t1 = timeNow(); bool isFirst = p==0; int m = 0; tl += measureDuration([&]() { - if (isFirst) m = louvainMoveOmpW(vcom, ctot, vaff, vcs, vcout, x, vtot, M, R, L, fc); + if (isFirst) m = louvainMoveOmpW(ucom, ctot, vaff, vcs, vcout, x, utot, M, R, L, fc, fa); else m = louvainMoveOmpW(vcom, ctot, vaff, vcs, vcout, y, vtot, M, R, L, fc); }); l += max(m, 1); ++p; @@ -843,18 +1187,18 @@ auto louvainOmp(const G& x, const vector *q, const LouvainOptions& o, FM fm) size_t GN = isFirst? x.order() : y.order(); size_t GS = isFirst? x.span() : y.span(); size_t CN = 0; - if (isFirst) CN = louvainCommunityExistsOmpW(cv.degrees, x, vcom); + if (isFirst) CN = louvainCommunityExistsOmpW(cv.degrees, x, ucom); else CN = louvainCommunityExistsOmpW(cv.degrees, y, vcom); if (double(CN)/GN >= o.aggregationTolerance) break; - if (isFirst) louvainRenumberCommunitiesOmpW(vcom, cv.degrees, bufk, x); + if (isFirst) louvainRenumberCommunitiesOmpW(ucom, cv.degrees, bufk, x); else louvainRenumberCommunitiesOmpW(vcom, cv.degrees, bufk, y); - if (isFirst) copyValuesOmpW(a, vcom); - else louvainLookupCommunitiesOmpU(a, vcom); + if (isFirst) {} + else louvainLookupCommunitiesOmpU(ucom, vcom); cv.respan(CN); z.respan(CN); - if (isFirst) louvainCommunityVerticesOmpW(cv.offsets, cv.degrees, cv.edgeKeys, bufk, x, vcom); + if (isFirst) louvainCommunityVerticesOmpW(cv.offsets, cv.degrees, cv.edgeKeys, bufk, x, ucom); else louvainCommunityVerticesOmpW(cv.offsets, cv.degrees, cv.edgeKeys, bufk, y, vcom); ta += measureDuration([&]() { - if (isFirst) louvainAggregateOmpW(z.offsets, z.degrees, z.edgeKeys, z.edgeValues, bufs, vcs, vcout, x, vcom, cv.offsets, cv.edgeKeys); + if (isFirst) louvainAggregateOmpW(z.offsets, z.degrees, z.edgeKeys, z.edgeValues, bufs, vcs, vcout, x, ucom, cv.offsets, cv.edgeKeys); else louvainAggregateOmpW(z.offsets, z.degrees, z.edgeKeys, z.edgeValues, bufs, vcs, vcout, y, vcom, cv.offsets, cv.edgeKeys); }); swap(y, z); @@ -866,33 +1210,88 @@ auto louvainOmp(const G& x, const vector *q, const LouvainOptions& o, FM fm) louvainInitializeOmpW(vcom, ctot, y, vtot); E /= o.toleranceDrop; } - if (p<=1) copyValuesOmpW(a, vcom); - else louvainLookupCommunitiesOmpU(a, vcom); + if (p<=1) {} + else louvainLookupCommunitiesOmpU(ucom, vcom); if (p<=1) t1 = timeNow(); tp += duration(t0, t1); }); }, o.repeat); louvainFreeHashtablesW(vcs, vcout); - return LouvainResult(a, l, p, t, tm/o.repeat, tp/o.repeat, tl/o.repeat, ta/o.repeat, naff); + return LouvainResult(ucom, utot, ctot, l, p, t, tm/o.repeat, ti/o.repeat, tp/o.repeat, tl/o.repeat, ta/o.repeat, countValueOmp(vaff, B(1))); } #endif +#pragma endregion + + +#pragma region REPEAT SETUP (DYNAMIC) +/** + * Setup the Dynamic Louvain algorithm for multiple runs. + * @param qs initial community membership for each run (updated) + * @param qvtots initial total vertex weights for each run (updated) + * @param qctots initial total community weights for each run (updated) + * @param q initial community membership + * @param qvtot initial total vertex weights + * @param qctot initial total community weights + * @param repeat number of runs + */ +template +inline void louvainSetupInitialsW(vector2d& qs, vector2d& qvtots, vector2d& qctots, const vector& q, const vector& qvtot, const vector& qctot, int repeat) { + qs .resize(repeat); + qvtots.resize(repeat); + qctots.resize(repeat); + for (int r=0; r -inline auto louvainStaticSeq(const G& x, const vector* q=nullptr, const LouvainOptions& o={}) { - auto fm = [](auto& vertices) { fillValueU(vertices, FLAG(1)); }; - return louvainSeq(x, q, o, fm); +#pragma region STATIC APPROACH +/** + * Obtain the community membership of each vertex with Static Louvain. + * @param x original graph + * @param o louvain options + * @returns louvain result + */ +template +inline auto louvainStatic(const G& x, const LouvainOptions& o={}) { + auto fi = [&](auto& vcom, auto& vtot, auto& ctot) { + louvainVertexWeightsW(vtot, x); + louvainInitializeW(vcom, ctot, x, vtot); + }; + auto fm = [ ](auto& vaff, const auto& vcom, const auto& vtot, const auto& ctot, auto& vcs, auto& vcout) { + fillValueU(vaff, FLAG(1)); + }; + auto fa = [ ](auto u) { return true; }; + return louvainInvoke(x, o, fi, fm, fa); } + #ifdef OPENMP -template -inline auto louvainStaticOmp(const G& x, const vector* q=nullptr, const LouvainOptions& o={}) { - auto fm = [](auto& vertices) { fillValueOmpU(vertices, FLAG(1)); }; - return louvainOmp(x, q, o, fm); +/** + * Obtain the community membership of each vertex with Static Louvain. + * @param x original graph + * @param o louvain options + * @returns louvain result + */ +template +inline auto louvainStaticOmp(const G& x, const LouvainOptions& o={}) { + auto fi = [&](auto& vcom, auto& vtot, auto& ctot) { + louvainVertexWeightsOmpW(vtot, x); + louvainInitializeOmpW(vcom, ctot, x, vtot); + }; + auto fm = [ ](auto& vaff, const auto& vcom, const auto& vtot, const auto& ctot, auto& vcs, auto& vcout) { + fillValueOmpU(vaff, FLAG(1)); + }; + auto fa = [ ](auto u) { return true; }; + return louvainInvokeOmp(x, o, fi, fm, fa); } #endif +#pragma endregion +#pragma endregion