Skip to content

Commit

Permalink
🐛 read ground truth communities
Browse files Browse the repository at this point in the history
  • Loading branch information
wolfram77 committed Jul 6, 2023
1 parent 5a33d96 commit b3e5e39
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 17 deletions.
92 changes: 84 additions & 8 deletions main.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,83 @@ using namespace std;
// HELPERS
// -------

template <class T>
inline auto minMaxAverageSize(const vector2d<T>& xs) {
size_t smin = numeric_limits<size_t>::min();
size_t smax = 0, ssum = 0;
for (const auto& x : xs) {
smin = min(smin, x.size());
smax = max(smax, x.size());
ssum += x.size();
}
return make_tuple(smin, smax, double(ssum) / xs.size());
}


template <class G, class K>
inline double getModularity(const G& x, const LouvainResult<K>& a, double M) {
auto fc = [&](auto u) { return a.membership[u]; };
return modularityByOmp(x, fc, M, 1.0);
}


/**
 * Read ground truth communities from a file, one community per processed line.
 * The line with index c is parsed as a sequence of unsigned decimal integers
 * (as accepted by strtoull), which are appended to comv[c]; comv[c] is then
 * passed through sortValues. Parsing of a line stops at the first token that
 * is not a number.
 * @param comv communities to fill (updated); it must be pre-sized by the
 *             caller, since lines are processed by multiple threads and the
 *             outer container cannot be grown safely here. Lines whose index
 *             is not below comv.size() are ignored.
 * @param pth path of the file to read
 */
template <class K>
inline void readGroundTruthCommunitiesOmpW(vector2d<K>& comv, const char *pth) {
  ifstream s(pth);
  readLinesOmpDo(s, [&](auto c, const auto& line) {
    // Guard against files with more lines than the pre-sized container:
    // writing to comv[c] would otherwise be out of bounds.
    if (size_t(c) >= comv.size()) return;
    auto& com = comv[c];
    const char *str = line.c_str();
    while (true) {
      char *end = nullptr;
      size_t u = strtoull(str, &end, 10);
      // No characters consumed means there is no further number on this line.
      if (end==str) break;
      str = end;
      com.push_back(K(u));
    }
    sortValues(com);
  });
}


/**
 * Obtain the communities each vertex belongs to, from the vertices belonging
 * to each community.
 * @param x graph; the result has x.span() entries
 * @param comv vertices belonging to each community (indexed by community)
 * @returns for each vertex key u, the communities c for which comv[c]
 *          contains u; vertex keys not below x.span() are ignored
 */
template <class G, class K>
inline vector2d<K> communityMembershipsOmp(const G& x, const vector2d<K>& comv) {
  const size_t S = x.span();
  vector2d<K> a(S);
  #pragma omp parallel
  {
    // Every thread scans all communities; belongsOmp(u) selects which vertex
    // keys this thread handles (presumably each key is owned by exactly one
    // thread, so a[u] is appended to by a single thread; confirm against
    // the definition of belongsOmp).
    for (size_t c=0; c<comv.size(); ++c) {
      for (auto u : comv[c]) {
        // Community lists may name vertices outside the graph's span;
        // indexing a[u] with such a key would be out of bounds.
        if (size_t(u) >= S) continue;
        if (belongsOmp(u)) a[u].push_back(K(c));
      }
    }
  }
  return a;
}


/**
 * Obtain the vertices belonging to each community, from the community each
 * vertex belongs to.
 * @param x graph; the result has x.span() entries
 * @param vcom community of each vertex (indexed by vertex key); it is read
 *             for every key below x.span() that x.hasVertex() accepts, and
 *             each community id is used as an index into the result
 * @returns for each community id, its vertices, passed through sortValues
 */
template <class G, class K>
inline vector2d<K> communityVerticesOmp(const G& x, const vector<K>& vcom) {
  const size_t limit = x.span();
  vector2d<K> members(limit);
  #pragma omp parallel
  {
    // Every thread walks all vertex keys; belongsOmp(com) decides whether
    // this thread is the one that appends to members[com].
    for (size_t v=0; v<limit; ++v) {
      if (x.hasVertex(v)) {
        const K com = vcom[v];
        if (belongsOmp(com)) members[com].push_back(K(v));
      }
    }
  }
  // Order the vertices of each community independently.
  #pragma omp parallel for schedule(dynamic, 2048)
  for (size_t k=0; k<members.size(); ++k)
    sortValues(members[k]);
  return members;
}




// PERFORM EXPERIMENT
// ------------------

template <class G>
void runExperiment(const G& x) {
using K = typename G::key_type;
template <class G, class K>
void runExperiment(const G& x, const vector2d<K>& gtcomv, const vector2d<K>& gtvcom) {
using V = typename G::edge_value_type;
int repeat = REPEAT_METHOD;
int retries = 5;
Expand All @@ -70,18 +132,32 @@ int main(int argc, char **argv) {
using K = uint32_t;
using V = TYPE;
install_sigsegv();
char *file = argv[1];
size_t span = argc>2? stoull(argv[2]) : 0;
bool weighted = argc>3? stoi(argv[3]) : false;
bool symmetric = argc>4? stoi(argv[4]) : false;
char *file = argv[1];
size_t span = argc>2? stoull(argv[2]) : 0;
bool weighted = argc>3? stoi(argv[3]) : false;
bool symmetric = argc>4? stoi(argv[4]) : false;
char *groundTruth = argc>5? argv[5] : nullptr;
omp_set_num_threads(MAX_THREADS);
LOG("OMP_NUM_THREADS=%d\n", MAX_THREADS);
LOG("Loading graph %s ...\n", file);
DiGraph<K, None, V> x;
if (span) x.respan(span);
readEdgelistOmpW(x, file, weighted); LOG(""); println(x);
if (!symmetric) { x = symmetricizeOmp(x); LOG(""); print(x); printf(" (symmetricize)\n"); }
runExperiment(x);
vector2d<K> gtcomv, gtvcom;
if (groundTruth) {
gtcomv.resize(2 * x.span());
LOG("Loading ground truth communities %s ...\n", groundTruth);
readGroundTruthCommunitiesOmpW(gtcomv, groundTruth);
LOG("Loaded %zu ground truth communities.\n", gtcomv.size());
auto [cmin, cmax, cavg] = minMaxAverageSize(gtcomv);
LOG("Community size distribution: min=%zu, max=%zu, avg=%.2f.\n", cmin, cmax, cavg);
gtvcom = communityMembershipsOmp(x, gtcomv);
LOG("Obtained ground truth communities each vertex belongs to.\n");
auto [vmin, vmax, vavg] = minMaxAverageSize(gtvcom);
LOG("Vertex membership distribution: min=%zu, max=%zu, avg=%.2f.\n", vmin, vmax, vavg);
}
runExperiment(x, gtcomv, gtvcom);
printf("\n");
return 0;
}
8 changes: 4 additions & 4 deletions main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ DEFINES=(""

# Run
g++ ${DEFINES[*]} -std=c++17 -O3 -fopenmp main.cxx
# stdbuf --output=L ./a.out ~/Graphs/TYPES/communities/com-dblp.ungraph.txt 0 0 0 2>&1 | tee -a "$out"
stdbuf --output=L ./a.out ~/Graphs/TYPES/communities/com-lj.ungraph.txt 0 0 0 2>&1 | tee -a "$out"
stdbuf --output=L ./a.out ~/Graphs/TYPES/communities/com-orkut.ungraph.txt 0 0 0 2>&1 | tee -a "$out"
stdbuf --output=L ./a.out ~/Graphs/TYPES/communities/com-friendster.ungraph.txt 0 0 0 2>&1 | tee -a "$out"
stdbuf --output=L ./a.out ~/Graphs/TYPES/communities/com-dblp.ungraph.txt 0 0 0 ~/Graphs/TYPES/communities/com-dblp.all.cmty.txt 2>&1 | tee -a "$out"
stdbuf --output=L ./a.out ~/Graphs/TYPES/communities/com-lj.ungraph.txt 0 0 0 ~/Graphs/TYPES/communities/com-lj.all.cmty.txt 2>&1 | tee -a "$out"
stdbuf --output=L ./a.out ~/Graphs/TYPES/communities/com-orkut.ungraph.txt 0 0 0 ~/Graphs/TYPES/communities/com-orkut.all.cmty.txt 2>&1 | tee -a "$out"
stdbuf --output=L ./a.out ~/Graphs/TYPES/communities/com-friendster.ungraph.txt 0 0 0 ~/Graphs/TYPES/communities/com-friendster.all.cmty.txt 2>&1 | tee -a "$out"
11 changes: 6 additions & 5 deletions src/_iostream.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ using std::cout;
/**
* Read lines from a stream and apply a function to each line.
* @param s input stream
* @param fp process function (line)
* @param fp process function (line_index, line)
*/
template <class FP>
inline void readLinesDo(istream& s, FP fp) {
string line;
while (getline(s, line)) {
for (size_t l=0; getline(s, line);) {
if (line[0]=='#') continue;
fp(line);
fp(l++, line);
}
}

Expand All @@ -56,7 +56,7 @@ template <class FP>
inline void readLinesOmpDo(istream& s, FP fp) {
const int LINES = 131072;
vector<string> lines(LINES);
while (true) {
for (size_t l=0;;) {
// Read several lines from the stream.
int READ = 0;
for (int i=0; i<LINES;) {
Expand All @@ -68,7 +68,8 @@ inline void readLinesOmpDo(istream& s, FP fp) {
// Process lines using multiple threads.
#pragma omp parallel for schedule(dynamic, 1024)
for (int i=0; i<READ; ++i)
fp(lines[i]);
fp(l+i, lines[i]);
l += READ;
}
}
#endif
Expand Down

0 comments on commit b3e5e39

Please sign in to comment.