forked from yuang-chen/Corder-TPDS-21
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.hpp
157 lines (129 loc) · 5.54 KB
/
parser.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#pragma once
#include <string>
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "graph.hpp"
#include <algorithm>
bool parseGraph(std::string filename, Graph& graph) {
std::ifstream csr_file;
csr_file.open(filename, std::ios::binary);
if(!csr_file.is_open()) {
std::cout << "cannot open csr file!" << std::endl;
return false;
}
csr_file.read(reinterpret_cast<char*>(&graph.num_vertex), sizeof(unsigned));
csr_file.read(reinterpret_cast<char*>(&graph.num_edges), sizeof(unsigned));
std::vector<unsigned> local_row(graph.num_vertex);
std::vector<unsigned> local_col(graph.num_edges);
csr_file.read(reinterpret_cast<char*>(local_row.data()), graph.num_vertex * sizeof(unsigned));
csr_file.read(reinterpret_cast<char*>(local_col.data()), graph.num_edges * sizeof(unsigned));
local_row.push_back(graph.num_edges);
graph.row_index = std::move(local_row);
graph.col_index = std::move(local_col);
#ifdef WEIGHTED
std::vector<unsigned> local_wei(graph.num_edges);
csr_file.read(reinterpret_cast<char*>(local_wei.data()), graph.num_edges * sizeof(unsigned));
graph.edge_weight = std::move(local_wei);
#endif
csr_file.close();
return true;
};
bool writeEdgelist(std::string filename, Graph& graph) {
std::ofstream output(filename);
if(!output.is_open()) {
std::cout << "cannot open txt file!" << std::endl;
return false;
}
for(unsigned i = 0; i < graph.num_vertex; i++) {
for(unsigned j = graph.row_index[i]; j < graph.row_index[i+1]; j++) {
output << i << " " << graph.col_index[j] << '\n';
}
}
output.close();
}
bool writeGraph(std::string filename, Graph& graph) {
std::ofstream output_file(filename, std::ios::out | std::ios::binary);
if(!output_file.is_open()) {
std::cout << "cannot open output file!" << '\n';
return false;
}
output_file.write(reinterpret_cast<char*>(&graph.num_vertex), sizeof(unsigned));
output_file.write(reinterpret_cast<char*>(&graph.num_edges), sizeof(unsigned));
output_file.write(reinterpret_cast<char*>(graph.row_index.data()), graph.num_vertex * sizeof(unsigned));
output_file.write(reinterpret_cast<char*>(graph.col_index.data()), graph.num_edges * sizeof(unsigned));
#ifdef WEIGHTED
output_file.write(reinterpret_cast<char*>(graph.edge_weight.data()), graph.num_edges * sizeof(unsigned));
#endif
output_file.close();
std::cout << filename << " is writen" << std::endl;
return true;
};
void calculate_skew_locality(Graph& graph) {
unsigned num_clusters = params::num_partitions;
std::vector<unsigned> sum_degree(num_clusters - 1, 0);
unsigned tmp = 0;
#pragma omp parallel for schedule(static) num_threads(params::num_threads)
for(unsigned i = 0; i < num_clusters - 1; i++) {
for(unsigned j = 0; j < params::partition_size; j++) {
sum_degree[i] += graph.out_degree[i * params::partition_size + j];
}
}
std::sort(sum_degree.begin(), sum_degree.end());
std::vector<unsigned> count;
std::vector<float> locality_skew;
std::vector<std::pair<unsigned, unsigned>> hotness;
count.push_back(ceil(params::num_partitions * 0.01));
count.push_back(ceil(params::num_partitions * 0.1));
count.push_back(ceil(params::num_partitions * 0.2));
count.push_back(ceil(params::num_partitions * 0.3));
count.push_back(ceil(params::num_partitions * 0.4));
count.push_back(ceil(params::num_partitions * 0.5));
for(auto c : count) {
float first = 0, second = 0;
for(auto it = sum_degree.begin(); it != sum_degree.begin() + c; it++)
first += *it;
for(auto it = sum_degree.end() - c; it != sum_degree.end(); it++)
second += *it;
locality_skew.push_back(second/first);
hotness.emplace_back(first, second);
}
std::cout << " 1%, 10%, 20%, 30%, 40%, 50% Locality Skew: " << " ";
for(auto lk: locality_skew)
std::cout << lk << " ";
std::cout << std::endl;
std::cout << " 1%, 10%, 20%, 30%, 40%, 50% Hotness Pairs: " << " ";
for(auto h: hotness)
std::cout << h.first << " " << h.second << " / ";
std::cout << std::endl;
}
void isOverflow(Graph& graph) {
std::vector<unsigned> segment_large;
segment_large.reserve(graph.num_vertex);
std::vector<unsigned> segment_small;
segment_small.reserve(graph.num_vertex/2);
unsigned average_degree = graph.num_edges / graph.num_vertex;
for(unsigned i = 0; i < graph.num_vertex; i++)
if(graph.out_degree[i] > 1 * average_degree)
segment_large.push_back(i);
else
segment_small.push_back(i);
unsigned num_large_per_seg = ceil((float) segment_large.size() / params::num_partitions);
params::overflow_ceil = num_large_per_seg;
std::vector<unsigned> count(params::num_partitions - 1, 0);
for(unsigned i = 0; i < params::num_partitions - 1; i++) {
unsigned offset = i * params::partition_size;
for(unsigned j = 0; j < params::partition_size; j++) {
if(graph.out_degree[offset + j] > average_degree)
count[i]++;
}
}
for(unsigned i = 0; i < params::num_partitions - 1; i++) {
int extra = count[i] - params::overflow_ceil;
if(extra > 0) {
std::cout << "subgraph " << i << " is overflowed by " << extra << " vertices." << std::endl;
}
}
}