Skip to content

Commit eec25c1

Browse files
committed
OpenMP exercises.
1 parent 5dd3965 commit eec25c1

24 files changed

+2437
-0
lines changed

5-openmp/demo/false_sharing.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#include <omp.h>

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <random>
#include <string>
#include <vector>
7+
8+
// Builds a vector of `n` integers, each drawn uniformly from {0, 1}.
// The generator is seeded from std::random_device, so the content differs between runs.
std::vector<int> init_random_vector(size_t n) {
  std::vector<int> v(n);
  std::mt19937 m{std::random_device{}()};
  std::uniform_int_distribution<int> dist{0, 1};
  std::generate(v.begin(), v.end(), [&dist, &m]() { return dist(m); });
  // Return the local by name: `return std::move(v)` would inhibit copy elision (NRVO).
  return v;
}
15+
16+
size_t num_zeroes_false_sharing(size_t nthreads, const std::vector<int>& data) {
17+
size_t zeroes[nthreads] = {0};
18+
size_t global_zeroes = 0;
19+
#pragma omp parallel num_threads(nthreads)
20+
{
21+
int tid = omp_get_thread_num();
22+
#pragma omp for
23+
for (size_t i = 0; i < data.size(); i++) {
24+
if(data[i] == 0) {
25+
++zeroes[tid];
26+
}
27+
}
28+
}
29+
for(int i = 0; i < nthreads; ++i) {
30+
global_zeroes += zeroes[i];
31+
}
32+
return global_zeroes;
33+
}
34+
35+
size_t num_zeroes_fixed(size_t nthreads, const std::vector<int>& data) {
36+
size_t padding = 16;
37+
size_t zeroes[nthreads * padding] = {0};
38+
size_t global_zeroes = 0;
39+
#pragma omp parallel num_threads(nthreads)
40+
{
41+
int tid = omp_get_thread_num();
42+
#pragma omp for
43+
for (size_t i = 0; i < data.size(); i++) {
44+
if(data[i] == 0) {
45+
++zeroes[tid*padding];
46+
}
47+
}
48+
}
49+
for(int i = 0; i < nthreads; ++i) {
50+
global_zeroes += zeroes[i*padding];
51+
}
52+
return global_zeroes;
53+
}
54+
55+
int main(int argc, char** argv) {
56+
if(argc != 2) {
57+
std::cout << "usage: " << argv[0] << " <num_threads>" << std::endl;
58+
exit(1);
59+
}
60+
size_t nthreads = std::stoll(argv[1]);
61+
std::vector<int> v = init_random_vector(100000000);
62+
auto start = std::chrono::steady_clock::now();
63+
size_t zeroes1 = num_zeroes_false_sharing(nthreads, v);
64+
auto end = std::chrono::steady_clock::now();
65+
std::cout << "(false sharing) time = " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms\n";
66+
std::cout << "zeroes1 = " << zeroes1 << std::endl;
67+
68+
start = std::chrono::steady_clock::now();
69+
size_t zeroes2 = num_zeroes_fixed(nthreads, v);
70+
end = std::chrono::steady_clock::now();
71+
std::cout << "(fixed) time = " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms\n";
72+
std::cout << "zeroes2 = " << zeroes2 << std::endl;
73+
return 0;
74+
}

5-openmp/demo/reduction.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#include <vector>
2+
#include <cassert>
3+
#include <iostream>
4+
#include <algorithm>
5+
#include <random>
6+
7+
// Returns, for every element of `data`, its share of the sum of all elements:
// p[i] = data[i] / sum(data). The result has the same size as `data`.
//
// Both passes are OpenMP-parallel loops. If the elements sum to zero, the divisions
// are floating-point divisions by zero (inf/NaN on IEEE-754 platforms).
std::vector<double> proportion(const std::vector<int>& data) {
  // Signed loop bound: avoids the signed/unsigned comparison and keeps the loop in a form
  // accepted by every OpenMP version.
  const long long n = static_cast<long long>(data.size());
  std::vector<double> p(data.size());
  // 64-bit accumulator: an `int` sum overflows (undefined behaviour) as soon as the
  // elements add up to more than INT_MAX.
  long long total = 0;
  #pragma omp parallel for reduction(+:total)
  for (long long i = 0; i < n; ++i) {
    total += data[i];
  }
  // Implicit barrier: `total` is complete before the second loop starts.
  const double denominator = static_cast<double>(total);
  #pragma omp parallel for
  for (long long i = 0; i < n; ++i) {
    p[i] = static_cast<double>(data[i]) / denominator;
  }
  // Implicit barrier. Return by name so that copy elision (NRVO) can apply.
  return p;
}
22+
23+
// Builds a vector of `n` integers, each drawn uniformly from [1, 100].
// The generator is seeded from std::random_device, so the content differs between runs.
std::vector<int> init_random_vector(size_t n) {
  std::vector<int> v(n);
  std::mt19937 m{std::random_device{}()};
  std::uniform_int_distribution<int> dist{1, 100};
  std::generate(v.begin(), v.end(), [&dist, &m]() { return dist(m); });
  // Return the local by name: `return std::move(v)` would inhibit copy elision (NRVO).
  return v;
}
30+
31+
int main() {
32+
std::vector<int> a = init_random_vector(1000000);
33+
std::vector<double> p = proportion(a);
34+
double sum_p = 0.0;
35+
for(double d : p) {
36+
sum_p += d;
37+
}
38+
std::cout << "Sum: " << sum_p << std::endl;
39+
}

5-openmp/demo/rule184_par1.cpp

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#include <iostream>
2+
#include <cstdio>
3+
#include <random>
4+
#include <chrono>
5+
#include <iostream>
6+
#include <algorithm>
7+
#include <vector>
8+
#include <thread>
9+
#include <omp.h>
10+
11+
// Comment this out to disable the per-step timing output of CLOCK_PRINT.
#define VERBOSE

#ifdef VERBOSE
  // CLOCK_PRINT(i, K, MSG, S): runs statement `S` between two steady_clock readings and
  // prints "[i] MSG = <elapsed>ms".
  //
  // The expansion is a single braced block, so that:
  //   - it is safe under an unbraced `if`/`for` (the whole macro is one statement);
  //   - the timer variables stay local, so the same `K` can be reused in one scope.
  // `K` is kept for compatibility with existing call sites; it only names the timers.
  // Invoke it without a trailing semicolon, as the existing call sites do.
  #define CLOCK_PRINT(i, K, MSG, S) \
    { \
      auto start##K = std::chrono::steady_clock::now(); \
      S; \
      auto end##K = std::chrono::steady_clock::now(); \
      std::cout << "[" << (i) << "] " << (MSG) << " = " << std::chrono::duration_cast<std::chrono::milliseconds>(end##K - start##K).count() << "ms\n"; \
    }
#else
  // Non-verbose build: just run `S`, still as a single braced statement.
  #define CLOCK_PRINT(i, K, MSG, S) { S; }
#endif
23+
24+
// Creates the initial road of `n` cells. Each cell is drawn from a discrete distribution
// with weights {10, 90}: value 0 with probability 0.1, value 1 with probability 0.9.
// The seed is fixed, so the same `n` always yields the same cells.
std::vector<int> initialize_cells(size_t n) {
  std::vector<int> cells(n);
  // std::mt19937 m{std::random_device{}()};
  std::mt19937 m{0}; // fix the seed to ease debugging.
  std::discrete_distribution<> d({10, 90});
  for (int& cell : cells) {
    cell = d(m);
  }
  // Return the local by name: `return std::move(cells)` would inhibit copy elision (NRVO).
  return cells;
}
34+
35+
// Writes one line to std::cout: a space for every cell equal to 0 and a black square
// (U+25A0) for every other cell, followed by a flushed newline.
void print(const std::vector<int>& cells) {
  for (const int cell : cells) {
    if (cell == 0) {
      std::cout << " ";
    } else {
      std::cout << "\u25A0";
    }
  }
  std::cout << std::endl;
}
41+
42+
// Computes one step of the automaton for the interior cells, in parallel:
//   next[i] = current[i-1]  if current[i] == 0
//   next[i] = current[i+1]  otherwise
// for i in [1, size-2]. The two border cells next[0] and next[size-1] are not written.
//
// Precondition: next.size() >= current.size().
// NOTE(review): next[size-1] keeps whatever value it held before the call — confirm that
// this is the intended boundary condition.
void simulate_step(const std::vector<int>& current, std::vector<int>& next) {
  const size_t n = current.size();
  // No interior cell below 3 elements. This also prevents `size() - 1` from wrapping
  // around to SIZE_MAX on an empty vector.
  if (n < 3) {
    return;
  }
  #pragma omp parallel for
  for (size_t i = 1; i < n - 1; ++i) {
    next[i] = (current[i] == 0) ? current[i - 1] : current[i + 1];
  }
}
53+
54+
// Returns the length of the longest run of consecutive cells equal to 1
// (0 when there is no such cell).
size_t longest_queue(const std::vector<int>& cells) {
  size_t best = 0;
  size_t run = 0;
  for (const int cell : cells) {
    if (cell != 1) {
      run = 0;
      continue;
    }
    ++run;
    if (best < run) {
      best = run;
    }
  }
  return best;
}
68+
69+
size_t simulate(size_t steps, std::vector<int>& current, std::vector<int>& next) {
70+
using namespace std::chrono_literals;
71+
std::mt19937 m{0}; // fixed seed to ease debugging.
72+
// std::uniform_int_distribution<int> d{0, 1};
73+
std::discrete_distribution<> d({10, 90});
74+
size_t lmax = 0;
75+
for(size_t i = 0; i < steps; ++i) {
76+
CLOCK_PRINT(i, 1, "longest", lmax = std::max(lmax, longest_queue(current)))
77+
CLOCK_PRINT(i, 2, "simulate", simulate_step(current, next))
78+
std::swap(current, next);
79+
current[0] = d(m); // Next car, random.
80+
}
81+
return lmax;
82+
}
83+
84+
int main(int argc, char** argv) {
85+
if(argc < 3) {
86+
std::cout << "usage: " << argv[0] << " <size> <steps> [threads]\n";
87+
exit(1);
88+
}
89+
size_t n = std::atoll(argv[1]);
90+
size_t steps = std::atoll(argv[2]);
91+
size_t num_threads = argc == 4 ? std::atoll(argv[3]) : omp_get_max_threads();
92+
std::cout << "num_threads = " << num_threads << std::endl;
93+
omp_set_num_threads(num_threads);
94+
std::vector<int> cells = initialize_cells(n);
95+
std::vector<int> next(n);
96+
auto start = std::chrono::steady_clock::now();
97+
size_t lmax = simulate(steps, cells, next);
98+
auto end = std::chrono::steady_clock::now();
99+
std::cout << "longest_queue = " << lmax << std::endl;
100+
std::cout << "time = " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms\n";
101+
return 0;
102+
}

5-openmp/demo/rule184_par2.cpp

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#include <iostream>
2+
#include <cstdio>
3+
#include <random>
4+
#include <chrono>
5+
#include <iostream>
6+
#include <algorithm>
7+
#include <vector>
8+
#include <thread>
9+
#include <omp.h>
10+
11+
// Comment this out to disable the per-step timing output of CLOCK_PRINT.
#define VERBOSE

#ifdef VERBOSE
  // CLOCK_PRINT(i, K, MSG, S): runs statement `S` between two steady_clock readings and
  // prints "[i] MSG = <elapsed>ms".
  //
  // The expansion is a single braced block, so that:
  //   - it is safe under an unbraced `if`/`for` (the whole macro is one statement);
  //   - the timer variables stay local, so the same `K` can be reused in one scope.
  // `K` is kept for compatibility with existing call sites; it only names the timers.
  // Invoke it without a trailing semicolon, as the existing call sites do.
  #define CLOCK_PRINT(i, K, MSG, S) \
    { \
      auto start##K = std::chrono::steady_clock::now(); \
      S; \
      auto end##K = std::chrono::steady_clock::now(); \
      std::cout << "[" << (i) << "] " << (MSG) << " = " << std::chrono::duration_cast<std::chrono::milliseconds>(end##K - start##K).count() << "ms\n"; \
    }
#else
  // Non-verbose build: just run `S`, still as a single braced statement.
  #define CLOCK_PRINT(i, K, MSG, S) { S; }
#endif
23+
24+
// Creates the initial road of `n` cells. Each cell is drawn from a discrete distribution
// with weights {10, 90}: value 0 with probability 0.1, value 1 with probability 0.9.
// The seed is fixed, so the same `n` always yields the same cells.
std::vector<int> initialize_cells(size_t n) {
  std::vector<int> cells(n);
  // std::mt19937 m{std::random_device{}()};
  std::mt19937 m{0}; // fix the seed to ease debugging.
  std::discrete_distribution<> d({10, 90});
  for (int& cell : cells) {
    cell = d(m);
  }
  // Return the local by name: `return std::move(cells)` would inhibit copy elision (NRVO).
  return cells;
}
34+
35+
// Writes one line to std::cout: a space for every cell equal to 0 and a black square
// (U+25A0) for every other cell, followed by a flushed newline.
void print(const std::vector<int>& cells) {
  for (const int cell : cells) {
    if (cell == 0) {
      std::cout << " ";
    } else {
      std::cout << "\u25A0";
    }
  }
  std::cout << std::endl;
}
41+
42+
// Computes one step of the automaton for the interior cells, in parallel:
//   next[i] = current[i-1]  if current[i] == 0
//   next[i] = current[i+1]  otherwise
// for i in [1, size-2]. The two border cells next[0] and next[size-1] are not written.
//
// Precondition: next.size() >= current.size().
// NOTE(review): next[size-1] keeps whatever value it held before the call — confirm that
// this is the intended boundary condition.
void simulate_step(const std::vector<int>& current, std::vector<int>& next) {
  const size_t n = current.size();
  // No interior cell below 3 elements. This also prevents `size() - 1` from wrapping
  // around to SIZE_MAX on an empty vector.
  if (n < 3) {
    return;
  }
  #pragma omp parallel for
  for (size_t i = 1; i < n - 1; ++i) {
    next[i] = (current[i] == 0) ? current[i - 1] : current[i + 1];
  }
}
53+
54+
// Summary of the runs of 1s found in one chunk [b, e) of the cells.
struct chunk_data {
  size_t left = 0;        // length of the run of 1s that starts at the first cell of the chunk
  size_t right = 0;       // length of the run of 1s that ends at the last cell of the chunk
  size_t internal = 0;    // longest run of 1s found after the leading run (trailing run included)
  size_t chunk_size = 0;  // number of cells in the chunk (e - b)
  chunk_data() {}
};

// Scans data[b, e) and summarizes its runs of 1s so that neighbouring chunks can be merged:
// a queue crossing a chunk boundary has length `right` (left chunk) + `left` (right chunk).
//
// Precondition: b <= e <= data.size().
chunk_data process_chunk(size_t b, size_t e, const std::vector<int>& data) {
  chunk_data ld;
  ld.chunk_size = e - b;
  size_t i = b;
  // Leading run of 1s.
  for (; i < e && data[i] == 1; ++i) {
    ld.left++;
  }
  // Remaining cells: track the current run `k` and the longest one seen.
  size_t k = 0;
  for (; i < e; ++i) {
    if (data[i] == 1) {
      ++k;
      ld.internal = std::max(ld.internal, k);
    }
    else {
      k = 0;
    }
  }
  // When the whole chunk is a single run of 1s, the second loop never executes and `k`
  // stays 0, although the run does reach the right border: report it as the right run too.
  // Otherwise a queue spanning this chunk and the next one would be under-counted.
  ld.right = (ld.left == ld.chunk_size) ? ld.left : k;
  return ld;
}
83+
84+
int longest_queue(const std::vector<int>& data) {
85+
size_t num_threads = omp_get_max_threads();
86+
std::vector<chunk_data> results(num_threads);
87+
#pragma omp parallel num_threads(num_threads)
88+
{
89+
int tid = omp_get_thread_num(); // Get thread ID
90+
size_t chunk_size = data.size() / num_threads; // Define the chunk size
91+
size_t b = tid * chunk_size; // Start index for this thread
92+
size_t e = (tid == num_threads - 1) ? data.size() : b + chunk_size; // End index (handle remainder)
93+
94+
// Each thread calls process_chunk on its assigned portion of the array
95+
results[tid] = process_chunk(b, e, data);
96+
}
97+
size_t max = std::max(results[0].left, results[0].internal);
98+
for(int i = 1; i < results.size(); ++i) {
99+
max = std::max(max, results[i-1].right + results[i].left);
100+
if(results[i].left == results[i].chunk_size) {
101+
results[i].right = results[i-1].right + results[i].left;
102+
}
103+
max = std::max(max, results[i].internal);
104+
}
105+
return max;
106+
}
107+
108+
size_t simulate(size_t steps, std::vector<int>& current, std::vector<int>& next) {
109+
using namespace std::chrono_literals;
110+
std::mt19937 m{0}; // fixed seed to ease debugging.
111+
// std::uniform_int_distribution<int> d{0, 1};
112+
std::discrete_distribution<> d({10, 90});
113+
int lmax = 0;
114+
for(int i = 0; i < steps; ++i) {
115+
CLOCK_PRINT(i, 1, "longest", lmax = std::max(lmax, longest_queue(current)))
116+
CLOCK_PRINT(i, 2, "simulate", simulate_step(current, next))
117+
std::swap(current, next);
118+
current[0] = d(m); // Next car, random.
119+
}
120+
return lmax;
121+
}
122+
123+
int main(int argc, char** argv) {
124+
if(argc < 3) {
125+
std::cout << "usage: " << argv[0] << " <size> <steps> [threads]\n";
126+
exit(1);
127+
}
128+
size_t n = std::atoll(argv[1]);
129+
size_t steps = std::atoll(argv[2]);
130+
size_t num_threads = argc == 4 ? std::atoll(argv[3]) : omp_get_max_threads();
131+
std::cout << "num_threads = " << num_threads << std::endl;
132+
omp_set_num_threads(num_threads);
133+
std::vector<int> cells = initialize_cells(n);
134+
std::vector<int> next(n);
135+
auto start = std::chrono::steady_clock::now();
136+
size_t lmax = simulate(steps, cells, next);
137+
auto end = std::chrono::steady_clock::now();
138+
std::cout << "longest_queue = " << lmax << std::endl;
139+
std::cout << "time = " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms\n";
140+
return 0;
141+
}

0 commit comments

Comments
 (0)