Skip to content

Commit dbc7e62

Browse files
Modified mp functions using OpenMP and OpenBLAS
1 parent 35e7681 commit dbc7e62

17 files changed

+51
-56
lines changed

src/data/benchmark_blocks_BLAS.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
256 0.111944
2-
512 1.544904
3-
1024 11.227543
1+
256 0.106118
2+
512 2.280422
3+
1024 10.936971

src/data/benchmark_blocks_BLAS_MP.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
256 0.053706
2-
512 6.166882
3-
1024 16.398344
1+
256 0.133424
2+
512 0.738715
3+
1024 4.116122

src/data/benchmark_blocks_NoBLAS.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
256 0.376757
2-
512 2.228056
3-
1024 13.547425
1+
256 0.729653
2+
512 1.982209
3+
1024 15.146636
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
256 0.061757
2-
512 0.533625
3-
1024 3.274027
1+
256 0.060192
2+
512 0.509851
3+
1024 3.235947
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
256 0.104602
2-
512 0.478607
3-
1024 3.103020
1+
256 0.055740
2+
512 0.544654
3+
1024 4.393463
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
256 0.360917
2-
512 0.988932
3-
1024 4.843099
1+
256 0.054218
2+
512 0.442351
3+
1024 3.072420
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
256 0.173421
2-
512 0.971074
3-
1024 4.871781
1+
256 0.090017
2+
512 0.436566
3+
1024 2.908950
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
256 0.110528
2-
512 0.436584
3-
1024 2.869017
1+
256 0.090896
2+
512 0.444606
3+
1024 3.990551

src/main

45.1 KB
Binary file not shown.

src/main.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ void benchmark_loops_order(double p){
270270
/* Benchmarking the order of loops.
271271
The most efficient one is IKJ.
272272
*/
273-
int m = 3; // Executes m times each algo
273+
int m = 1; // Executes m times each algo
274274
for (int i=8; i<11; i++){
275275
int n = (int) pow(2, i);
276276
double sum_ijk = 0;
@@ -308,7 +308,7 @@ void benchmark_modulos(double p, double u, double u_overline, double u_b){
308308
/* Benchmarking different modulos.
309309
The most efficient one is SIMD2.
310310
*/
311-
int m = 3; // Executes m times each algo
311+
int m = 1; // Executes m times each algo
312312
for (int i=8; i<11; i++){
313313
int n = (int) pow(2, i);
314314
double sum_mod_naive = 0;
@@ -344,7 +344,7 @@ void benchmark_modulos_MP(double p, double u, double u_overline, double u_b){
344344
/* Benchmarking different modulos.
345345
The most efficient one is SIMD2.
346346
*/
347-
int m = 3; // Executes m times each algo
347+
int m = 1; // Executes m times each algo
348348
for (int i=8; i<11; i++){
349349
int n = (int) pow(2, i);
350350

@@ -381,7 +381,7 @@ void benchmark_modulos_MP(double p, double u, double u_overline, double u_b){
381381
void benchmark_blocks(double p, double u_overline){
382382
/* Benchmarking different block products.
383383
*/
384-
int m = 3; // Executes m times each algo
384+
int m = 1; // Executes m times each algo
385385
for (int i=8; i<11; i++){
386386
int n = (int) pow(2, i);
387387
int b = get_blocksize(get_bitsize(p), n);
@@ -508,15 +508,15 @@ int main(){
508508
// benchmark_modulos(p, u, u_overline, u_b);
509509

510510
// Benchmarking different modulos with OpenMP
511-
clean_file_modulos_MP();
512-
benchmark_modulos_MP(p, u, u_overline, u_b);
511+
// clean_file_modulos_MP();
512+
// benchmark_modulos_MP(p, u, u_overline, u_b);
513513

514514
// Benchmarking blocks.
515-
// clean_file_blocks();
516-
// benchmark_blocks(p, u_overline);
515+
clean_file_blocks();
516+
benchmark_blocks(p, u_overline);
517517

518-
clean_file_blocks_MP();
519-
benchmark_blocks_MP(p, u_overline);
518+
// clean_file_blocks_MP();
519+
// benchmark_blocks_MP(p, u_overline);
520520

521521

522522
return 0;

src/main_blas

16.4 KB
Binary file not shown.

src/main_test

43.8 KB
Binary file not shown.

src/matrix.o

10.1 KB
Binary file not shown.

src/matrix_mul.c

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,8 @@ void mp_SIMD1_MP(double* A, double* B, double* C, int n, double p, double u){
199199
void mp_SIMD2_MP(double* A, double* B, double* C, int n, double p, double u){
200200
// Assert C is a zero matrix
201201
for (int k=0; k<n; k++){
202+
#pragma omp parallel for
202203
for (int i=0; i<n; i++){
203-
#pragma omp parallel for
204204
for (int j=0; j<n; j++){
205205
C[i*n + j] = C[i*n + j] + modulo_SIMD2(A[i*n + k] * B[k*n + j], p, u);
206206
}
@@ -274,10 +274,8 @@ void mp_block(double* A, double* B, double* C, int n, double p, double u, int b)
274274
}
275275
}
276276

277-
for (int i=0; i<n; i++){
278-
for (int j=0; j<n; j++){
279-
C[i*n + j] = modulo_SIMD3(C[i*n + j], p, u);
280-
}
277+
for (int i=0; i<n*n; i++){
278+
C[i] = modulo_SIMD3(C[i], p, u);
281279
}
282280

283281
}
@@ -288,16 +286,15 @@ void mp_block_BLAS(double* A, double* B, double* C, int n, double p, double u, i
288286
/* Compute the product of two matrices using OpenBLAS's block product.
289287
It allows us to reduce the amount of modulo needed.
290288
*/
291-
289+
openblas_set_num_threads(1); // 8 is slower than 1.
292290
for (int k=0; k<n; k+=b){
293291
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
294292
n, n, b, 1, A + k, n, B + n*k, n,
295293
1, C, n);
296294

297-
for (int i=0; i<n; i++){
298-
for (int j=0; j<n; j++){
299-
C[i*n + j] = modulo_SIMD3(C[i*n + j], p, u);
300-
}
295+
296+
for (int i=0; i<n*n; i++){
297+
C[i] = modulo_SIMD3(C[i], p, u);
301298
}
302299

303300
}
@@ -306,19 +303,16 @@ void mp_block_BLAS(double* A, double* B, double* C, int n, double p, double u, i
306303

307304
void mp_block_BLAS_MP(double* A, double* B, double* C, int n, double p, double u, int b){
308305
/* Compute the product of two matrices using OpenBLAS's block product.
309-
It allows to reduce the amount of modulo needed.
306+
It allows us to reduce the amount of modulo needed.
310307
*/
311-
312308
for (int k=0; k<n; k+=b){
313309
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
314310
n, n, b, 1, A + k, n, B + n*k, n,
315311
1, C, n);
316312

317313
#pragma omp parallel for
318-
for (int i=0; i<n; i++){
319-
for (int j=0; j<n; j++){
320-
C[i*n + j] = modulo_SIMD3(C[i*n + j], p, u);
321-
}
314+
for (int i=0; i<n*n; i++){
315+
C[i] = modulo_SIMD3(C[i], p, u);
322316
}
323317

324318
}

src/matrix_mul.o

17.6 KB
Binary file not shown.

src/tools/gnuplot_blocks_MP.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ set style line 6 lc rgb '#ff00ff' lt 1 lw 2 pt 7 pi -1 ps 1.5
1515
set pointintervalbox 3
1616

1717
plot "../data/benchmark_modulo_SIMD2.txt" using 1:2 title "No blocks" with linespoints ls 1
18-
replot "../data/benchmark_modulo_MP_SIMD2.txt" using 1:2 title "MP SIMD2" with linespoints ls 2
18+
replot "../data/benchmark_modulo_MP_SIMD2.txt" using 1:2 title "MP SIMD3" with linespoints ls 2
1919
replot "../data/benchmark_blocks_BLAS.txt" using 1:2 title "BLAS" with linespoints ls 3
2020
replot "../data/benchmark_blocks_BLAS_MP.txt" using 1:2 title "BLAS MP" with linespoints ls 4
21-
replot "../data/benchmark_order_kij.txt" using 1:2 title "No Mod" with linespoints ls 5
21+
# replot "../data/benchmark_order_kij.txt" using 1:2 title "No Mod" with linespoints ls 5
2222

2323
# Graph Blocks product with multithreads

src/tools/gnuplot_modulos_MP.txt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,12 @@ set pointintervalbox 3
2020
# replot "../data/benchmark_modulo_MP_SIMD1.txt" using 1:2 title "MP SIMD1" with linespoints ls 3
2121
# replot "../data/benchmark_modulo_SIMD1.txt" using 1:2 title "SIMD1" with linespoints ls 3
2222

23-
plot "../data/benchmark_modulo_MP_SIMD2.txt" using 1:2 title "MP SIMD2" with linespoints ls 3
24-
replot "../data/benchmark_modulo_SIMD2.txt" using 1:2 title "SIMD2" with linespoints ls 4
23+
plot "../data/benchmark_modulo_SIMD2.txt" using 1:2 title "SIMD2" with linespoints ls 3
24+
replot "../data/benchmark_modulo_SIMD3.txt" using 1:2 title "SIMD3" with linespoints ls 4
25+
26+
replot "../data/benchmark_modulo_MP_SIMD2.txt" using 1:2 title "MP SIMD2" with linespoints ls 5
27+
replot "../data/benchmark_modulo_MP_SIMD3.txt" using 1:2 title "MP SIMD3" with linespoints ls 6
2528

26-
replot "../data/benchmark_modulo_MP_SIMD3.txt" using 1:2 title "MP SIMD3" with linespoints ls 5
27-
replot "../data/benchmark_modulo_SIMD3.txt" using 1:2 title "SIMD3" with linespoints ls 6
2829

2930
# replot "../data/benchmark_modulo_MP_Barrett.txt" using 1:2 title "MPBarrett" with linespoints ls 6
3031
# replot "../data/benchmark_modulo_Barrett.txt" using 1:2 title "Barrett" with linespoints ls 6

0 commit comments

Comments
 (0)