Skip to content

Commit a5dd9ac

Browse files
Benchmarked the matrix product with blocks
1 parent 55e205f commit a5dd9ac

13 files changed

+272
-38
lines changed

src/data/benchmark_blocks_BLAS.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
256 0.513373
2+
512 4.054125
3+
1024 51.442055

src/data/benchmark_blocks_NoBLAS.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
256 0.539848
2+
512 2.960455
3+
1024 42.780513

src/main

35.2 KB
Binary file not shown.

src/main.c

Lines changed: 85 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#include "matrix_mul.h"
33
#include <math.h>
44

5+
6+
// Matrix Product with different order of loops.
57
double benchmark_ijk(double** A, double** B, int n){
68
double** C = zero_matrix_2D(n);
79

@@ -68,12 +70,8 @@ double benchmark_kji(double** A, double** B, int n){
6870
return ((double) (final - initial)) / CLOCKS_PER_SEC;
6971
}
7072

71-
void write_benchmark_time(char* filename, char* text, int n, double time){
72-
FILE* f = fopen(filename, "a");
73-
printf("%s: n = %d time = %f \n", text,n, time);
74-
fprintf(f, "%d %f \n", n, time);
75-
fclose(f);
76-
}
73+
74+
// Matrix product with modular reduction.
7775

7876
double benchmark_mod_naive(double** A, double** B, int n, double p){
7977
double** C = zero_matrix_2D(n);
@@ -130,6 +128,38 @@ double benchmark_mod_Barrett(double** A, double** B, int n, double p, double u){
130128
return ((double) (final - initial)) / CLOCKS_PER_SEC;
131129
}
132130

131+
// Matrix Product with Blocks.
132+
133+
double benchmark_blocks_NoBLAS(double* A, double* B, int n, double p, double u, int b){
    /* Times a single call of mp_block on A and B.
       The output buffer is obtained from zero_matrix_1D(n*n) before the timed
       region and released with delete_matrix_1D after it, so only the product
       itself is measured.
       Returns the processor time reported by clock(), converted to seconds.
    */
    double* product = zero_matrix_1D(n*n);

    const clock_t start = clock();
    mp_block(A, B, product, n, p, u, b);
    const clock_t stop = clock();

    const double seconds = ((double) (stop - start)) / CLOCKS_PER_SEC;

    delete_matrix_1D(&product, n);
    return seconds;
}
143+
144+
double benchmark_blocks_BLAS(double* A, double* B, int n, double p, double u, int b){
    /* Times a single call of mp_block_BLAS on A and B.
       The output buffer is obtained from zero_matrix_1D(n*n) before the timed
       region and released with delete_matrix_1D after it, so only the product
       itself is measured.
       Returns the processor time reported by clock(), converted to seconds.
    */
    double* product = zero_matrix_1D(n*n);

    const clock_t start = clock();
    mp_block_BLAS(A, B, product, n, p, u, b);
    const clock_t stop = clock();

    const double seconds = ((double) (stop - start)) / CLOCKS_PER_SEC;

    delete_matrix_1D(&product, n);
    return seconds;
}
154+
155+
156+
void write_benchmark_time(char* filename, char* text, int n, double time){
    /* Reports one benchmark measurement.
       Prints "<text>: n = <n> time = <time>" on stdout, then appends the line
       "<n> <time>" to the file `filename` (opened in append mode).
         filename: path of the results file.
         text:     label shown on stdout only.
         n:        size parameter of the run.
         time:     measured time to record.
       If the file cannot be opened or closed cleanly, the error is reported
       on stderr and the function returns; the stdout line is still printed.
    */
    printf("%s: n = %d time = %f \n", text, n, time);

    FILE* f = fopen(filename, "a");
    if (f == NULL){
        // Writing to or closing a NULL stream is undefined behavior.
        perror(filename);
        return;
    }

    fprintf(f, "%d %f \n", n, time);

    // fclose flushes the buffered line; a failure here means it was not saved.
    if (fclose(f) != 0){
        perror(filename);
    }
}
162+
133163

134164
void benchmark_loops_order(double p){
135165
/* Benchmarking the order of loops.
@@ -171,7 +201,7 @@ void benchmark_loops_order(double p){
171201

172202
void benchmark_modulos(double p, double u, double u_overline, double u_b){
173203
/* Benchmarking different modulos.
174-
The most efficient one is IKJ.
204+
The most efficient one is SIMD2.
175205
*/
176206
int m = 5; // Executes m times each algo
177207
for (int i=8; i<11; i++){
@@ -205,6 +235,35 @@ void benchmark_modulos(double p, double u, double u_overline, double u_b){
205235
}
206236
}
207237

238+
void benchmark_blocks(double p, double u_overline){
    /* Benchmarking the block matrix product, with and without BLAS.
       For n = 256, 512 and 1024, calls benchmark_blocks_NoBLAS and
       benchmark_blocks_BLAS m times on fresh random matrices and records the
       average time of each in data/benchmark_blocks_NoBLAS.txt and
       data/benchmark_blocks_BLAS.txt.
         p:          modulus forwarded to random_matrix_1D, get_bitsize and
                     both benchmarked products.
         u_overline: precomputed constant forwarded to both products.
       If a random matrix cannot be allocated, the error is reported on stderr
       and the benchmark stops without writing a result for the current n.
    */
    int m = 5; // Executes m times each algo
    for (int i=8; i<11; i++){
        int n = 1 << i; // exact power of two, no floating-point round trip
        int b = get_blocksize(get_bitsize(p), n);

        double sum_blocks = 0;
        double sum_blocks_BLAS = 0;

        for (int j=0; j<m; j++){
            double* A = random_matrix_1D(n, p);
            double* B = random_matrix_1D(n, p);
            if (A == NULL || B == NULL){
                // random_matrix_1D returns NULL when allocation fails; the
                // products would otherwise read through a NULL pointer.
                fprintf(stderr, "benchmark_blocks: cannot allocate matrices for n = %d \n", n);
                if (A != NULL){
                    delete_matrix_1D(&A, n);
                }
                if (B != NULL){
                    delete_matrix_1D(&B, n);
                }
                return;
            }

            // Both variants are timed on the same pair of inputs.
            sum_blocks += benchmark_blocks_NoBLAS(A, B, n, p, u_overline, b);
            sum_blocks_BLAS += benchmark_blocks_BLAS(A, B, n, p, u_overline, b);

            delete_matrix_1D(&A, n);
            delete_matrix_1D(&B, n);
        }

        printf("\n");
        write_benchmark_time("data/benchmark_blocks_NoBLAS.txt", "Block", n, sum_blocks/m);
        write_benchmark_time("data/benchmark_blocks_BLAS.txt", "Block BLAS", n, sum_blocks_BLAS/m);
    }
}
265+
266+
208267
void clean_file_loops(){
209268
char noms[6][64] = {"data/benchmark_order_ijk.txt", "data/benchmark_order_ikj.txt",\
210269
"data/benchmark_order_jik.txt", "data/benchmark_order_jki.txt",\
@@ -227,6 +286,16 @@ void clean_file_modulos(){
227286
}
228287
}
229288

289+
void clean_file_blocks(){
    /* Empties the two result files of the block benchmark, so that a new run
       does not append to measurements left by a previous one.
       A file that cannot be opened is reported on stderr and skipped.
    */
    const char* noms[] = {"data/benchmark_blocks_BLAS.txt", "data/benchmark_blocks_NoBLAS.txt"};
    const int count = (int) (sizeof noms / sizeof noms[0]);

    for (int i=0; i<count; i++){
        // Opening in "w" mode truncates an existing file or creates it.
        FILE *f = fopen(noms[i], "w");
        if (f == NULL){
            // fclose(NULL) is undefined behavior; report and move on.
            perror(noms[i]);
            continue;
        }
        fclose(f);
    }
}
298+
230299

231300
int main(){
232301
// Initialization
@@ -240,15 +309,20 @@ int main(){
240309
fesetround(FE_TONEAREST);
241310
u_int32_t u_b = (int) (pow(2, 56) / p); // Constant for Barrett
242311

243-
// // // Testing loops order
312+
313+
// Benchmarking order of loop.
244314
// 07/07/23 13:27 I did a benchmark for 5
245315
// clean_file_loops();
246316
// benchmark_loops_order(p);
247317

248-
// Testing different modulo
318+
// Benchmarking different modulos.
249319
//
250-
clean_file_modulos();
251-
benchmark_modulos(p, u, u_overline, u_b);
320+
// clean_file_modulos();
321+
// benchmark_modulos(p, u, u_overline, u_b);
322+
323+
// Benchmarking blocks.
324+
clean_file_blocks();
325+
benchmark_blocks(p, u_overline);
252326

253327

254328
return 0;

src/main_blas

16.4 KB
Binary file not shown.

src/main_test

34.5 KB
Binary file not shown.

src/main_test.c

Lines changed: 90 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ int main(int argc, char** argv){
2929
const int TEST5 = 0;
3030
const int TEST6 = 0;
3131
const int TEST7 = 0;
32-
const int TEST8 = 1;
32+
const int TEST8 = 0;
33+
const int TEST9 = 1;
3334

3435

3536
if (TEST1){
@@ -210,18 +211,16 @@ int main(int argc, char** argv){
210211
}
211212

212213
if (TEST6){
213-
// Testing blocksize and mp_block
214+
// Testing blocksize
214215
srand(time(NULL));
215-
double p = pow(2, 3) - 5;
216+
double p = pow(2, 26) - 5;
216217
int bitsize_p = 26;
217218
int n = 2;
218219

219220
int blocksize = get_blocksize(bitsize_p, n);
220221
printf("blocksize = %d \n", blocksize);
221222
double** A = random_matrix_2D(n, p);
222-
223223
double** B = random_matrix_2D(n, p);
224-
225224
double* A_1D = convert_2D_to_1D(A, n);
226225
double* B_1D = convert_2D_to_1D(B, n);
227226
double* C = zero_matrix_1D(n*n);
@@ -293,28 +292,111 @@ int main(int argc, char** argv){
293292
if (TEST8){
294293
// Testing sub_matrix product with OpenBLAS.
295294
int n;
296-
int b = 2;
295+
double p = pow(2, 26) - 5;
296+
// double p = pow(2, 25) - 39;
297+
298+
fesetround(FE_UPWARD);
299+
double u_overline = 1.0 / p; // Constant for SIMD2 and SIMD3
300+
fesetround(FE_TONEAREST);
297301

298302
double** A = read_matrix("data/Matrix_A_1D_3.txt", &n);
299303
double** B = read_matrix("data/Matrix_B_1D_3.txt", &n);
304+
305+
int bitsize_p = get_bitsize(p);
306+
int b = get_blocksize(bitsize_p, n);
307+
printf("b = %d \n", b);
308+
300309
double* A_1D = convert_2D_to_1D(A, n);
301310
double* B_1D = convert_2D_to_1D(B, n);
302311
double* C = zero_matrix_1D(n*n);
312+
double* D = zero_matrix_1D(n*n);
313+
double** E = zero_matrix_2D(n);
314+
315+
printf("Matrix A: \n");
316+
print_matrix_1D(A_1D, n);
317+
printf("Matrix B: \n");
318+
print_matrix_1D(B_1D, n);
319+
320+
mp_block_BLAS(A_1D, B_1D, C, n, p, u_overline, b);
321+
mp_block(A_1D, B_1D, D, n, p, u_overline, b);
322+
mp_kij(A, B, E, n);
303323

304-
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,b,b,b, 1, A_1D + b, n, B_1D + n*b, n, 1, C + 0,n);
305-
//
306324
printf("Matrix C: \n");
307325
print_matrix_1D(C, n);
326+
printf("Matrix D: \n");
327+
print_matrix_1D(D, n);
328+
printf("Matrix E: \n");
329+
print_matrix_2D(E, n);
308330

309331
delete_matrix_2D(&A, n);
310332
delete_matrix_2D(&B, n);
311333

312334
delete_matrix_1D(&A_1D, n);
313335
delete_matrix_1D(&B_1D, n);
314336
delete_matrix_1D(&C, n);
337+
delete_matrix_1D(&D, n);
315338

316339
}
317340

341+
if (TEST9){
342+
// Test for matrix product with blocks.
343+
srand(time(NULL));
344+
double p = pow(2, 26) - 5;
345+
// double p = pow(2, 25) - 39;
346+
347+
348+
// Precomputed constants for Modular and Blocking functions.
349+
fesetround(FE_UPWARD);
350+
double u_overline = 1.0 / p;
351+
int n = 256;
352+
int bitsize_p = get_bitsize(p);
353+
printf("bp = %d \n", bitsize_p);
354+
int b = get_blocksize(bitsize_p, n);
355+
printf("b = %d \n", b);
356+
357+
358+
for (int i=0; i<10; i++){
359+
360+
double**A = random_matrix_2D(n, p);
361+
double**B = random_matrix_2D(n, p);
362+
double* A_1D = convert_2D_to_1D(A, n);
363+
double* B_1D = convert_2D_to_1D(B, n);
364+
double*C = zero_matrix_1D(n*n); // With BLAS
365+
double*D = zero_matrix_1D(n*n); // Without BLAS
366+
double**E = zero_matrix_2D(n); // Naive mp
367+
368+
mp_block_BLAS(A_1D, B_1D, C, n, p, u_overline, b);
369+
mp_block(A_1D, B_1D, D, n, p, u_overline, b);
370+
mp_SIMD2(A, B, E, n, p, u_overline);
371+
372+
373+
// write_matrix(A, n, "data/Matrix_A.txt");
374+
// write_matrix(B, n, "data/Matrix_B.txt");
375+
// write_matrix(C, n, "data/Matrix_C.txt"); // Naive
376+
// write_matrix(D, n, "data/Matrix_D.txt"); // SIMD1
377+
// write_matrix(E, n, "data/Matrix_E.txt"); // SIMD2
378+
// write_matrix(F, n, "data/Matrix_F.txt"); // SIMD3
379+
// write_matrix(G, n, "data/Matrix_G.txt"); // Barrett
380+
381+
int nb1 = equals_matrix_2D_1D(E, C, n);
382+
int nb2 = equals_matrix_2D_1D(E, D, n);
383+
384+
delete_matrix_2D(&A, n);
385+
delete_matrix_2D(&B, n);
386+
delete_matrix_1D(&A_1D, n);
387+
delete_matrix_1D(&B_1D, n);
388+
delete_matrix_1D(&C, n);
389+
delete_matrix_1D(&D, n);
390+
delete_matrix_2D(&E, n);
391+
392+
393+
printf("i=%d \n", i);
394+
assert(nb1==1);
395+
assert(nb2==1);
396+
}
397+
398+
printf("Tests passed \n");
399+
}
318400

319401

320402

src/makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ matrix_mul.o: matrix_mul.c
99

1010
# Mains
1111
main: main.c matrix.o matrix_mul.o
12-
gcc -o main main.c matrix.o matrix_mul.o -lm
12+
gcc -o main main.c matrix.o matrix_mul.o -lopenblas -lm
1313
main_test: main_test.c matrix.o matrix_mul.o
1414
gcc -o main_test main_test.c matrix.o matrix_mul.o -lopenblas -lm
1515
main_blas: main_blas.c

src/matrix.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ double* random_matrix_1D(int n, double p){
6363
with numbers between 0 and p-1. */
6464

6565
// Allocate the matrix
66-
double* mat = zero_matrix_1D(n);
66+
double* mat = zero_matrix_1D(n*n);
6767
// Fill the matrix
6868
if (mat == NULL){
6969
return NULL;
@@ -253,7 +253,7 @@ double** read_matrix(char* filename, int* n){
253253
j++;
254254
}
255255
}
256-
256+
257257
fclose(f);
258258
return mat;
259259
}

src/matrix.o

8.97 KB
Binary file not shown.

0 commit comments

Comments
 (0)