2
2
#include "matrix_mul.h"
3
3
#include <math.h>
4
4
5
/*
 * Times a single call to mp_ijk(A, B, C, n).
 *
 * A, B: input matrices, passed through to mp_ijk unchanged.
 * n:    forwarded to zero_matrix() and mp_ijk() (presumably the matrix
 *       dimension -- confirm against matrix_mul.h).
 *
 * Returns the processor time consumed by the call, in seconds, computed
 * from two clock() samples.
 *
 * NOTE(review): the result matrix C obtained from zero_matrix() is never
 * released in this function; confirm how it should be freed.
 */
double benchmark_ijk(double **A, double **B, int n){
    double **C = zero_matrix(n);

    clock_t initial = clock();
    mp_ijk(A, B, C, n);
    clock_t final = clock();

    return ((double) (final - initial)) / CLOCKS_PER_SEC;
}
19
14
20
/*
 * Times a single call to mp_kij(A, B, C, n).
 *
 * A, B: input matrices, passed through to mp_kij unchanged.
 * n:    forwarded to zero_matrix() and mp_kij() (presumably the matrix
 *       dimension -- confirm against matrix_mul.h).
 *
 * Returns the processor time consumed by the call, in seconds, computed
 * from two clock() samples.
 *
 * NOTE(review): the result matrix C obtained from zero_matrix() is never
 * released in this function; confirm how it should be freed.
 */
double benchmark_kij(double **A, double **B, int n){
    double **C = zero_matrix(n);

    clock_t initial = clock();
    mp_kij(A, B, C, n);
    clock_t final = clock();

    return ((double) (final - initial)) / CLOCKS_PER_SEC;
}
34
24
35
/*
 * Times a single call to mp_jki(A, B, C, n).
 *
 * A, B: input matrices, passed through to mp_jki unchanged.
 * n:    forwarded to zero_matrix() and mp_jki() (presumably the matrix
 *       dimension -- confirm against matrix_mul.h).
 *
 * Returns the processor time consumed by the call, in seconds, computed
 * from two clock() samples.
 *
 * NOTE(review): the result matrix C obtained from zero_matrix() is never
 * released in this function; confirm how it should be freed.
 */
double benchmark_jki(double **A, double **B, int n){
    double **C = zero_matrix(n);

    clock_t initial = clock();
    mp_jki(A, B, C, n);
    clock_t final = clock();

    return ((double) (final - initial)) / CLOCKS_PER_SEC;
}
49
34
50
/*
 * Times a single call to mp_ikj(A, B, C, n).
 *
 * A, B: input matrices, passed through to mp_ikj unchanged.
 * n:    forwarded to zero_matrix() and mp_ikj() (presumably the matrix
 *       dimension -- confirm against matrix_mul.h).
 *
 * Returns the processor time consumed by the call, in seconds, computed
 * from two clock() samples.
 *
 * NOTE(review): the result matrix C obtained from zero_matrix() is never
 * released in this function; confirm how it should be freed.
 */
double benchmark_ikj(double **A, double **B, int n){
    double **C = zero_matrix(n);

    clock_t initial = clock();
    mp_ikj(A, B, C, n);
    clock_t final = clock();

    return ((double) (final - initial)) / CLOCKS_PER_SEC;
}
64
44
65
/*
 * Times a single call to mp_jik(A, B, C, n).
 *
 * A, B: input matrices, passed through to mp_jik unchanged.
 * n:    forwarded to zero_matrix() and mp_jik() (presumably the matrix
 *       dimension -- confirm against matrix_mul.h).
 *
 * Returns the processor time consumed by the call, in seconds, computed
 * from two clock() samples.
 *
 * NOTE(review): the result matrix C obtained from zero_matrix() is never
 * released in this function; confirm how it should be freed.
 */
double benchmark_jik(double **A, double **B, int n){
    double **C = zero_matrix(n);

    clock_t initial = clock();
    mp_jik(A, B, C, n);
    clock_t final = clock();

    return ((double) (final - initial)) / CLOCKS_PER_SEC;
}
79
54
80
/*
 * Times a single call to mp_kji(A, B, C, n).
 *
 * A, B: input matrices, passed through to mp_kji unchanged.
 * n:    forwarded to zero_matrix() and mp_kji() (presumably the matrix
 *       dimension -- confirm against matrix_mul.h).
 *
 * Returns the processor time consumed by the call, in seconds, computed
 * from two clock() samples.
 *
 * NOTE(review): the result matrix C obtained from zero_matrix() is never
 * released in this function; confirm how it should be freed.
 */
double benchmark_kji(double **A, double **B, int n){
    double **C = zero_matrix(n);

    clock_t initial = clock();
    mp_kji(A, B, C, n);
    clock_t final = clock();

    return ((double) (final - initial)) / CLOCKS_PER_SEC;
}
94
64
95
/*
 * Reports one benchmark measurement.
 *
 * filename: path of the data file; the line "<n> <time> " is appended to it.
 * text:     label printed in front of the measurement on stdout.
 * n:        size value recorded with the measurement.
 * time:     measured duration, printed with "%f".
 *
 * The measurement is always printed on stdout. If the data file cannot be
 * opened for appending, the failure is reported with perror() and the file
 * write is skipped instead of passing a NULL stream to fprintf()/fclose().
 */
void write_benchmark_time(char *filename, char *text, int n, double time){
    FILE *f = fopen(filename, "a");

    printf("%s: n = %d time = %f \n", text, n, time);

    if (f == NULL){
        perror(filename);
        return;
    }

    fprintf(f, "%d %f \n", n, time);
    fclose(f);
}
109
71
110
/*
 * Times a single call to mp_naive(A, B, C, n, p).
 *
 * A, B: input matrices, passed through to mp_naive unchanged.
 * n:    forwarded to zero_matrix() and mp_naive() (presumably the matrix
 *       dimension -- confirm against matrix_mul.h).
 * p:    forwarded to mp_naive() (presumably the modulus -- confirm).
 *
 * Returns the processor time consumed by the call, in seconds, computed
 * from two clock() samples.
 *
 * NOTE(review): the result matrix C obtained from zero_matrix() is never
 * released in this function; confirm how it should be freed.
 */
double benchmark_mod_naive(double **A, double **B, int n, double p){
    double **C = zero_matrix(n);

    clock_t initial = clock();
    mp_naive(A, B, C, n, p);
    clock_t final = clock();

    return ((double) (final - initial)) / CLOCKS_PER_SEC;
}
124
81
125
/*
 * Times a single call to mp_SIMD1(A, B, C, n, p, u).
 *
 * A, B: input matrices, passed through to mp_SIMD1 unchanged.
 * n:    forwarded to zero_matrix() and mp_SIMD1() (presumably the matrix
 *       dimension -- confirm against matrix_mul.h).
 * p, u: forwarded to mp_SIMD1() as-is; their meaning is defined there.
 *
 * Returns the processor time consumed by the call, in seconds, computed
 * from two clock() samples.
 *
 * NOTE(review): the result matrix C obtained from zero_matrix() is never
 * released in this function; confirm how it should be freed.
 */
double benchmark_mod_SIMD1(double **A, double **B, int n, double p, double u){
    double **C = zero_matrix(n);

    clock_t initial = clock();
    mp_SIMD1(A, B, C, n, p, u);
    clock_t final = clock();

    return ((double) (final - initial)) / CLOCKS_PER_SEC;
}
139
91
140
/*
 * Times a single call to mp_SIMD2(A, B, C, n, p, u).
 *
 * A, B: input matrices, passed through to mp_SIMD2 unchanged.
 * n:    forwarded to zero_matrix() and mp_SIMD2() (presumably the matrix
 *       dimension -- confirm against matrix_mul.h).
 * p, u: forwarded to mp_SIMD2() as-is; their meaning is defined there.
 *
 * Returns the processor time consumed by the call, in seconds, computed
 * from two clock() samples.
 *
 * NOTE(review): the result matrix C obtained from zero_matrix() is never
 * released in this function; confirm how it should be freed.
 */
double benchmark_mod_SIMD2(double **A, double **B, int n, double p, double u){
    double **C = zero_matrix(n);

    clock_t initial = clock();
    mp_SIMD2(A, B, C, n, p, u);
    clock_t final = clock();

    return ((double) (final - initial)) / CLOCKS_PER_SEC;
}
154
101
155
/*
 * Times a single call to mp_SIMD3(A, B, C, n, p, u).
 *
 * A, B: input matrices, passed through to mp_SIMD3 unchanged.
 * n:    forwarded to zero_matrix() and mp_SIMD3() (presumably the matrix
 *       dimension -- confirm against matrix_mul.h).
 * p, u: forwarded to mp_SIMD3() as-is; their meaning is defined there.
 *
 * Returns the processor time consumed by the call, in seconds, computed
 * from two clock() samples.
 *
 * NOTE(review): the result matrix C obtained from zero_matrix() is never
 * released in this function; confirm how it should be freed.
 */
double benchmark_mod_SIMD3(double **A, double **B, int n, double p, double u){
    double **C = zero_matrix(n);

    clock_t initial = clock();
    mp_SIMD3(A, B, C, n, p, u);
    clock_t final = clock();

    return ((double) (final - initial)) / CLOCKS_PER_SEC;
}
112
/*
 * Benchmarks the six loop orderings of the matrix product.
 *
 * For n = 2^8, 2^9 and 2^10, each ordering is timed m times on freshly
 * generated random_matrix(n, p) inputs. The mean time per ordering is then
 * printed and appended to data/benchmark_order_<order>.txt through
 * write_benchmark_time().
 *
 * p: forwarded to random_matrix() as-is.
 *
 * Author's observation: the most efficient ordering is IKJ.
 *
 * NOTE(review): A and B are allocated on every inner iteration and never
 * released here; confirm how random_matrix() results should be freed.
 */
void benchmark_loops_order(double p){
    enum { IJK, KIJ, JKI, IKJ, JIK, KJI, ORDER_COUNT };
    const int m = 5; // number of timed runs averaged for each size

    for (int i = 8; i < 11; i++){
        // Exact 2^i; avoids truncating a floating-point pow() result.
        int n = 1 << i;
        double sum[ORDER_COUNT] = {0};

        for (int j = 0; j < m; j++){
            double **A = random_matrix(n, p);
            double **B = random_matrix(n, p);
            sum[IJK] += benchmark_ijk(A, B, n);
            sum[KIJ] += benchmark_kij(A, B, n);
            sum[JKI] += benchmark_jki(A, B, n);
            sum[IKJ] += benchmark_ikj(A, B, n);
            sum[JIK] += benchmark_jik(A, B, n);
            sum[KJI] += benchmark_kji(A, B, n);
        }

        printf("\n");
        write_benchmark_time("data/benchmark_order_ijk.txt", "IJK", n, sum[IJK] / m);
        write_benchmark_time("data/benchmark_order_kij.txt", "KIJ", n, sum[KIJ] / m);
        write_benchmark_time("data/benchmark_order_jki.txt", "JKI", n, sum[JKI] / m);
        write_benchmark_time("data/benchmark_order_ikj.txt", "IKJ", n, sum[IKJ] / m);
        write_benchmark_time("data/benchmark_order_jik.txt", "JIK", n, sum[JIK] / m);
        write_benchmark_time("data/benchmark_order_kji.txt", "KJI", n, sum[KJI] / m);
    }
}
169
146
170
147
/*
 * Benchmarks the modular matrix-product variants (naive, SIMD1, SIMD2, SIMD3).
 *
 * For n = 2^8, 2^9 and 2^10, each variant is timed m times on freshly
 * generated random_matrix(n, p) inputs. The mean time per variant is then
 * printed and appended to data/benchmark_modulo_<variant>.txt through
 * write_benchmark_time().
 *
 * p: forwarded to random_matrix() and to every benchmarked variant.
 * u: forwarded to the SIMD variants as-is.
 *
 * NOTE(review): A and B are allocated on every inner iteration and never
 * released here; confirm how random_matrix() results should be freed.
 */
void benchmark_modulos(double p, double u){
    enum { NAIVE, SIMD1, SIMD2, SIMD3, VARIANT_COUNT };
    const int m = 5; // number of timed runs averaged for each size

    for (int i = 8; i < 11; i++){
        // Exact 2^i; avoids truncating a floating-point pow() result.
        int n = 1 << i;
        double sum[VARIANT_COUNT] = {0};

        for (int j = 0; j < m; j++){
            double **A = random_matrix(n, p);
            double **B = random_matrix(n, p);
            sum[NAIVE] += benchmark_mod_naive(A, B, n, p);
            sum[SIMD1] += benchmark_mod_SIMD1(A, B, n, p, u);
            sum[SIMD2] += benchmark_mod_SIMD2(A, B, n, p, u);
            sum[SIMD3] += benchmark_mod_SIMD3(A, B, n, p, u);
        }

        printf("\n");
        write_benchmark_time("data/benchmark_modulo_naive.txt", "Mod Naive", n, sum[NAIVE] / m);
        write_benchmark_time("data/benchmark_modulo_SIMD1.txt", "Mod SIMD1", n, sum[SIMD1] / m);
        write_benchmark_time("data/benchmark_modulo_SIMD2.txt", "Mod SIMD2", n, sum[SIMD2] / m);
        write_benchmark_time("data/benchmark_modulo_SIMD3.txt", "Mod SIMD3", n, sum[SIMD3] / m);
    }
}
185
176
186
177
void clean_file_loops (){
@@ -195,11 +186,10 @@ void clean_file_loops(){
195
186
}
196
187
197
188
/*
 * Empties the four modulo benchmark data files by opening each one in "w"
 * mode and closing it again.
 *
 * A file that cannot be opened (for instance when the data/ directory is
 * missing) is reported with perror() and skipped, rather than handing a
 * NULL stream to fclose().
 */
void clean_file_modulos(){
    static const char *const noms[] = {
        "data/benchmark_modulo_naive.txt",
        "data/benchmark_modulo_SIMD1.txt",
        "data/benchmark_modulo_SIMD2.txt",
        "data/benchmark_modulo_SIMD3.txt"
    };

    // Loop bound derived from the table so the two cannot drift apart.
    for (size_t i = 0; i < sizeof noms / sizeof noms[0]; i++){
        FILE *f = fopen(noms[i], "w");
        if (f == NULL){
            perror(noms[i]);
            continue;
        }
        fclose(f);
    }
}
@@ -215,6 +205,7 @@ int main(){
215
205
216
206
217
207
// // // Testing loops order
208
+ // 07/07/23 13:27 I did a benchmark for 5
218
209
// clean_file_loops();
219
210
// benchmark_loops_order(p);
220
211
0 commit comments