Skip to content

Commit fbbd08e

Browse files
Modular functions bug fixes
1 parent b1cd4b1 commit fbbd08e

File tree

7 files changed

+146
-64
lines changed

7 files changed

+146
-64
lines changed

src/data/Matrix_A.txt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1-
2
2-
[ 3464256.000000 41961928.000000 ]
3-
[ 14050932.000000 45158647.000000 ]
1+
10
2+
[ 3893311.000000 24427124.000000 57579801.000000 12343826.000000 16454108.000000 59310408.000000 20632655.000000 9257869.000000 4194537.000000 56019209.000000 ]
3+
[ 27438191.000000 13382507.000000 14953278.000000 12482153.000000 57396864.000000 30409038.000000 25892983.000000 47680411.000000 11459931.000000 19495085.000000 ]
4+
[ 10937240.000000 8714633.000000 8852245.000000 41501784.000000 28833321.000000 51854748.000000 59142188.000000 38410981.000000 52902145.000000 16641988.000000 ]
5+
[ 65240720.000000 56795296.000000 41069112.000000 55711502.000000 2030103.000000 57523060.000000 47913051.000000 22662759.000000 66780769.000000 52107588.000000 ]
6+
[ 11572949.000000 27110102.000000 65489935.000000 26526228.000000 39592255.000000 55777780.000000 56935106.000000 65485239.000000 36349173.000000 1286179.000000 ]
7+
[ 17871305.000000 47286413.000000 10000652.000000 26723550.000000 21679178.000000 38833973.000000 11469440.000000 13712348.000000 10136096.000000 64371425.000000 ]
8+
[ 30354176.000000 8267957.000000 54057703.000000 4314269.000000 63979459.000000 56087806.000000 61837330.000000 44783651.000000 11641546.000000 61509240.000000 ]
9+
[ 29782220.000000 23214496.000000 21510483.000000 28163137.000000 49740724.000000 61102579.000000 16832058.000000 39566811.000000 59478959.000000 53181231.000000 ]
10+
[ 40852830.000000 10241405.000000 33358626.000000 50853483.000000 36964956.000000 55037644.000000 22578597.000000 48434236.000000 1641133.000000 32714693.000000 ]
11+
[ 45696642.000000 31995149.000000 40982490.000000 32645486.000000 36309419.000000 37852930.000000 21624274.000000 31037890.000000 15527722.000000 33265820.000000 ]

src/data/Matrix_B.txt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1-
2
2-
[ 47333287.000000 10245261.000000 ]
3-
[ 8857026.000000 48800053.000000 ]
1+
10
2+
[ 25438111.000000 45309783.000000 56480316.000000 46948595.000000 6363901.000000 39112021.000000 40942315.000000 23195959.000000 11569974.000000 33312255.000000 ]
3+
[ 9268172.000000 52422804.000000 43553660.000000 42626638.000000 36167428.000000 13409597.000000 30555423.000000 58745866.000000 61843673.000000 32196397.000000 ]
4+
[ 24351540.000000 40431297.000000 64191546.000000 65334031.000000 5967924.000000 33391946.000000 36077942.000000 27592198.000000 64429836.000000 51605505.000000 ]
5+
[ 60858019.000000 22759089.000000 29806429.000000 50229476.000000 2598665.000000 36170330.000000 22232479.000000 43540820.000000 59366129.000000 33802453.000000 ]
6+
[ 9744216.000000 1525282.000000 19116398.000000 53297716.000000 44151920.000000 55283667.000000 66707154.000000 7598325.000000 46920514.000000 61441808.000000 ]
7+
[ 39794562.000000 4163195.000000 34764246.000000 36877089.000000 2388207.000000 40732171.000000 3160177.000000 38466150.000000 1215350.000000 480994.000000 ]
8+
[ 22962796.000000 62073209.000000 23240083.000000 52769065.000000 45193667.000000 25838588.000000 21830536.000000 317287.000000 2270389.000000 14087646.000000 ]
9+
[ 34119740.000000 12014605.000000 15612929.000000 53235978.000000 65312162.000000 59764689.000000 41410626.000000 64910457.000000 253995.000000 21222281.000000 ]
10+
[ 59243246.000000 40048557.000000 25385477.000000 26898634.000000 9816628.000000 27773684.000000 521786.000000 12976805.000000 66239674.000000 1737136.000000 ]
11+
[ 13457799.000000 22093611.000000 63810186.000000 36697723.000000 7753817.000000 41894994.000000 62536311.000000 29584193.000000 42212281.000000 64806701.000000 ]

src/data/Matrix_C.txt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1-
2
2-
[ 65679586.000000 30980945.000000 ]
3-
[ 47825320.000000 16604839.000000 ]
1+
10
2+
[ 14563787.000000 64401435.000000 11643024.000000 7581626.000000 943181.000000 39294832.000000 7589971.000000 33443805.000000 6252983.000000 3686009.000000 ]
3+
[ 29383461.000000 42754670.000000 50070319.000000 24274051.000000 51113951.000000 44590081.000000 52594916.000000 50935599.000000 26653102.000000 65328631.000000 ]
4+
[ 46237780.000000 34443052.000000 51288319.000000 58035708.000000 37090740.000000 35902143.000000 48667108.000000 13523625.000000 8294255.000000 35089420.000000 ]
5+
[ 23648146.000000 55627206.000000 7204688.000000 61116640.000000 18245497.000000 65934682.000000 37155407.000000 62133135.000000 6037512.000000 15791650.000000 ]
6+
[ 30436082.000000 64237578.000000 53627088.000000 34434227.000000 61353631.000000 60479012.000000 432911.000000 17850205.000000 35596260.000000 57228007.000000 ]
7+
[ 63970889.000000 13447272.000000 13005537.000000 63237479.000000 28108506.000000 51902132.000000 20809873.000000 30202201.000000 53561346.000000 19157802.000000 ]
8+
[ 49193622.000000 62336157.000000 39410208.000000 43950949.000000 18615219.000000 30442887.000000 18372142.000000 53054489.000000 34069085.000000 48506461.000000 ]
9+
[ 7884627.000000 39933127.000000 44277864.000000 41508567.000000 38300322.000000 36483974.000000 36385120.000000 38776002.000000 49344873.000000 52639230.000000 ]
10+
[ 44354325.000000 35893352.000000 41668824.000000 20514672.000000 38120485.000000 50154313.000000 63596864.000000 25019897.000000 33599254.000000 21922498.000000 ]
11+
[ 63254267.000000 1400635.000000 55965936.000000 30804987.000000 18378282.000000 18853022.000000 26380898.000000 51771263.000000 62443765.000000 63733880.000000 ]

src/main.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ void benchmark_loops_order(double p){
144144
}
145145
}
146146

147-
void benchmark_modulos(double p, double u){
147+
void benchmark_modulos(double p, double u, double u_overline){
148148
/* Benchmarking different modulos.
149149
The most efficient one is IKJ.
150150
*/
@@ -161,8 +161,8 @@ void benchmark_modulos(double p, double u){
161161
double**B = random_matrix(n, p);
162162
sum_mod_naive += benchmark_mod_naive(A, B, n, p);
163163
sum_mod_SIMD1 += benchmark_mod_SIMD1(A, B, n, p, u);
164-
sum_mod_SIMD2 += benchmark_mod_SIMD2(A, B, n, p, u);
165-
sum_mod_SIMD3 += benchmark_mod_SIMD3(A, B, n, p, u);
164+
sum_mod_SIMD2 += benchmark_mod_SIMD2(A, B, n, p, u_overline);
165+
sum_mod_SIMD3 += benchmark_mod_SIMD3(A, B, n, p, u_overline);
166166
}
167167

168168
printf("\n");
@@ -199,9 +199,11 @@ void clean_file_modulos(){
199199
int main(){
200200
// Initialization
201201
srand(time(NULL));
202+
fesetround(FE_DOWNWARD);
202203
double p = pow(2, 26) - 5;
203-
double u = 1.0 / p; // u = inv(p)
204-
u_int32_t u_b = (int) (pow(2, 36) / p);
204+
double u = 1.0 / p;
205+
double u_overline = rint(1.0 / p);
206+
u_int32_t u_b = (int) (pow(2, 36) / p); // Needed for Barrett
205207

206208

207209
// // // Testing loops order
@@ -211,7 +213,7 @@ int main(){
211213

212214
// Testing different modulo
213215
clean_file_modulos();
214-
benchmark_modulos(p, u);
216+
benchmark_modulos(p, u, u_overline);
215217

216218

217219
return 0;

src/main_test.c

Lines changed: 74 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,25 +22,30 @@
2222
int main(int argc, char** argv){
2323
const int TEST1 = 0;
2424
const int TEST2 = 0;
25-
const int TEST3 = 0;
25+
const int TEST3 = 1;
2626
const int TEST4 = 1;
27-
// const int TEST5 = 0;
27+
const int TEST5 = 1;
2828
// const int TEST6 = 0;
2929
// const int TEST7 = 1;
3030

3131

3232
if (TEST1){
33-
// Test for modulo_SIMD1
33+
// Testing modular functions
3434
double p = pow(2, 26) - 5;
35-
double u = 1.0 / p;
35+
36+
// Precomputed constants for Modular functions
37+
double u = 1.0 / p; // Constant for SIMD
38+
u_int32_t u_b = (int) (pow(2, 56) / p); // Constant for Barrett
39+
3640
double a = pow(2, 26);
37-
u_int32_t u_b = (int) (pow(2, 56) / p); // Needed for Barrett
3841

42+
fesetround(FE_DOWNWARD);
3943
double SIMD1 = modulo_SIMD1(a, p, u);
44+
fesetround(FE_UPWARD);
4045
double SIMD2 = modulo_SIMD2(a, p, u);
4146
double SIMD3 = modulo_SIMD3(a, p, u);
42-
double Barrett = modulo_barrett(a, p, u_b);
43-
47+
fesetround(FE_DOWNWARD);
48+
double Barrett = modulo_Barrett(a, p, u_b);
4449

4550
printf("a = %f \n", a);
4651
printf("p = %f \n", p);
@@ -50,21 +55,25 @@ int main(int argc, char** argv){
5055
printf("SIMD3 returns:%f \n", SIMD3);
5156
printf("Barrett returns:%f \n\n", Barrett);
5257

53-
5458
}
5559

5660
if (TEST2){
57-
// Test for modulo_SIMD1
61+
// Testing modular functions
5862
double p = pow(2, 26) - 5;
59-
double u = 1.0 / p;
63+
64+
// Precomputed constants for Modular functions
65+
double u = 1.0 / p; // Constant for SIMD
66+
u_int32_t u_b = (int) (pow(2, 56) / p); // Constant for Barrett
67+
6068
double a = (p-1) * (p-1) * 32;
61-
u_int32_t u_b = (int) (pow(2, 56) / p);
6269

70+
fesetround(FE_DOWNWARD);
6371
double SIMD1 = modulo_SIMD1(a, p, u);
72+
fesetround(FE_UPWARD);
6473
double SIMD2 = modulo_SIMD2(a, p, u);
6574
double SIMD3 = modulo_SIMD3(a, p, u);
66-
double Barrett = modulo_barrett(a, p, u_b);
67-
75+
fesetround(FE_DOWNWARD);
76+
double Barrett = modulo_Barrett(a, p, u_b);
6877

6978
printf("a = (p-1)(p-1) + 2^5 \n");
7079
printf("p = 2^26 - 5 \n");
@@ -77,21 +86,26 @@ int main(int argc, char** argv){
7786
}
7887

7988
if (TEST3){
80-
// Test for modulo_SIMD1
89+
// Testing modular functions
8190
double p = pow(2, 26) - 5;
82-
double u = 1.0 / p;
83-
// double a = (p-1) * (p-1) * 64;
84-
double a = (p-4567890);
85-
u_int32_t u_b = (int) (pow(2, 56) / p);
8691

92+
// Precomputed constants for Modular functions
93+
double u = 1.0 / p; // Constant for SIMD
94+
u_int32_t u_b = (int) (pow(2, 56) / p); // Constant for Barrett
95+
96+
double a = (p-1) * (p-1) * 16; // a > 2^(25+25+4) = 2^54
97+
98+
fesetround(FE_DOWNWARD);
8799
double SIMD1 = modulo_SIMD1(a, p, u);
100+
fesetround(FE_UPWARD);
88101
double SIMD2 = modulo_SIMD2(a, p, u);
89102
double SIMD3 = modulo_SIMD3(a, p, u);
90-
double Barrett = modulo_barrett(a, p, u_b);
103+
fesetround(FE_DOWNWARD);
104+
double Barrett = modulo_Barrett(a, p, u_b);
91105

92-
printf("a = (p-1)(p-1) + 2^6 \n");
106+
printf("a = (p-1)(p-1)* 2^4 \n");
93107
printf("p = 2^26 - 5 \n");
94-
printf("Correct answer is 64 \n");
108+
printf("Correct answer is 16 \n");
95109
printf("SIMD1 returns:%f \n", SIMD1);
96110
printf("SIMD2 returns:%f \n", SIMD2);
97111
printf("SIMD3 returns:%f \n", SIMD3);
@@ -102,22 +116,31 @@ int main(int argc, char** argv){
102116
if (TEST4){
103117
// Test for matrix multiplication
104118
srand(time(NULL));
105-
fesetround(FE_DOWNWARD);
106119
double p = pow(2, 26) - 5;
107-
double u = 1.0 / p;
108-
double overline_u = rint(1.0 / p);
109-
int n = 2;
120+
121+
// Precomputed constants for Modular functions
122+
double u = 1.0 / p; // Constant for SIMD
123+
u_int32_t u_b = (int) (pow(2, 56) / p); // Constant for Barrett
124+
125+
126+
int n = 10;
110127
double**A = random_matrix(n, p);
111128
double**B = random_matrix(n, p);
112129
double**C = zero_matrix(n); // Naive
113130
double**D = zero_matrix(n); // SIMD1
114131
double**E = zero_matrix(n); // SIMD2
115132
double**F = zero_matrix(n); // SIMD3
133+
double**G = zero_matrix(n); // Barrett
134+
116135

136+
fesetround(FE_DOWNWARD);
117137
mp_naive(A, B, C, n, p);
118138
mp_SIMD1(A, B, D, n, p, u);
119-
mp_SIMD2(A, B, E, n, p, overline_u);
120-
mp_SIMD3(A, B, F, n, p, overline_u);
139+
fesetround(FE_UPWARD);
140+
mp_SIMD2(A, B, E, n, p, u);
141+
mp_SIMD3(A, B, F, n, p, u);
142+
fesetround(FE_DOWNWARD);
143+
mp_Barrett(A, B, G, n, p, u_b);
121144

122145
write_matrix(A, n, "data/Matrix_A.txt");
123146
write_matrix(B, n, "data/Matrix_B.txt");
@@ -134,14 +157,37 @@ int main(int argc, char** argv){
134157
printf("SIMD2 A*B = \n");
135158
print_matrix(E, n);
136159
printf("SIMD3 A*B = \n");
137-
print_matrix(D, n);
160+
print_matrix(F, n);
161+
printf("Barrett A*B = \n");
162+
print_matrix(G, n);
163+
138164
}
139165

166+
if (TEST5){
167+
/* Testing the equivalence between
168+
if (d < 0) return d+p; else return d;
169+
And
170+
return d + p()
171+
*/
140172

141173

142174

175+
double d1 = -2;
176+
double d2 = 2;
177+
double p = 100.0;
143178

179+
int a = 100;
180+
int temp = (-1) && a;
144181

182+
// 0b 1111 1111 ... 1111 for double =
183+
184+
double res1 = d1 + ((-(d1<0)) && p);
185+
double res2 = d2 + ((-(d2<0)) && p);
186+
187+
printf("res1 = %f \n", res1);
188+
printf("res2 = %f \n", res2);
189+
printf("temp = %d \n", temp);
190+
}
145191

146192
return 0;
147193
}

src/matrix_mul.c

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ double modulo_naive(double a, double p){
66
}
77

88
double modulo_SIMD1(double a, double p, double u){
9-
/* Function 3.1 of SIMD article
9+
/* Function .1 from SIMD article for floats.
1010
Hypothesis: Rounding mode = down and p < 2^26.
1111
*/
1212
double b = a * u;
@@ -24,7 +24,7 @@ double modulo_SIMD1(double a, double p, double u){
2424
}
2525

2626
double modulo_SIMD2(double a, double p, double u){
27-
/* Function 3.1 of SIMD article
27+
/* Function 3.1 from SIMD article for floats.
2828
Hypothesis: Rounding mode = up and p < 2^26.
2929
*/
3030
double b = a * u;
@@ -35,40 +35,29 @@ double modulo_SIMD2(double a, double p, double u){
3535
}
3636

3737
double modulo_SIMD3(double a, double p, double u){
38-
/* Function 3.1 of SIMD article
38+
/* Function 3.1 from SIMD article for floats.
3939
Hypothesis: Rounding mode = up and p < 2^26.
4040
*/
4141
double b = a * u;
4242
double c = (double)(int)b;
43-
// The lib fenv.h has a function
44-
// double rint(double d)
4543
double d = a - c * p;
46-
if (d < 0) return d+p;
47-
int cond1 = d<0;
48-
// return cond1*(d+p) + (!cond1)*d;
49-
// return cond1&&(d+p) || (!cond1)&&d;
5044
return d + (-(d<0) && p);
5145
}
5246

53-
u_int32_t modulo_barrett(u_int64_t a, u_int32_t p, u_int32_t u_b){
54-
/* Function 3.1 of SIMD article
47+
u_int32_t modulo_Barrett(u_int64_t a, u_int32_t p, u_int32_t u){
48+
/* Barrett's modular function for integers.
5549
Hypothesis: nonnegative numbers, and no assumption on p.
5650
Returns a % p
5751
*/
5852
u_int32_t s = 23;
5953
u_int32_t t = 33;
60-
// u_int32_t s = 30;
61-
// u_int32_t t = 32;
6254

6355
u_int64_t b = a >> s;
64-
u_int64_t c = (b * u_b) >> t; // 4294967284 = (2^62 / p)
56+
u_int64_t c = (b * ) >> t; // u = 2^(s+t) / p
6557

6658
u_int32_t res = a - c * p;
67-
while (res >= p){
68-
res -= p;
69-
}
70-
// (res >= p) ? res -= p : res;
71-
// (res >= p) ? res -= p : res;
59+
60+
if (res >= p) return res - p;
7261
return res;
7362
}
7463

@@ -148,7 +137,27 @@ void mp_SIMD3(double** A, double** B, double** C, int n, double p, double u){
148137

149138
}
150139

151-
// Comparing loop order. KIJ wins.
140+
141+
void mp_Barrett(double** A, double** B, double** C, int n, double p, u_int32_t u){
142+
// Assert C is a zero matrix
143+
for (int i=0; i<n; i++){
144+
for (int j=0; j<n; j++){
145+
for (int k=0; k<n; k++){
146+
double temp = modulo_Barrett(A[i][k] * B[k][j], p, u);
147+
C[i][j] += temp;
148+
}
149+
}
150+
}
151+
152+
for (int i=0; i<n; i++){
153+
for (int j=0; j<n; j++){
154+
C[i][j] = modulo_Barrett(C[i][j], p, u);
155+
}
156+
}
157+
158+
}
159+
160+
// Comparing loop order. IKJ wins.
152161

153162
// Loop 1
154163
void mp_ijk(double** A, double** B, double** C, int n){

src/matrix_mul.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@ double modulo_naive(double a, double p);
1111
double modulo_SIMD1(double a, double p, double u);
1212
double modulo_SIMD2(double a, double p, double u);
1313
double modulo_SIMD3(double a, double p, double u);
14-
u_int32_t modulo_barrett(u_int64_t a, u_int32_t p, u_int32_t u_b);
14+
u_int32_t modulo_Barrett(u_int64_t a, u_int32_t p, u_int32_t u_b);
1515

1616
void mp_naive(double** A, double** B, double** C, int n, double p);
1717
void mp_SIMD1(double** A, double** B, double** C, int n, double p, double u);
1818
void mp_SIMD2(double** A, double** B, double** C, int n, double p, double u);
1919
void mp_SIMD3(double** A, double** B, double** C, int n, double p, double u);
20+
void mp_Barrett(double** A, double** B, double** C, int n, double p, u_int32_t u);
2021

2122
void mp_ijk(double** A, double** B, double** C, int n);
2223
void mp_kij(double** A, double** B, double** C, int n);

0 commit comments

Comments
 (0)