Skip to content

Commit

Permalink
Parallelized red black algorithm (somewhat final)
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreu Glasmann committed Apr 27, 2016
1 parent d2df7bf commit ea9c372
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 91 deletions.
201 changes: 119 additions & 82 deletions redblack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,67 +13,67 @@ using namespace std;
void redBlackSerial(double * T, double * b,
int n, int m, double alpha, int itermax) {
int i, j;
int index, index_left, index_right, index_up, index_down;
int index, pos_l, pos_r, pos_u, pos_d;

int iter = 0;
while (iter < itermax) {
// update half the board
for (i=1; i<n-1; i+=2) {
for (j=1; j<m-1; j+=2) {
index = getIndex(i, j, m);
index_left = getIndex(i-1, j, m);
index_right = getIndex(i+1, j, m);
index_up = getIndex(i, j+1, m);
index_down = getIndex(i, j-1, m);
pos_l = getIndex(i-1, j, m);
pos_r = getIndex(i+1, j, m);
pos_u = getIndex(i, j+1, m);
pos_d = getIndex(i, j-1, m);

T[index] = (1.0 - alpha) * T[index] +
((alpha/4.0) *
(T[index_left] + T[index_right] +
T[index_up] + T[index_down] )) + b[index];
(T[pos_l] + T[pos_r] +
T[pos_u] + T[pos_d] )) + b[index];
}
}
for (i=2; i<n-1; i+=2) {
for (j=2; j<m-1; j+=2) {
index = getIndex(i, j, m);
index_left = getIndex(i-1, j, m);
index_right = getIndex(i+1, j, m);
index_up = getIndex(i, j+1, m);
index_down = getIndex(i, j-1, m);
pos_l = getIndex(i-1, j, m);
pos_r = getIndex(i+1, j, m);
pos_u = getIndex(i, j+1, m);
pos_d = getIndex(i, j-1, m);

T[index] = (1.0 - alpha) * T[index] +
((alpha/4.0) *
(T[index_left] + T[index_right] +
T[index_up] + T[index_down] )) + b[index];
(T[pos_l] + T[pos_r] +
T[pos_u] + T[pos_d] )) + b[index];
}
}

// update second half of the board
for (i=1; i<n-1; i+=2) {
for(j=2; j<m-1; j+=2) {
index = getIndex(i, j, m);
index_left = getIndex(i-1, j, m);
index_right = getIndex(i+1, j, m);
index_up = getIndex(i, j+1, m);
index_down = getIndex(i, j-1, m);
pos_l = getIndex(i-1, j, m);
pos_r = getIndex(i+1, j, m);
pos_u = getIndex(i, j+1, m);
pos_d = getIndex(i, j-1, m);

T[index] = (1.0 - alpha) * T[index] +
((alpha/4.0) *
(T[index_left] + T[index_right] +
T[index_up] + T[index_down] )) + b[index];
(T[pos_l] + T[pos_r] +
T[pos_u] + T[pos_d] )) + b[index];
}
}
for (i=2; i<n-1; i+=2) {
for (j=1; j<m-1; j+=2) {
index = getIndex(i, j, m);
index_left = getIndex(i-1, j, m);
index_right = getIndex(i+1, j, m);
index_up = getIndex(i, j+1, m);
index_down = getIndex(i, j-1, m);
pos_l = getIndex(i-1, j, m);
pos_r = getIndex(i+1, j, m);
pos_u = getIndex(i, j+1, m);
pos_d = getIndex(i, j-1, m);

T[index] = (1.0 - alpha) * T[index] +
((alpha/4.0) *
(T[index_left] + T[index_right] +
T[index_up] + T[index_down] )) + b[index];
(T[pos_l] + T[pos_r] +
T[pos_u] + T[pos_d] )) + b[index];
}
}
iter ++;
Expand All @@ -87,79 +87,116 @@ void redBlackSerial(double * T, double * b,
void redBlackParallel(double * T, double * b,
int n, int m, double alpha, int itermax) {
int i, j;
int index, index_left, index_right, index_up, index_down;
//int index, pos_l, pos_r, pos_u, pos_d, shift;

int iter = 0;
while (iter < itermax) {
// update half the board in parallel
#pragma omp parallel for collapse(2) \
private(i, j) shared(T, b, n, m, alpha)
for (i=1; i<n-1; i+=2) {
for (j=1; j<m-1; j+=2) {
index = getIndex(i, j, m);
index_left = getIndex(i-1, j, m);
index_right = getIndex(i+1, j, m);
index_up = getIndex(i, j+1, m);
index_down = getIndex(i, j-1, m);
#pragma omp parallel for collapse(2) shared(T, b)
for (i=1; i<n-1; i++) {
// #pragma omp parallel for shared(T, b)
for (j=1; j<m-1; j+=2) {
int shift;
if (i%2 == 0) { shift = 1; }
else { shift = 0; }
int index = getIndex(i, j+shift, m);
int pos_l = getIndex(i-1, j+shift, m);
int pos_r = getIndex(i+1, j+shift, m);
int pos_u = getIndex(i, j+1+shift, m);
int pos_d = getIndex(i, j-1+shift, m);

T[index] = (1.0 - alpha) * T[index] +
((alpha/4.0) *
(T[index_left] + T[index_right] +
T[index_up] + T[index_down] )) + b[index];
(T[pos_l] + T[pos_r] +
T[pos_u] + T[pos_d] )) + b[index];
}
}
#pragma omp parallel for collapse(2) \
private(i, j) shared(T, b, n, m, alpha)
for (i=2; i<n-1; i+=2) {
for (j=2; j<m-1; j+=2) {
index = getIndex(i, j, m);
index_left = getIndex(i-1, j, m);
index_right = getIndex(i+1, j, m);
index_up = getIndex(i, j+1, m);
index_down = getIndex(i, j-1, m);

T[index] = (1.0 - alpha) * T[index] +
((alpha/4.0) *
(T[index_left] + T[index_right] +
T[index_up] + T[index_down] )) + b[index];
}
/* #pragma omp parallel for shared(T, b)
for (N=1; N<n*m-1; N+=2) {
int i = N/m; int j = N%m;
int shift;
if (i%2 == 0) { shift = 1; }
else { shift = 0; }
int index = getIndex(i, j+shift, m);
int pos_l = getIndex(i-1, j+shift, m);
int pos_r = getIndex(i+1, j+shift, m);
int pos_u = getIndex(i, j+1+shift, m);
int pos_d = getIndex(i, j-1+shift, m);
T[index] = (1.0 - alpha) * T[index] +
((alpha/4.0) *
(T[pos_l] + T[pos_r] +
T[pos_u] + T[pos_d] )) + b[index];
}
#pragma omp parallel for shared(T, b)
for (N=2; N<n*m-1; N+=2) {
int i = N/m; int j = N%m;
int shift;
if (i%2 == 0) { shift = 1; }
else { shift = 0; }
int index = getIndex(i, j+shift, m);
int pos_l = getIndex(i-1, j+shift, m);
int pos_r = getIndex(i+1, j+shift, m);
int pos_u = getIndex(i, j+1+shift, m);
int pos_d = getIndex(i, j-1+shift, m);
i
T[index] = (1.0 - alpha) * T[index] +
((alpha/4.0) *
(T[pos_l] + T[pos_r] +
T[pos_u] + T[pos_d] )) + b[index];
}
iter++;
}
*/

// update second half of the board
#pragma omp parallel for collapse(2) \
private(i, j) shared(T, b, n, m, alpha)
for (i=1; i<n-1; i+=2) {
for(j=2; j<m-1; j+=2) {
index = getIndex(i, j, m);
index_left = getIndex(i-1, j, m);
index_right = getIndex(i+1, j, m);
index_up = getIndex(i, j+1, m);
index_down = getIndex(i, j-1, m);
#pragma omp parallel for collapse(2) shared(T,b)
for (i=1; i<n-1; i++) {
//#pragma omp parallel for shared(T, b)
for (j=2; j<m-1; j+=2) {
int shift;
if (i%2 == 0) { shift = -1; }
else { shift = 0; }
int index = getIndex(i, j+shift, m);
int pos_l = getIndex(i-1, j+shift, m);
int pos_r = getIndex(i+1, j+shift, m);
int pos_u = getIndex(i, j+1+shift, m);
int pos_d = getIndex(i, j-1+shift, m);

T[index] = (1.0 - alpha) * T[index] +
((alpha/4.0) *
(T[index_left] + T[index_right] +
T[index_up] + T[index_down] )) + b[index];
(T[pos_l] + T[pos_r] +
T[pos_u] + T[pos_d] )) + b[index];
}
}
#pragma omp parallel for collapse(2) \
private(i, j) shared(T, b, n, m, alpha)
for (i=2; i<n-1; i+=2) {
for (j=1; j<m-1; j+=2) {
index = getIndex(i, j, m);
index_left = getIndex(i-1, j, m);
index_right = getIndex(i+1, j, m);
index_up = getIndex(i, j+1, m);
index_down = getIndex(i, j-1, m);
iter ++;
}

}

void gaussSeidelParallel(double* T, double* b, int n, int m, double alpha, int itermax) {
int iter=0;
int i, j;
while (iter < itermax) {
#pragma omp parallel for shared(T,b) private(j)
for (i=1; i<n-1; i++) {
//#pragma omp parallel for shared(T, b)
for (j=1; j<m-1; j++) {
int index = getIndex(i, j, m);
int pos_l = getIndex(i, j-1, m);
int pos_r = getIndex(i, j+1, m);
int pos_u = getIndex(i+1, j, m);
int pos_d = getIndex(i-1, j, m);

T[index] = (1.0 - alpha) * T[index] +
((alpha/4.0) *
(T[index_left] + T[index_right] +
T[index_up] + T[index_down] )) + b[index];
(T[pos_l] + T[pos_r] +
T[pos_u] + T[pos_d] )) + b[index];
}
}
iter ++;
}
iter++;
}
}

int getIndex(int x, int y, int width) { return x*width + y; }
Expand All @@ -172,22 +209,22 @@ double computeNorm(double * x, int N) {

double computeResidual(double * T, double * b, int n, int m, double alpha) {
int i, j;
int index, index_left, index_right, index_up, index_down;
int index, pos_l, pos_r, pos_u, pos_d;

double residual = 0.0;
for (i=1; i<n-1; i++) {
for (j=1; j<m-1; j++) {
double r;
index = getIndex(i, j, m);
index_left = getIndex(i-1, j, m);
index_right = getIndex(i+1, j, m);
index_up = getIndex(i, j+1, m);
index_down = getIndex(i, j-1, m);
pos_l = getIndex(i-1, j, m);
pos_r = getIndex(i+1, j, m);
pos_u = getIndex(i, j+1, m);
pos_d = getIndex(i, j-1, m);

r = (-1.0*alpha*T[index]) +
((alpha/4.0) *
(T[index_left] + T[index_right] +
T[index_up] + T[index_down] )) + b[index];
(T[pos_l] + T[pos_r] +
T[pos_u] + T[pos_d] )) + b[index];
residual += r*r;
}
}
Expand Down
1 change: 1 addition & 0 deletions redblack.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@ void redBlackSerial(double * T, double * b,
int n, int m, double alpha, int itermax);
void redBlackParallel(double * T, double * b,
int n, int m, double alpha, int itermax);
void gaussSeidelParallel(double* T, double* b, int n, int m, double alpha, int itermax);
int getIndex(int x, int y, int width);
double computeResidual(double * T, double * b, int n, int m, double alpha);
36 changes: 27 additions & 9 deletions redblack_example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include <iostream>
#include <cmath>
#include <ctime>
#include <stdio.h>
#include <omp.h>
using namespace std;

void initArrays(double * T, double * b, int n, int m) {
Expand All @@ -17,28 +19,43 @@ void initArrays(double * T, double * b, int n, int m) {
}

int main() {
const int n = 100;
const int m = 100;
const int n = 500;
const int m = 500;
double alpha = 0.9;
double * T = new double[n*m];
double * b = new double[n*m];
initArrays(T, b, n, m);

/* see if threads are utilized properly */
int tid, nthreads;
#pragma omp parallel shared(nthreads) private(tid)
{
tid = omp_get_thread_num();
if (tid == 0) { nthreads = omp_get_num_threads(); }
#pragma omp barrier
printf("hello from thread %d of %d!\n", tid, nthreads);
}

double cc = 99.0;
int iter = 0;
cout << "Starting without parallel!\n";
clock_t start;
double t1, t2;
double duration;
start = clock();

t1 = omp_get_wtime();
while (cc > 1e-5) {
redBlackSerial(T, b, n, m, alpha, 1);
cc = computeResidual(T, b, n, m, alpha);
if (iter%1000==0) {
cout << "Iteration: " << iter << ", Res: " << cc << "\n";
}
iter ++;

//remove this
cc=1e-6;
}
duration = (clock() - start) / (double) CLOCKS_PER_SEC;
t2 = omp_get_wtime();
duration = (t2 - t1); /// (double) CLOCKS_PER_SEC;
cout << "Non-parallel: " << duration << " seconds.\n";

FILE * output = fopen("T_serial.txt", "w");
Expand All @@ -48,22 +65,23 @@ int main() {
}
}
fclose(output);

initArrays(T, b, n, m);

cc = 99.0;
iter = 0;
cout << "Starting with parallel!\n";
start = clock();
t1 = omp_get_wtime();
while (cc > 1e-5) {
redBlackParallel(T, b, n, m, alpha, 1);
//redBlackParallel(T, b, n, m, alpha, 1);
gaussSeidelParallel(T, b, n, m, alpha, 1);
cc = computeResidual(T, b, n, m, alpha);
if (iter%1000==0) {
cout << "Iteration: " << iter << ", Res: " << cc << "\n";
}
iter ++;
}
duration = (clock() - start) / (double) CLOCKS_PER_SEC;
t2 = omp_get_wtime();
duration = (t2 - t1);// / (double) CLOCKS_PER_SEC;
cout << "Parallel: " << duration << " seconds.\n";

output = fopen("T_parallel.txt", "w");
Expand Down

0 comments on commit ea9c372

Please sign in to comment.