Skip to content

Commit

Permalink
Adding StreamTriad and Stencil examples for OpenMP
Browse files Browse the repository at this point in the history
  • Loading branch information
brobey committed Nov 3, 2019
1 parent 3e06db0 commit 35b3c79
Show file tree
Hide file tree
Showing 26 changed files with 514 additions and 76 deletions.
6 changes: 3 additions & 3 deletions OpenACC/ShallowWater/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ if (NOT CMAKE_OPENACC_VERBOSE)
endif (NOT CMAKE_OPENACC_VERBOSE)

if (CMAKE_C_COMPILER_ID MATCHES "PGI")
set(C_FLAGS "${C_FLAGS} -alias=ansi")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -alias=ansi")
elseif (CMAKE_C_COMPILER_ID MATCHES "GNU")
set(C_FLAGS "${C_FLAGS} -fstrict-aliasing")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing")
endif (CMAKE_C_COMPILER_ID MATCHES "PGI")

find_package(OpenACC)
Expand Down Expand Up @@ -53,4 +53,4 @@ set_target_properties(ShallowWater_par4 PROPERTIES LINK_FLAGS "${OpenACC_C_FLAGS

# Cleanup
add_custom_target(distclean COMMAND rm -rf CMakeCache.txt CMakeFiles
Makefile cmake_install.cmake StreamTriad.dSYM ipo_out.optrpt)
Makefile cmake_install.cmake ShallowWater.dSYM ipo_out.optrpt)
7 changes: 3 additions & 4 deletions OpenACC/Stencil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,15 @@ if (NOT CMAKE_OPENACC_VERBOSE)
endif (NOT CMAKE_OPENACC_VERBOSE)

if (CMAKE_C_COMPILER_ID MATCHES "PGI")
set(C_FLAGS "${C_FLAGS} -alias=ansi")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -alias=ansi")
elseif (CMAKE_C_COMPILER_ID MATCHES "GNU")
set(C_FLAGS "${C_FLAGS} -fstrict-aliasing")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing")
endif (CMAKE_C_COMPILER_ID MATCHES "PGI")

find_package(OpenACC)

if (CMAKE_C_COMPILER_ID MATCHES "PGI")
#set(OpenACC_C_VERBOSE "${OpenACC_C_VERBOSE} -Minfo=all,ccff")
set(OpenACC_C_VERBOSE "${OpenACC_C_VERBOSE} -Minfo=accel -ta=nvidia,time")
set(OpenACC_C_VERBOSE "${OpenACC_C_VERBOSE} -Minfo=accel")
elseif (CMAKE_C_COMPILER_ID MATCHES "GNU")
set(OpenACC_C_VERBOSE "${OpenACC_C_VERBOSE} -fopt-info-optimized-omp")
endif (CMAKE_C_COMPILER_ID MATCHES "PGI")
Expand Down
5 changes: 2 additions & 3 deletions OpenACC/StreamTriad/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@ if (NOT CMAKE_OPENACC_VERBOSE)
endif (NOT CMAKE_OPENACC_VERBOSE)

if (CMAKE_C_COMPILER_ID MATCHES "PGI")
set(C_FLAGS "${C_FLAGS} -alias=ansi")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -alias=ansi")
elseif (CMAKE_C_COMPILER_ID MATCHES "GNU")
set(C_FLAGS "${C_FLAGS} -fstrict-aliasing")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing")
endif (CMAKE_C_COMPILER_ID MATCHES "PGI")

find_package(OpenACC)

if (CMAKE_C_COMPILER_ID MATCHES "PGI")
#set(OpenACC_C_VERBOSE "${OpenACC_C_VERBOSE} -Minfo=all,ccff")
set(OpenACC_C_VERBOSE "${OpenACC_C_VERBOSE} -Minfo=accel")
elseif (CMAKE_C_COMPILER_ID MATCHES "GNU")
set(OpenACC_C_VERBOSE "${OpenACC_C_VERBOSE} -fopt-info-optimized-omp")
Expand Down
16 changes: 8 additions & 8 deletions OpenACC/mass_sum/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.10)
project (StreamTriad)
project (mass_sum)

set (CMAKE_C_STANDARD 99)

Expand All @@ -9,6 +9,12 @@ if (NOT CMAKE_OPENACC_VERBOSE)
set(CMAKE_OPENACC_VERBOSE true)
endif (NOT CMAKE_OPENACC_VERBOSE)

if (CMAKE_C_COMPILER_ID MATCHES "PGI")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -alias=ansi")
elseif (CMAKE_C_COMPILER_ID MATCHES "GNU")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing")
endif (CMAKE_C_COMPILER_ID MATCHES "PGI")

find_package(OpenACC)

if (CMAKE_C_COMPILER_ID MATCHES "PGI")
Expand All @@ -18,12 +24,6 @@ elseif (CMAKE_C_COMPILER_ID MATCHES "GNU")
set(OpenACC_C_VERBOSE "${OpenACC_C_VERBOSE} -fopt-info-optimized-omp")
endif (CMAKE_C_COMPILER_ID MATCHES "PGI")

if (CMAKE_C_COMPILER_ID MATCHES "PGI")
set(OpenACC_C_FLAGS "${OpenACC_C_FLAGS} -alias=ansi")
elseif (CMAKE_C_COMPILER_ID MATCHES "GNU")
set(OpenACC_C_FLAGS "${OpenACC_C_FLAGS} -fstrict-aliasing")
endif (CMAKE_C_COMPILER_ID MATCHES "PGI")

if (CMAKE_OPENACC_VERBOSE)
set(OpenACC_C_FLAGS "${OpenACC_C_FLAGS} ${OpenACC_C_VERBOSE}")
endif (CMAKE_OPENACC_VERBOSE)
Expand All @@ -35,4 +35,4 @@ set_target_properties(mass_sum PROPERTIES LINK_FLAGS "${OpenACC_C_FLAGS}")

# Cleanup
add_custom_target(distclean COMMAND rm -rf CMakeCache.txt CMakeFiles
Makefile cmake_install.cmake StreamTriad.dSYM ipo_out.optrpt mass_sum.o main.o)
Makefile cmake_install.cmake mass_sum.dSYM ipo_out.optrpt mass_sum.o main.o)
69 changes: 69 additions & 0 deletions OpenMP/Stencil/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Build script for the OpenMP target-offload Stencil example.
# Produces the serial baseline (Stencil) plus the offloaded variants
# (Stencil_par1, Stencil_par2), which are compiled and linked with the
# flags reported by the project-local FindOpenMPAccel module.
# NOTE(review): CMAKE_C_STANDARD is only honoured by CMake >= 3.1; the
# sibling OpenACC examples already require 3.10 -- consider raising this.
cmake_minimum_required(VERSION 2.8)
project (Stencil)

# Location of the project-local find modules (FindOpenMPAccel.cmake).
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")

set (CMAKE_C_STANDARD 99)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O3")

# Compiler reports are on by default; any false value is reset to true here,
# matching the behaviour of the other examples in this repository.
if (NOT CMAKE_OPENMPACCEL_VERBOSE)
    set(CMAKE_OPENMPACCEL_VERBOSE true)
endif ()

# Tell each compiler that strict (ANSI) aliasing rules may be assumed.
if (CMAKE_C_COMPILER_ID MATCHES "GNU")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing")
elseif (CMAKE_C_COMPILER_ID MATCHES "Clang")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing")
elseif (CMAKE_C_COMPILER_ID MATCHES "XL")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -qalias=ansi")
elseif (CMAKE_C_COMPILER_ID MATCHES "Cray")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -h restrict=a")
endif ()

find_package(CUDA REQUIRED)
message("CUDA include ${CUDA_INCLUDE_DIRS}")
find_package(OpenMPAccel)

# Add the compiler's offload/optimization report flags when verbose output is
# requested. (Previously the report flags were added unconditionally and the
# verbose branch modified unrelated OpenACC_* variables, which had no effect.)
if (CMAKE_OPENMPACCEL_VERBOSE)
    if (CMAKE_C_COMPILER_ID MATCHES "XL")
        set(OpenMPAccel_C_FLAGS "${OpenMPAccel_C_FLAGS} -qreport")
    elseif (CMAKE_C_COMPILER_ID MATCHES "GNU")
        set(OpenMPAccel_C_FLAGS "${OpenMPAccel_C_FLAGS} -fopt-info-omp")
    endif ()
endif ()

# Adds build target of Stencil (serial baseline) with source code files
add_executable(Stencil Stencil.c malloc2D.c malloc2D.h timer.c timer.h)

# Adds build target of Stencil_par1 with source code files.
# The flag variable is quoted so that an empty value (e.g. OpenMPAccel not
# found) is still passed as a single property value instead of breaking the
# PROPERTIES name/value pairing.
add_executable(Stencil_par1 Stencil_par1.c malloc2D.c malloc2D.h timer.c timer.h)
set_target_properties(Stencil_par1 PROPERTIES COMPILE_FLAGS "${OpenMPAccel_C_FLAGS}")
set_target_properties(Stencil_par1 PROPERTIES LINK_FLAGS "${OpenMPAccel_C_FLAGS}")

# Adds build target of Stencil_par2 with source code files
add_executable(Stencil_par2 Stencil_par2.c malloc2D.c malloc2D.h timer.c timer.h)
set_target_properties(Stencil_par2 PROPERTIES COMPILE_FLAGS "${OpenMPAccel_C_FLAGS}")
set_target_properties(Stencil_par2 PROPERTIES LINK_FLAGS "${OpenMPAccel_C_FLAGS}")

## Adds build target of Stencil_par3 with source code files
#add_executable(Stencil_par3 Stencil_par3.c malloc2D.c malloc2D.h timer.c timer.h)
#set_target_properties(Stencil_par3 PROPERTIES COMPILE_FLAGS "${OpenMPAccel_C_FLAGS}")
#set_target_properties(Stencil_par3 PROPERTIES LINK_FLAGS "${OpenMPAccel_C_FLAGS}")
#
## Adds build target of Stencil_par4 with source code files
#add_executable(Stencil_par4 Stencil_par4.c malloc2D.c malloc2D.h timer.c timer.h)
#set_target_properties(Stencil_par4 PROPERTIES COMPILE_FLAGS "${OpenMPAccel_C_FLAGS}")
#set_target_properties(Stencil_par4 PROPERTIES LINK_FLAGS "${OpenMPAccel_C_FLAGS}")
#
## Adds build target of Stencil_par5 with source code files
#add_executable(Stencil_par5 Stencil_par5.c malloc2D.c malloc2D.h timer.c timer.h)
#set_target_properties(Stencil_par5 PROPERTIES COMPILE_FLAGS "${OpenMPAccel_C_FLAGS}")
#target_include_directories(Stencil_par5 PRIVATE "${CUDA_INCLUDE_DIRS}")
#set_target_properties(Stencil_par5 PROPERTIES LINK_FLAGS "${OpenMPAccel_C_FLAGS} ${CUDA_C_FLAGS}")

# Cleanup: removes the files CMake generated for an in-source build
add_custom_target(distclean COMMAND rm -rf CMakeCache.txt CMakeFiles
                  Makefile cmake_install.cmake Stencil.dSYM ipo_out.optrpt)
24 changes: 24 additions & 0 deletions OpenMP/Stencil/Makefile.simple.xl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Simple makefile for building the OpenMP Stencil example with the IBM XL
# compiler. The sources in this directory are Stencil*.c, malloc2D.c and
# timer.c, so the targets are the Stencil executables (the previous version
# still named the StreamTriad targets it was copied from).
default: Stencil Stencil_par1 Stencil_par2
all: Stencil Stencil_par1 Stencil_par2

.PHONY: default all clean distclean

# -qsmp=omp -qoffload enable OpenMP target offload; -qreport emits listings.
CFLAGS:=-qthreaded -g -O3 -std=gnu99 -qalias=ansi -qhot -qsmp=omp -qoffload -qreport

%.o: %.c
	${CC} ${CFLAGS} -c $^

# Every executable needs the 2D allocator and the timer helpers.
Stencil: Stencil.o malloc2D.o timer.o
	${CC} ${CFLAGS} $^ -o Stencil

Stencil_par1: Stencil_par1.o malloc2D.o timer.o
	${CC} ${CFLAGS} $^ -o Stencil_par1

Stencil_par2: Stencil_par2.o malloc2D.o timer.o
	${CC} ${CFLAGS} $^ -o Stencil_par2

clean:
	rm -f Stencil Stencil.o
	rm -f Stencil_par1 Stencil_par1.o
	rm -f Stencil_par2 Stencil_par2.o
	rm -f malloc2D.o timer.o

# Removes the Makefile itself (this file is presumably copied to "Makefile"
# before use -- confirm against the repository's instructions).
distclean:
	rm -f Makefile
55 changes: 55 additions & 0 deletions OpenMP/Stencil/Stencil.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#include "malloc2D.h"
#include "timer.h"

/* Exchanges the pointers xnew and xold, using xtmp as scratch storage.
 * Expands to a single comma expression; after it, xtmp holds the value
 * xnew had before the swap. */
#define SWAP_PTR(xnew,xold,xtmp) (xtmp=xnew, xnew=xold, xold=xtmp)

/*
 * Serial baseline of the 5-point stencil (blur) benchmark.
 *
 * Allocates two jmax x imax grids, fills the working grid with a background
 * value of 5.0 plus a small block of 400.0 near the centre, then performs
 * niter averaging sweeps over the interior points, swapping the grids after
 * each sweep. Only the sweep itself is timed; the accumulated time is
 * printed at the end. Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
   struct timespec tstart_cpu;
   // Must start at zero: the sweep timings are accumulated with += below.
   double cpu_time = 0.0;
   int imax=2002, jmax = 2002;
   int niter=1000, nburst=100;

   double** restrict xtmp;
   double** restrict x = malloc2D(jmax, imax);
   double** restrict xnew = malloc2D(jmax, imax);

   // Background state for both grids.
   for (int j = 0; j < jmax; j++){
      for (int i = 0; i < imax; i++){
         xnew[j][i] = 0.0;
         x[j][i]    = 5.0;
      }
   }

   // High-valued block near the centre of the grid.
   for (int j = jmax/2 - 5; j < jmax/2 + 5; j++){
      for (int i = imax/2 - 5; i < imax/2 -1; i++){
         x[j][i] = 400.0;
      }
   }

   // Iterations run in bursts of nburst so progress is reported periodically.
   for (int iter = 0; iter < niter; iter+=nburst){

      for (int ib = 0; ib < nburst; ib++){
         cpu_timer_start(&tstart_cpu);
         // Interior points only: the outermost rows/columns act as a halo.
         for (int j = 1; j < jmax-1; j++){
            for (int i = 1; i < imax-1; i++){
               xnew[j][i] = ( x[j][i] + x[j][i-1] + x[j][i+1] + x[j-1][i] + x[j+1][i] )/5.0;
            }
         }
         cpu_time += cpu_timer_stop(tstart_cpu);

         // The freshly computed grid becomes the input of the next sweep.
         SWAP_PTR(xnew, x, xtmp);
      }

      printf("Iter %d\n",iter+nburst);
   }

   // NOTE(review): a single free() per grid assumes malloc2D returns one
   // contiguous allocation -- confirm against malloc2D.c.
   free(x);
   free(xnew);

   printf("Timing is %f\n",cpu_time);
}
61 changes: 61 additions & 0 deletions OpenMP/Stencil/Stencil_par1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#include "malloc2D.h"
#include "timer.h"

/* Exchanges the pointers xnew and xold, using xtmp as scratch storage.
 * Expands to a single comma expression; after it, xtmp holds the value
 * xnew had before the swap. */
#define SWAP_PTR(xnew,xold,xtmp) (xtmp=xnew, xnew=xold, xold=xtmp)

/*
 * Stencil benchmark, OpenMP offload variant 1.
 *
 * Each of the three loop nests (initialisation, centre block, stencil sweep)
 * is its own `target teams distribute parallel for simd` region carrying a
 * map clause for both grids, so the grids are mapped on every region.
 * Only the sweep region is timed; the accumulated time is printed at the
 * end. Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
   struct timespec tstart_cpu;
   // Must start at zero: the sweep timings are accumulated with += below.
   double cpu_time = 0.0;
   int imax=2002, jmax = 2002;
   int niter=1000, nburst=100;

   double** restrict xtmp;
   double** restrict x = malloc2D(jmax, imax);
   double** restrict xnew = malloc2D(jmax, imax);

   // Background state for both grids.
#pragma omp target teams distribute parallel for simd \
        map(x[0:jmax][0:imax], xnew[0:jmax][0:imax])
   for (int j = 0; j < jmax; j++){
      for (int i = 0; i < imax; i++){
         xnew[j][i] = 0.0;
         x[j][i]    = 5.0;
      }
   }

   // High-valued block near the centre of the grid.
#pragma omp target teams distribute parallel for simd \
        map(x[0:jmax][0:imax], xnew[0:jmax][0:imax])
   for (int j = jmax/2 - 5; j < jmax/2 + 5; j++){
      for (int i = imax/2 - 5; i < imax/2 -1; i++){
         x[j][i] = 400.0;
      }
   }

   // Iterations run in bursts of nburst so progress is reported periodically.
   for (int iter = 0; iter < niter; iter+=nburst){

      for (int ib = 0; ib < nburst; ib++){
         cpu_timer_start(&tstart_cpu);
         // Interior points only: the outermost rows/columns act as a halo.
#pragma omp target teams distribute parallel for simd \
        map(x[0:jmax][0:imax], xnew[0:jmax][0:imax])
         for (int j = 1; j < jmax-1; j++){
            for (int i = 1; i < imax-1; i++){
               xnew[j][i] = ( x[j][i] + x[j][i-1] + x[j][i+1] + x[j-1][i] + x[j+1][i] )/5.0;
            }
         }
         cpu_time += cpu_timer_stop(tstart_cpu);

         // The freshly computed grid becomes the input of the next sweep.
         SWAP_PTR(xnew, x, xtmp);
      }

      printf("Iter %d\n",iter+nburst);
   }

   // NOTE(review): a single free() per grid assumes malloc2D returns one
   // contiguous allocation -- confirm against malloc2D.c.
   free(x);
   free(xnew);

   printf("Timing is %f\n",cpu_time);
}
62 changes: 62 additions & 0 deletions OpenMP/Stencil/Stencil_par2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#include "malloc2D.h"
#include "timer.h"

/* Exchanges the pointers xnew and xold, using xtmp as scratch storage.
 * Expands to a single comma expression; after it, xtmp holds the value
 * xnew had before the swap. */
#define SWAP_PTR(xnew,xold,xtmp) (xtmp=xnew, xnew=xold, xold=xtmp)

/*
 * Stencil benchmark, OpenMP offload variant 2.
 *
 * Both grids are mapped once with `target enter data` before the first
 * target region and released with `target exit data` after the last one,
 * so the individual target regions carry no map clauses of their own.
 * Only the sweep region is timed; the accumulated time is printed at the
 * end. Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
   struct timespec tstart_cpu;
   // Must start at zero: the sweep timings are accumulated with += below.
   double cpu_time = 0.0;
   int imax=2002, jmax = 2002;
   int niter=1000, nburst=100;

   double** restrict xtmp;
   double** restrict x = malloc2D(jmax, imax);
   double** restrict xnew = malloc2D(jmax, imax);

   // A map-type is required syntactically before the ':'; `target enter
   // data` accepts `to` or `alloc`. `to` is used here.
#pragma omp target enter data map(to:x[0:jmax][0:imax], xnew[0:jmax][0:imax])

   // Background state for both grids.
#pragma omp target teams distribute parallel for simd
   for (int j = 0; j < jmax; j++){
      for (int i = 0; i < imax; i++){
         xnew[j][i] = 0.0;
         x[j][i]    = 5.0;
      }
   }

   // High-valued block near the centre of the grid.
#pragma omp target teams distribute parallel for simd
   for (int j = jmax/2 - 5; j < jmax/2 + 5; j++){
      for (int i = imax/2 - 5; i < imax/2 -1; i++){
         x[j][i] = 400.0;
      }
   }

   // Iterations run in bursts of nburst so progress is reported periodically.
   for (int iter = 0; iter < niter; iter+=nburst){

      for (int ib = 0; ib < nburst; ib++){
         cpu_timer_start(&tstart_cpu);
         // Interior points only: the outermost rows/columns act as a halo.
#pragma omp target teams distribute parallel for simd
         for (int j = 1; j < jmax-1; j++){
            for (int i = 1; i < imax-1; i++){
               xnew[j][i] = ( x[j][i] + x[j][i-1] + x[j][i+1] + x[j-1][i] + x[j+1][i] )/5.0;
            }
         }
         cpu_time += cpu_timer_stop(tstart_cpu);

         // The freshly computed grid becomes the input of the next sweep.
         SWAP_PTR(xnew, x, xtmp);
      }

      printf("Iter %d\n",iter+nburst);
   }

   // Copy the results back to the host and end the device mapping.
#pragma omp target exit data map(from:x[0:jmax][0:imax], xnew[0:jmax][0:imax])

   // NOTE(review): a single free() per grid assumes malloc2D returns one
   // contiguous allocation -- confirm against malloc2D.c.
   free(x);
   free(xnew);

   printf("Timing is %f\n",cpu_time);
}
Loading

0 comments on commit 35b3c79

Please sign in to comment.