Skip to content

Commit

Permalink
Exercise A solution in C++
Browse files Browse the repository at this point in the history
  • Loading branch information
tomdeakin committed Sep 2, 2013
1 parent c336d50 commit 8c45e3e
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Solutions/Exercise07/Cpp/mult
Solutions/Exercise08/Cpp/mult
Solutions/Exercise09/Cpp/pi_ocl
Solutions/Exercise13/Cpp/gameoflife
Solutions/ExerciseA/Cpp/pi_vocl


*.plist
Expand Down
36 changes: 36 additions & 0 deletions Solutions/ExerciseA/Cpp/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

# Build rules for the vectorized pi example (pi_vocl).
#
# Overridable variables:
#   CPPC    C++ compiler to use (defaults to g++)
#   DEVICE  OpenCL device type passed to the source as -D DEVICE=...

ifndef CPPC
CPPC=g++
endif

CPP_COMMON = ../../Cpp_common

# Only one -std flag is given: when several are passed the last one wins,
# so this keeps the dialect that was effectively in use (gnu++11).
CCFLAGS=-std=gnu++11

INC = -I $(CPP_COMMON)

LIBS = -lOpenCL -lrt

# Change this variable to specify the device type
# to the OpenCL device type of choice. You can also
# edit the variable in the source.
ifndef DEVICE
DEVICE = CL_DEVICE_TYPE_DEFAULT
endif

# Check our platform and make sure we define the APPLE variable
# and set up the right compiler flags and libraries
PLATFORM = $(shell uname -s)
ifeq ($(PLATFORM), Darwin)
CCFLAGS += -DAPPLE
LIBS = -framework OpenCL
endif

CCFLAGS += -D DEVICE=$(DEVICE)

# clean does not produce a file named "clean"; declaring it phony keeps the
# rule working even if such a file ever appears in this directory.
.PHONY: clean

# Recipe lines must start with a tab character.
pi_vocl: pi_vocl.cpp $(CPP_COMMON)/err_code.c
	$(CPPC) $^ $(INC) $(CCFLAGS) $(LIBS) -o $@


clean:
	rm -f pi_vocl
166 changes: 166 additions & 0 deletions Solutions/ExerciseA/Cpp/pi_vocl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
//
// Pi reduction - vectorized
//
// Numeric integration to estimate pi
// The OpenCL device type is chosen at compile time through the DEVICE macro
// Vector size must be present as a CLI argument
//
// History: C version written by Tim Mattson, May 2010
// Ported to the C++ Wrapper API by Benedict R. Gaster, September 2011
// C++ version Updated by Tom Deakin and Simon McIntosh-Smith, October 2012
// Updated by Tom Deakin, September 2013
//

#define __CL_ENABLE_EXCEPTIONS

#include "cl.hpp"
#include "util.hpp"

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <vector>

//pick up device type from compiler command line or from
//the default type
#ifndef DEVICE
#define DEVICE CL_DEVICE_TYPE_DEFAULT
#endif

char* err_code(cl_int);

#define INSTEPS (512*512*512)

int main(int argc, char** argv)
{
if (argc != 2)
{
std::cout << "Usage:./pi_vocl num\n"
<< "\twhere num = 1, 4 or 8\n";
return EXIT_FAILURE;
}

int vector_size = atoi(argv[1]);

// Define some vector size specific constants
unsigned int ITERS, WGS;
if (vector_size == 1)
{
ITERS = 262144;
WGS = 8;
}
else if (vector_size == 4)
{
ITERS = 262144 / 4;
WGS = 32;
}
else if (vector_size == 8)
{
ITERS = 262144 / 8;
WGS = 64;
}
else
{
std::cerr << "Invalid vector size\n";
return EXIT_FAILURE;
}

// Set some default values:
// Default number of steps (updated later to device preferable)
unsigned int in_nsteps = INSTEPS;
// Defaultl number of iterations
unsigned int niters = ITERS;
unsigned int work_group_size = WGS;

try
{
// Create context, queue and build program
cl::Context context(DEVICE);
cl::CommandQueue queue(context);
cl::Program program(context, util::loadProgram("../pi_vocl.cl"), true);
cl::Kernel kernel;

// Now that we know the size of the work_groups, we can set the number of work
// groups, the actual number of steps, and the step size
unsigned int nwork_groups = in_nsteps/(work_group_size*niters);

// Get the max work group size for the kernel pi on our device
unsigned int max_size;
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
if (vector_size == 1)
{
kernel = cl::Kernel(program, "pi");
max_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(devices[0]);
}
else if (vector_size == 4)
{
kernel = cl::Kernel(program, "pi_vec4");
max_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(devices[0]);
}
else if (vector_size == 8)
{
kernel = cl::Kernel(program, "pi_vec8");
max_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(devices[0]);
}

if (max_size > work_group_size)
{
work_group_size = max_size;
nwork_groups = in_nsteps/(nwork_groups*niters);
}

if (nwork_groups < 1)
{
nwork_groups = devices[0].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
work_group_size = in_nsteps/(nwork_groups*niters);
}

unsigned int nsteps = work_group_size * niters * nwork_groups;
float step_size = 1.0f / (float) nsteps;

// Vector to hold partial sum
std::vector<float> h_psum(nwork_groups);

std::cout << nwork_groups << " work groups of size " << work_group_size << ".\n"
<< nsteps << " Integration steps\n";

cl::Buffer d_partial_sums(context, CL_MEM_WRITE_ONLY, sizeof(float) * nwork_groups);

// Start the timer
util::Timer timer;

// Execute the kernel over the entire range of our 1d input data et
// using the maximum number of work group items for this device
cl::NDRange global(nwork_groups * work_group_size);
cl::NDRange local(work_group_size);

kernel.setArg(0, niters);
kernel.setArg(1, step_size);
cl::LocalSpaceArg localmem = cl::Local(sizeof(float) * work_group_size);
kernel.setArg(2, localmem);
kernel.setArg(3, d_partial_sums);
queue.enqueueNDRangeKernel(kernel, cl::NullRange, global, local);

cl::copy(queue, d_partial_sums, begin(h_psum), end(h_psum));

// Complete the sum and compute the final integral value
float pi_res = 0.0;
for (float x : h_psum)
pi_res += x;
pi_res *= step_size;

// Stop the timer
double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.;
std::cout << "The calculation ran in " << rtime << " seconds\n"
<< " pi = " << pi_res << " for " << nsteps << " steps\n";

return EXIT_SUCCESS;


}
catch (cl::Error err)
{
std::cerr << "ERROR: " << err.what() << ":\n";
err_code(err.err());
return EXIT_FAILURE;
}
}

0 comments on commit 8c45e3e

Please sign in to comment.