Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
261 changes: 254 additions & 7 deletions src/AtomicMacro.hh
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#ifndef AtomicMacro_HH_
#define AtomicMacro_HH_

#define USE_MACRO_FUNCTIONS 1

//Determine which atomics to use based on platform being compiled for
//
//If compiling with CUDA
Expand All @@ -8,26 +13,260 @@
#define USE_OPENMP_ATOMICS
#endif

#ifdef HAVE_SYCL
#include <CL/sycl.hpp>
#include <cstdint>
#endif

// --------------------------------------------------
// Original Names -> Inline function names
// --------------------------------------------------
// ATOMIC_WRITE( x, v ) -> ATOMIC_WRITE
// ATOMIC_UPDATE( x ) -> ATOMIC_INCREMENT
// ATOMIC_ADD( x, v ) -> ATOMIC_ADD
// ATOMIC_CAPTURE( x, v, p ) -> ATOMIC_FETCH_ADD
// --------------------------------------------------

#if defined (USE_MACRO_FUNCTIONS)

#define ATOMIC_CAPTURE( x, v, p ) ATOMIC_FETCH_ADD((x),(v),(p))
#define ATOMIC_UPDATE( x ) ATOMIC_INCREMENT((x))

#if defined(HAVE_SYCL)

// Memory-order / address-space parameters shared by all SYCL atomic helpers below.
// NOTE(review): `static` at namespace scope in a header gives every translation
// unit its own copy; C++17 `inline constexpr` would avoid that — confirm the
// target SYCL toolchain supports it before changing.
static const cl::sycl::memory_order order = cl::sycl::memory_order::relaxed;
static const cl::sycl::access::address_space space = cl::sycl::access::address_space::global_space;

// Plain (non-atomic) store, mirroring the CUDA backend where ATOMIC_WRITE is a
// simple assignment because callers only use it where atomicity is not required.
template <typename T>
inline void ATOMIC_WRITE(T & x, T v) {
    // BUGFIX: the store was commented out, so every ATOMIC_WRITE in the SYCL
    // build silently discarded its value.
    x = v;
}

// Atomically increment x by one (relaxed order, global address space).
// Replaces the CUDA `atomicAdd(&x, 1)` shown in the retained comment.
template <typename T>
inline void ATOMIC_INCREMENT(T& x) {
//atomicAdd( &x, 1 );
// Addend must have type T so atomic_fetch_add's deduction succeeds.
T one{1};
// Wrap the raw reference in a SYCL atomic view of the same location.
cl::sycl::atomic<T, space> y( (cl::sycl::multi_ptr<T, space>(&x)));
cl::sycl::atomic_fetch_add(y, one, order);
}

// Atomically add v to x (same type), relaxed order, global address space.
template <typename T>
inline void ATOMIC_ADD(T& x, T v) {
//atomicAdd( &x, v );
// Wrap the raw reference in a SYCL atomic view of the same location.
cl::sycl::atomic<T, space> y( (cl::sycl::multi_ptr<T, space>(&x)));
cl::sycl::atomic_fetch_add(y, v, order);
}

// Specialization: atomic += for double implemented as a compare-exchange loop
// on the 64-bit bit pattern, since this SYCL target lacks floating-point
// atomics in the global address space.
template <>
inline void ATOMIC_ADD(double& x, double v) {
static_assert(sizeof(double) == sizeof(uint64_t), "Unsafe: double is not 64-bits");
//atomicAdd( &x, v );
// View the double's storage as a 64-bit integer atomic.
// NOTE(review): reinterpret_cast type punning is technically UB in ISO C++;
// presumably tolerated by the device compiler — confirm, or use a bit_cast.
cl::sycl::atomic<uint64_t, space> t( (cl::sycl::multi_ptr<uint64_t, space>( reinterpret_cast<uint64_t*>(&x)) ));
uint64_t old_i = t.load(order);
double old_d;
do {
old_d = *reinterpret_cast<const double*>(&old_i);
const double new_d = old_d + v;
const uint64_t new_i = *reinterpret_cast<const uint64_t *>(&new_d);
// On failure compare_exchange_strong refreshes old_i with the current
// value, so the next iteration recomputes the sum from fresh data.
if (t.compare_exchange_strong(old_i, new_i, order)) break;
} while (true);
// p = old_d;
}

// Atomically add a value of a (possibly narrower) type T2 into a T1 location.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& x, T2 v) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    //atomicAdd( &x, v );
    // BUGFIX: was `static_cast<T2>(v)` — a no-op cast; the addend must be
    // converted to the target type T1 before the fetch_add.
    T1 val = static_cast<T1>(v);
    cl::sycl::atomic<T1, space> y( (cl::sycl::multi_ptr<T1, space>(&x)));
    cl::sycl::atomic_fetch_add(y, val, order);
}

// Atomic fetch-and-add: p receives the value of x immediately before the add.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) {
//p = atomicAdd( &x, v );
// Wrap the raw reference in a SYCL atomic view and capture the prior value.
cl::sycl::atomic<T, space> y( (cl::sycl::multi_ptr<T, space>(&x)));
p = cl::sycl::atomic_fetch_add(y, v, order);
}

// Atomic fetch-and-add with a narrower addend type; p gets x's prior value.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    //p = atomicAdd( &x, v );
    cl::sycl::atomic<T1, space> y( (cl::sycl::multi_ptr<T1, space>(&x)));
    // BUGFIX: was `static_cast<T2>(v)` — a no-op cast; convert the addend to T1.
    T1 val = static_cast<T1>(v);
    p = cl::sycl::atomic_fetch_add(y, val, order);
}

// Atomic fetch-and-add with independent addend (T2) and capture (T3) types.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    //p = atomicAdd( &x, v );
    // BUGFIX: was `static_cast<T2>(v)` — a no-op cast; convert the addend to T1.
    T1 val = static_cast<T1>(v);
    cl::sycl::atomic<T1, space> y( (cl::sycl::multi_ptr<T1, space>(&x)));
    p = cl::sycl::atomic_fetch_add(y, val, order);
}

#elif defined(HAVE_CUDA) && defined(__CUDA_ARCH__)

// Non-atomic store: callers only use ATOMIC_WRITE on the device where a
// plain write is sufficient.
template <typename T>
inline void ATOMIC_WRITE(T & dest, T value) {
    dest = value;
}

// Atomically increment x by one via the CUDA hardware atomic.
template <typename T>
inline void ATOMIC_INCREMENT(T& x) {
    // BUGFIX: `atomicAdd(&x, 1)` passes an int literal; CUDA's atomicAdd has
    // no (float*, int) / (double*, int) / (unsigned*, int) overloads, so the
    // addend must have type T.
    atomicAdd( &x, static_cast<T>(1) );
}

// Atomically add a same-typed addend using the CUDA hardware atomic.
template <typename T>
inline void ATOMIC_ADD(T& target, T addend) {
    atomicAdd( &target, addend );
}

// Atomically add a (possibly narrower) T2 addend into a T1 location.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& x, T2 v) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    // Convert the addend to the target type: CUDA atomicAdd requires the
    // value type to match the pointee type (consistent with the SYCL backend).
    atomicAdd( &x, static_cast<T1>(v) );
}

// Atomic fetch-and-add: `previous` receives target's value just before the add.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& target, T addend, T& previous) {
    previous = atomicAdd( &target, addend );
}

// Fetch-and-add with a narrower addend type; p gets x's prior value.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    // Convert the addend to T1 so a matching atomicAdd overload exists.
    p = atomicAdd( &x, static_cast<T1>(v) );
}

// Fetch-and-add with independent addend (T2) and capture (T3) types.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    // Convert the addend to T1 so a matching atomicAdd overload exists.
    p = atomicAdd( &x, static_cast<T1>(v) );
}

#elif defined(USE_OPENMP_ATOMICS)

// Atomically store `value` into `dest` (OpenMP atomic write).
template <typename T>
inline void ATOMIC_WRITE(T & dest, T value) {
    _Pragma("omp atomic write")
    dest = value;
}

// Atomically increment `counter` by one (OpenMP atomic update).
template <typename T>
inline void ATOMIC_INCREMENT(T& counter) {
    _Pragma("omp atomic update")
    ++counter;
}

// Atomically add a same-typed addend (OpenMP atomic update).
template <typename T>
inline void ATOMIC_ADD(T& target, T addend) {
    _Pragma("omp atomic")
    target += addend;
}

// Atomically add a (possibly narrower) T2 addend into a T1 location.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& target, T2 addend) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    _Pragma("omp atomic")
    target += addend;
}

// Atomic capture: `previous` receives target's value just before the add.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& target, T addend, T& previous) {
    _Pragma("omp atomic capture")
    { previous = target; target = target + addend; }
}

#if defined (HAVE_CUDA)
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) {
static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
_Pragma("omp atomic capture")
{p = x; x = x + v;}
}

template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) {
static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
_Pragma("omp atomic capture")
{p = x; x = x + v;}
}

#else // SEQUENTIAL

// Serial build: no synchronization needed, a plain store suffices.
template <typename T>
inline void ATOMIC_WRITE(T & dest, T value) {
    dest = value;
}

// Serial build: plain increment.
template <typename T>
inline void ATOMIC_INCREMENT(T& counter) {
    ++counter;
}

// Serial build: plain in-place addition.
template <typename T>
inline void ATOMIC_ADD(T& target, T addend) {
    target += addend;
}

// Serial build: plain addition of a (possibly narrower) T2 addend.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& target, T2 addend) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    target += addend;
}

// Serial build: capture the prior value, then add.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& target, T addend, T& previous) {
    previous = target;
    target += addend;
}

// Serial build: capture-then-add with a (possibly narrower) T2 addend.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& target, T2 addend, T1& previous) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    previous = target;
    target = target + addend;
}

// Serial build: capture-then-add with independent addend and capture types.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& target, T2 addend, T3& previous) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    previous = target;
    target = target + addend;
}

#endif // BACKENDS

#else // ! USE_MACRO_FUNCTIONS

#if defined (HAVE_SYCL)

#error You must define USE_MACRO_FUNCTIONS with SYCL!

#elif defined (HAVE_CUDA)

//If in a CUDA GPU section use the CUDA atomics
#ifdef __CUDA_ARCH__

//Currently not atomic here, but it is only used where it does not necessarily need to be atomic.
// BUGFIX: the diff retained a duplicated, un-escaped `x = v;` line after the
// macro body; it was a stray statement at file scope. Keep a single body line.
#define ATOMIC_WRITE( x, v ) \
    x = v;

// Atomic x += v via the CUDA hardware atomic.
#define ATOMIC_ADD( x, v ) \
atomicAdd( &x, v );

// Atomic x += 1 (the int literal relies on x being an int-compatible type).
#define ATOMIC_UPDATE( x ) \
atomicAdd( &x, 1 );

// Atomic capture: p receives x's value immediately before the add.
#define ATOMIC_CAPTURE( x, v, p ) \
p = atomicAdd( &x, v );

//If in a CPU OpenMP section use the OpenMP atomics
#elif defined (USE_OPENMP_ATOMICS)

#define ATOMIC_WRITE( x, v ) \
_Pragma("omp atomic write") \
x = v;
Expand All @@ -46,6 +285,7 @@

//If in a serial section, no need to use atomics
#else

#define ATOMIC_WRITE( x, v ) \
x = v;

Expand All @@ -62,6 +302,7 @@

//If in a OpenMP section use the OpenMP atomics
#elif defined (USE_OPENMP_ATOMICS)

#define ATOMIC_WRITE( x, v ) \
_Pragma("omp atomic write") \
x = v;
Expand All @@ -74,12 +315,13 @@
_Pragma("omp atomic update") \
x++;

// BUGFIX: the diff retained both the old and the re-indented new copy of this
// macro, i.e. ATOMIC_CAPTURE was defined twice. Keep a single definition.
#define ATOMIC_CAPTURE( x, v, p )  \
    _Pragma("omp atomic capture") \
    {p = x; x = x + v;}

//If in a serial section, no need to use atomics
#else

#define ATOMIC_WRITE( x, v ) \
x = v;

Expand All @@ -91,4 +333,9 @@

#define ATOMIC_CAPTURE( x, v, p ) \
{p = x; x = x + v;}
#endif

#endif // BACKENDS

#endif // USE_MACRO_FUNCTIONS

#endif // AtomicMacro_HH_
27 changes: 19 additions & 8 deletions src/CollisionEvent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "PhysicalConstants.hh"
#include "DeclareMacro.hh"
#include "AtomicMacro.hh"
#include "mathHelp.hh"
#include "cudaUtils.hh"

#define MAX_PRODUCTION_SIZE 4

Expand All @@ -21,32 +23,31 @@
// Return true if the particle will continue.
//----------------------------------------------------------------------------------------------------------------------

// Sample the particle's post-collision state: set its new kinetic energy,
// rotate its direction by the scattering angle plus a uniformly sampled
// azimuthal angle, recompute its relativistic speed/velocity, and draw a new
// number of mean free paths to the next reaction.
// BUGFIX(review): the diff scrape interleaved the pre- and post-change lines
// (duplicate sinPhi/cosPhi/sinTheta declarations, both sin()/SIN() variants,
// two HOST_DEVICE headers); this is the reconstructed post-change function
// using the portable SIN/COS/SQRT/LOG math wrappers.
HOST_DEVICE SYCL_EXTERNAL
void updateTrajectory( double energy, double angle, MC_Particle& particle )
{
    particle.kinetic_energy = energy;
    double cosTheta = angle;
    // Azimuthal angle is uniform on [0, 2*pi).
    double randomNumber = rngSample(&particle.random_number_seed);
    double phi = 2 * 3.14159265 * randomNumber;
    double sinPhi = SIN(phi);
    double cosPhi = COS(phi);
    double sinTheta = SQRT((1.0 - (cosTheta*cosTheta)));
    particle.direction_cosine.Rotate3DVector(sinTheta, cosTheta, sinPhi, cosPhi);
    // Relativistic speed: v = c * sqrt(1 - (m c^2 / (E + m c^2))^2).
    double speed = (PhysicalConstants::_speedOfLight *
                    SQRT((1.0 - ((PhysicalConstants::_neutronRestMassEnergy *
                    PhysicalConstants::_neutronRestMassEnergy) /
                    ((energy + PhysicalConstants::_neutronRestMassEnergy) *
                    (energy + PhysicalConstants::_neutronRestMassEnergy))))));
    particle.velocity.x = speed * particle.direction_cosine.alpha;
    particle.velocity.y = speed * particle.direction_cosine.beta;
    particle.velocity.z = speed * particle.direction_cosine.gamma;
    // Exponentially distributed optical distance to the next collision.
    randomNumber = rngSample(&particle.random_number_seed);
    particle.num_mean_free_paths = -1.0*LOG(randomNumber);
}
HOST_DEVICE_END

HOST_DEVICE

HOST_DEVICE SYCL_EXTERNAL
bool CollisionEvent(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int tally_index)
{
const MC_Cell_State &cell = monteCarlo->domain[mc_particle.domain].cell_state[mc_particle.cell];
Expand Down Expand Up @@ -116,8 +117,18 @@ bool CollisionEvent(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned i
ATOMIC_ADD( monteCarlo->_tallies->_balanceTask[tally_index]._produce, nOut);
break;
case NuclearDataReaction::Undefined:
{
#ifdef HAVE_SYCL
#if HAVE_SYCL_PRINTF
static const OPENCL_CONSTANT char format[] = "reactionType invalid\n";
syclx::printf(format);
#endif
#else
printf("reactionType invalid\n");
#endif
qs_assert(false);
break;
}
}

if( nOut == 0 ) return false;
Expand Down
2 changes: 1 addition & 1 deletion src/CollisionEvent.hh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class MonteCarlo;
class MC_Particle;

#include "DeclareMacro.hh"
HOST_DEVICE
HOST_DEVICE SYCL_EXTERNAL
bool CollisionEvent(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int tally_index );
HOST_DEVICE_END

Expand Down
Loading