Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
261 changes: 254 additions & 7 deletions src/AtomicMacro.hh
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#ifndef AtomicMacro_HH_
#define AtomicMacro_HH_

#define USE_MACRO_FUNCTIONS 1

//Determine which atomics to use based on platform being compiled for
//
//If compiling with CUDA
Expand All @@ -8,26 +13,260 @@
#define USE_OPENMP_ATOMICS
#endif

#ifdef HAVE_SYCL
#include <CL/sycl.hpp>
#include <cstdint>
#endif

// --------------------------------------------------
// Original Names -> Inline function names
// --------------------------------------------------
// ATOMIC_WRITE( x, v ) -> ATOMIC_WRITE
// ATOMIC_UPDATE( x ) -> ATOMIC_INCREMENT
// ATOMIC_ADD( x, v ) -> ATOMIC_ADD
// ATOMIC_CAPTURE( x, v, p ) -> ATOMIC_FETCH_ADD
// --------------------------------------------------

#if defined (USE_MACRO_FUNCTIONS)

#define ATOMIC_CAPTURE( x, v, p ) ATOMIC_FETCH_ADD((x),(v),(p))
#define ATOMIC_UPDATE( x ) ATOMIC_INCREMENT((x))

#if defined(HAVE_SYCL)

// Memory-order / address-space parameters shared by all SYCL atomic helpers below.
// NOTE(review): `static` at namespace scope in a header gives every translation
// unit its own copy; C++17 `inline constexpr` would avoid that — confirm the
// target SYCL toolchain supports it before changing.
static const cl::sycl::memory_order order = cl::sycl::memory_order::relaxed;
static const cl::sycl::access::address_space space = cl::sycl::access::address_space::global_space;

// Plain (non-atomic) store, mirroring the CUDA backend where ATOMIC_WRITE is a
// simple assignment because callers only use it where atomicity is not required.
template <typename T>
inline void ATOMIC_WRITE(T & x, T v) {
    // BUGFIX: the store was commented out, so every ATOMIC_WRITE in the SYCL
    // build silently discarded its value.
    x = v;
}

// Atomically increment x by one (relaxed order, global address space).
// Replaces the CUDA `atomicAdd(&x, 1)` shown in the retained comment.
template <typename T>
inline void ATOMIC_INCREMENT(T& x) {
//atomicAdd( &x, 1 );
// Addend must have type T so atomic_fetch_add's deduction succeeds.
T one{1};
// Wrap the raw reference in a SYCL atomic view of the same location.
cl::sycl::atomic<T, space> y( (cl::sycl::multi_ptr<T, space>(&x)));
cl::sycl::atomic_fetch_add(y, one, order);
}

// Atomically add v to x (same type), relaxed order, global address space.
template <typename T>
inline void ATOMIC_ADD(T& x, T v) {
//atomicAdd( &x, v );
// Wrap the raw reference in a SYCL atomic view of the same location.
cl::sycl::atomic<T, space> y( (cl::sycl::multi_ptr<T, space>(&x)));
cl::sycl::atomic_fetch_add(y, v, order);
}

// Specialization: atomic += for double implemented as a compare-exchange loop
// on the 64-bit bit pattern, since this SYCL target lacks floating-point
// atomics in the global address space.
template <>
inline void ATOMIC_ADD(double& x, double v) {
static_assert(sizeof(double) == sizeof(uint64_t), "Unsafe: double is not 64-bits");
//atomicAdd( &x, v );
// View the double's storage as a 64-bit integer atomic.
// NOTE(review): reinterpret_cast type punning is technically UB in ISO C++;
// presumably tolerated by the device compiler — confirm, or use a bit_cast.
cl::sycl::atomic<uint64_t, space> t( (cl::sycl::multi_ptr<uint64_t, space>( reinterpret_cast<uint64_t*>(&x)) ));
uint64_t old_i = t.load(order);
double old_d;
do {
old_d = *reinterpret_cast<const double*>(&old_i);
const double new_d = old_d + v;
const uint64_t new_i = *reinterpret_cast<const uint64_t *>(&new_d);
// On failure compare_exchange_strong refreshes old_i with the current
// value, so the next iteration recomputes the sum from fresh data.
if (t.compare_exchange_strong(old_i, new_i, order)) break;
} while (true);
// p = old_d;
}

// Atomically add a value of a (possibly narrower) type T2 into a T1 location.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& x, T2 v) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    //atomicAdd( &x, v );
    // BUGFIX: was `static_cast<T2>(v)` — a no-op cast; the addend must be
    // converted to the target type T1 before the fetch_add.
    T1 val = static_cast<T1>(v);
    cl::sycl::atomic<T1, space> y( (cl::sycl::multi_ptr<T1, space>(&x)));
    cl::sycl::atomic_fetch_add(y, val, order);
}

// Atomic fetch-and-add: p receives the value of x immediately before the add.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) {
//p = atomicAdd( &x, v );
// Wrap the raw reference in a SYCL atomic view and capture the prior value.
cl::sycl::atomic<T, space> y( (cl::sycl::multi_ptr<T, space>(&x)));
p = cl::sycl::atomic_fetch_add(y, v, order);
}

// Atomic fetch-and-add with a narrower addend type; p gets x's prior value.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    //p = atomicAdd( &x, v );
    cl::sycl::atomic<T1, space> y( (cl::sycl::multi_ptr<T1, space>(&x)));
    // BUGFIX: was `static_cast<T2>(v)` — a no-op cast; convert the addend to T1.
    T1 val = static_cast<T1>(v);
    p = cl::sycl::atomic_fetch_add(y, val, order);
}

// Atomic fetch-and-add with independent addend (T2) and capture (T3) types.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    //p = atomicAdd( &x, v );
    // BUGFIX: was `static_cast<T2>(v)` — a no-op cast; convert the addend to T1.
    T1 val = static_cast<T1>(v);
    cl::sycl::atomic<T1, space> y( (cl::sycl::multi_ptr<T1, space>(&x)));
    p = cl::sycl::atomic_fetch_add(y, val, order);
}

#elif defined(HAVE_CUDA) && defined(__CUDA_ARCH__)

// Non-atomic store: callers only use ATOMIC_WRITE on the device where a
// plain write is sufficient.
template <typename T>
inline void ATOMIC_WRITE(T & dest, T value) {
    dest = value;
}

// Atomically increment x by one via the CUDA hardware atomic.
template <typename T>
inline void ATOMIC_INCREMENT(T& x) {
    // BUGFIX: `atomicAdd(&x, 1)` passes an int literal; CUDA's atomicAdd has
    // no (float*, int) / (double*, int) / (unsigned*, int) overloads, so the
    // addend must have type T.
    atomicAdd( &x, static_cast<T>(1) );
}

// Atomically add a same-typed addend using the CUDA hardware atomic.
template <typename T>
inline void ATOMIC_ADD(T& target, T addend) {
    atomicAdd( &target, addend );
}

// Atomically add a (possibly narrower) T2 addend into a T1 location.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& x, T2 v) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    // Convert the addend to the target type: CUDA atomicAdd requires the
    // value type to match the pointee type (consistent with the SYCL backend).
    atomicAdd( &x, static_cast<T1>(v) );
}

// Atomic fetch-and-add: `previous` receives target's value just before the add.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& target, T addend, T& previous) {
    previous = atomicAdd( &target, addend );
}

// Fetch-and-add with a narrower addend type; p gets x's prior value.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    // Convert the addend to T1 so a matching atomicAdd overload exists.
    p = atomicAdd( &x, static_cast<T1>(v) );
}

// Fetch-and-add with independent addend (T2) and capture (T3) types.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    // Convert the addend to T1 so a matching atomicAdd overload exists.
    p = atomicAdd( &x, static_cast<T1>(v) );
}

#elif defined(USE_OPENMP_ATOMICS)

// Atomically store `value` into `dest` (OpenMP atomic write).
template <typename T>
inline void ATOMIC_WRITE(T & dest, T value) {
    _Pragma("omp atomic write")
    dest = value;
}

// Atomically increment `counter` by one (OpenMP atomic update).
template <typename T>
inline void ATOMIC_INCREMENT(T& counter) {
    _Pragma("omp atomic update")
    ++counter;
}

// Atomically add a same-typed addend (OpenMP atomic update).
template <typename T>
inline void ATOMIC_ADD(T& target, T addend) {
    _Pragma("omp atomic")
    target += addend;
}

// Atomically add a (possibly narrower) T2 addend into a T1 location.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& target, T2 addend) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    _Pragma("omp atomic")
    target += addend;
}

// Atomic capture: `previous` receives target's value just before the add.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& target, T addend, T& previous) {
    _Pragma("omp atomic capture")
    { previous = target; target = target + addend; }
}

#if defined (HAVE_CUDA)
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) {
static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
_Pragma("omp atomic capture")
{p = x; x = x + v;}
}

template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) {
static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
_Pragma("omp atomic capture")
{p = x; x = x + v;}
}

#else // SEQUENTIAL

// Serial build: no synchronization needed, a plain store suffices.
template <typename T>
inline void ATOMIC_WRITE(T & dest, T value) {
    dest = value;
}

// Serial build: plain increment.
template <typename T>
inline void ATOMIC_INCREMENT(T& counter) {
    ++counter;
}

// Serial build: plain in-place addition.
template <typename T>
inline void ATOMIC_ADD(T& target, T addend) {
    target += addend;
}

// Serial build: plain addition of a (possibly narrower) T2 addend.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& target, T2 addend) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    target += addend;
}

// Serial build: capture the prior value, then add.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& target, T addend, T& previous) {
    previous = target;
    target += addend;
}

// Serial build: capture-then-add with a (possibly narrower) T2 addend.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& target, T2 addend, T1& previous) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    previous = target;
    target = target + addend;
}

// Serial build: capture-then-add with independent addend and capture types.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& target, T2 addend, T3& previous) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    previous = target;
    target = target + addend;
}

#endif // BACKENDS

#else // ! USE_MACRO_FUNCTIONS

#if defined (HAVE_SYCL)

#error You must define USE_MACRO_FUNCTIONS with SYCL!

#elif defined (HAVE_CUDA)

//If in a CUDA GPU section use the CUDA atomics
#ifdef __CUDA_ARCH__

//Currently not atomic here, but it is only used where it does not necessarily need to be atomic.
// BUGFIX: the diff retained a duplicated, un-escaped `x = v;` line after the
// macro body; it was a stray statement at file scope. Keep a single body line.
#define ATOMIC_WRITE( x, v ) \
    x = v;

// Atomic x += v via the CUDA hardware atomic.
#define ATOMIC_ADD( x, v ) \
atomicAdd( &x, v );

// Atomic x += 1 (the int literal relies on x being an int-compatible type).
#define ATOMIC_UPDATE( x ) \
atomicAdd( &x, 1 );

// Atomic capture: p receives x's value immediately before the add.
#define ATOMIC_CAPTURE( x, v, p ) \
p = atomicAdd( &x, v );

//If in a CPU OpenMP section use the OpenMP atomics
#elif defined (USE_OPENMP_ATOMICS)

#define ATOMIC_WRITE( x, v ) \
_Pragma("omp atomic write") \
x = v;
Expand All @@ -46,6 +285,7 @@

//If in a serial section, no need to use atomics
#else

#define ATOMIC_WRITE( x, v ) \
x = v;

Expand All @@ -62,6 +302,7 @@

//If in a OpenMP section use the OpenMP atomics
#elif defined (USE_OPENMP_ATOMICS)

#define ATOMIC_WRITE( x, v ) \
_Pragma("omp atomic write") \
x = v;
Expand All @@ -74,12 +315,13 @@
_Pragma("omp atomic update") \
x++;

// BUGFIX: the diff retained both the old and the re-indented new copy of this
// macro, i.e. ATOMIC_CAPTURE was defined twice. Keep a single definition.
#define ATOMIC_CAPTURE( x, v, p )  \
    _Pragma("omp atomic capture") \
    {p = x; x = x + v;}

//If in a serial section, no need to use atomics
#else

#define ATOMIC_WRITE( x, v ) \
x = v;

Expand All @@ -91,4 +333,9 @@

#define ATOMIC_CAPTURE( x, v, p ) \
{p = x; x = x + v;}
#endif

#endif // BACKENDS

#endif // USE_MACRO_FUNCTIONS

#endif // AtomicMacro_HH_
27 changes: 19 additions & 8 deletions src/CollisionEvent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "PhysicalConstants.hh"
#include "DeclareMacro.hh"
#include "AtomicMacro.hh"
#include "mathHelp.hh"
#include "cudaUtils.hh"

#define MAX_PRODUCTION_SIZE 4

Expand All @@ -21,32 +23,31 @@
// Return true if the particle will continue.
//----------------------------------------------------------------------------------------------------------------------

// Sample the particle's post-collision state: set its new kinetic energy,
// rotate its direction by the scattering angle plus a uniformly sampled
// azimuthal angle, recompute its relativistic speed/velocity, and draw a new
// number of mean free paths to the next reaction.
// BUGFIX(review): the diff scrape interleaved the pre- and post-change lines
// (duplicate sinPhi/cosPhi/sinTheta declarations, both sin()/SIN() variants,
// two HOST_DEVICE headers); this is the reconstructed post-change function
// using the portable SIN/COS/SQRT/LOG math wrappers.
HOST_DEVICE SYCL_EXTERNAL
void updateTrajectory( double energy, double angle, MC_Particle& particle )
{
    particle.kinetic_energy = energy;
    double cosTheta = angle;
    // Azimuthal angle is uniform on [0, 2*pi).
    double randomNumber = rngSample(&particle.random_number_seed);
    double phi = 2 * 3.14159265 * randomNumber;
    double sinPhi = SIN(phi);
    double cosPhi = COS(phi);
    double sinTheta = SQRT((1.0 - (cosTheta*cosTheta)));
    particle.direction_cosine.Rotate3DVector(sinTheta, cosTheta, sinPhi, cosPhi);
    // Relativistic speed: v = c * sqrt(1 - (m c^2 / (E + m c^2))^2).
    double speed = (PhysicalConstants::_speedOfLight *
                    SQRT((1.0 - ((PhysicalConstants::_neutronRestMassEnergy *
                    PhysicalConstants::_neutronRestMassEnergy) /
                    ((energy + PhysicalConstants::_neutronRestMassEnergy) *
                    (energy + PhysicalConstants::_neutronRestMassEnergy))))));
    particle.velocity.x = speed * particle.direction_cosine.alpha;
    particle.velocity.y = speed * particle.direction_cosine.beta;
    particle.velocity.z = speed * particle.direction_cosine.gamma;
    // Exponentially distributed optical distance to the next collision.
    randomNumber = rngSample(&particle.random_number_seed);
    particle.num_mean_free_paths = -1.0*LOG(randomNumber);
}
HOST_DEVICE_END

HOST_DEVICE

HOST_DEVICE SYCL_EXTERNAL
bool CollisionEvent(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int tally_index)
{
const MC_Cell_State &cell = monteCarlo->domain[mc_particle.domain].cell_state[mc_particle.cell];
Expand Down Expand Up @@ -116,8 +117,18 @@ bool CollisionEvent(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned i
ATOMIC_ADD( monteCarlo->_tallies->_balanceTask[tally_index]._produce, nOut);
break;
case NuclearDataReaction::Undefined:
{
#ifdef HAVE_SYCL
#if HAVE_SYCL_PRINTF
static const OPENCL_CONSTANT char format[] = "reactionType invalid\n";
syclx::printf(format);
#endif
#else
printf("reactionType invalid\n");
#endif
qs_assert(false);
break;
}
}

if( nOut == 0 ) return false;
Expand Down
2 changes: 1 addition & 1 deletion src/CollisionEvent.hh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class MonteCarlo;
class MC_Particle;

#include "DeclareMacro.hh"
HOST_DEVICE
HOST_DEVICE SYCL_EXTERNAL
bool CollisionEvent(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int tally_index );
HOST_DEVICE_END

Expand Down
Loading