HIP Memory Advise : Set managed memory to coarse grain

For unsafe atomics to work on managed memory, we must set it to coarse grain. For gfx90a, we can explicitly use unsafeAtomicAdd.
AMReX-Codes · Jun 13, 2022 · 53c1437 · 53c1437
1 parent 1a2fc3d
commit 53c1437
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 12 deletions.
diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp
@@ -165,6 +165,11 @@ Arena::allocate_system (std::size_t nbytes)
                 (AMREX_HIP_SAFE_CALL(hipMallocManaged(&p, nbytes));,
                  AMREX_CUDA_SAFE_CALL(cudaMallocManaged(&p, nbytes));,
                  p = sycl::malloc_shared(nbytes, Gpu::Device::syclDevice(), Gpu::Device::syclContext()));
+#ifdef AMREX_USE_HIP
+            // Otherwise atomiAdd won't work because we instruct the compiler to do unsafe atomics
+            AMREX_HIP_SAFE_CALL(hipMemAdvise(p, nbytes, hipMemAdviseSetCoarseGrain,
+                                             Gpu::Device::deviceId()));
+#endif
             if (arena_info.device_set_readonly)
             {
                 Gpu::Device::mem_advise_set_readonly(p, nbytes);

diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H
@@ -140,6 +140,21 @@ namespace detail {
 #endif
     }
 
+#if defined(AMREX_USE_HIP) && defined(__gfx90a__)
+    // https://github.com/ROCm-Developer-Tools/hipamd/blob/rocm-4.5.x/include/hip/amd_detail/amd_hip_unsafe_atomics.h
+    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+    float Add_device (float* const sum, float const value) noexcept
+    {
+        return unsafeAtomicAdd(sum, value);
+    }
+
+    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+    double Add_device (double* const sum, double const value) noexcept
+    {
+        return unsafeAtomicAdd(sum, value);
+    }
+#endif
+
 #ifdef AMREX_USE_DPCPP
 
     // Valid atomic types are available at

diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp
@@ -782,16 +782,17 @@ Device::executeGraph(const cudaGraphExec_t &graphExec, bool synch)
 void
 Device::mem_advise_set_preferred (void* p, const std::size_t sz, const int device)
 {
-    amrex::ignore_unused(p,sz,device);
-    // HIP does not support memory advise.
-#ifdef AMREX_USE_CUDA
-#ifndef AMREX_USE_HIP
+#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
     if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
-#endif
     {
-        AMREX_CUDA_SAFE_CALL(cudaMemAdvise(p, sz, cudaMemAdviseSetPreferredLocation, device));
+        AMREX_HIP_OR_CUDA
+            (AMREX_HIP_SAFE_CALL(
+                 hipMemAdvise(p, sz, hipMemAdviseSetPreferredLocation, device)),
+             AMREX_CUDA_SAFE_CALL(
+                 cudaMemAdvise(p, sz, cudaMemAdviseSetPreferredLocation, device)));
     }
 #elif defined(AMREX_USE_DPCPP)
+    amrex::ignore_unused(p,sz,device);
     // xxxxx DPCPP todo: mem_advise
     // if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
     // {
@@ -804,16 +805,17 @@ Device::mem_advise_set_preferred (void* p, const std::size_t sz, const int devic
 void
 Device::mem_advise_set_readonly (void* p, const std::size_t sz)
 {
-    amrex::ignore_unused(p,sz);
-    // HIP does not support memory advise.
-#ifdef AMREX_USE_CUDA
-#ifndef AMREX_USE_HIP
+#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
     if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
-#endif
     {
-        AMREX_CUDA_SAFE_CALL(cudaMemAdvise(p, sz, cudaMemAdviseSetReadMostly, cudaCpuDeviceId));
+        AMREX_HIP_OR_CUDA
+            (AMREX_HIP_SAFE_CALL(
+                 hipMemAdvise(p, sz, hipMemAdviseSetReadMostly, hipCpuDeviceId)),
+             AMREX_CUDA_SAFE_CALL(
+                 cudaMemAdvise(p, sz, cudaMemAdviseSetReadMostly, cudaCpuDeviceId)));
     }
 #elif defined(AMREX_USE_DPCPP)
+    amrex::ignore_unused(p,sz);
     // xxxxx DPCPP todo: mem_advise
     // if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
     // {