diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp
index f4b395b4928..c14fced3872 100644
--- a/Src/Base/AMReX_Arena.cpp
+++ b/Src/Base/AMReX_Arena.cpp
@@ -165,6 +165,11 @@ Arena::allocate_system (std::size_t nbytes)
             (AMREX_HIP_SAFE_CALL(hipMallocManaged(&p, nbytes));,
              AMREX_CUDA_SAFE_CALL(cudaMallocManaged(&p, nbytes));,
              p = sycl::malloc_shared(nbytes, Gpu::Device::syclDevice(), Gpu::Device::syclContext()));
+#ifdef AMREX_USE_HIP
+            // Otherwise atomicAdd won't work because we instruct the compiler to do unsafe atomics
+            AMREX_HIP_SAFE_CALL(hipMemAdvise(p, nbytes, hipMemAdviseSetCoarseGrain,
+                                             Gpu::Device::deviceId()));
+#endif
         if (arena_info.device_set_readonly)
         {
             Gpu::Device::mem_advise_set_readonly(p, nbytes);
diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H
index 7e9e9c59ea5..e6b2780abe0 100644
--- a/Src/Base/AMReX_GpuAtomic.H
+++ b/Src/Base/AMReX_GpuAtomic.H
@@ -140,6 +140,21 @@ namespace detail {
 #endif
     }
 
+#if defined(AMREX_USE_HIP) && defined(__gfx90a__)
+    // https://github.com/ROCm-Developer-Tools/hipamd/blob/rocm-4.5.x/include/hip/amd_detail/amd_hip_unsafe_atomics.h
+    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+    float Add_device (float* const sum, float const value) noexcept
+    {
+        return unsafeAtomicAdd(sum, value);
+    }
+
+    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+    double Add_device (double* const sum, double const value) noexcept
+    {
+        return unsafeAtomicAdd(sum, value);
+    }
+#endif
+
 #ifdef AMREX_USE_DPCPP
 
     // Valid atomic types are available at
diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp
index 05790837111..11e307d0bf4 100644
--- a/Src/Base/AMReX_GpuDevice.cpp
+++ b/Src/Base/AMReX_GpuDevice.cpp
@@ -782,16 +782,17 @@ Device::executeGraph(const cudaGraphExec_t &graphExec, bool synch)
 void
 Device::mem_advise_set_preferred (void* p, const std::size_t sz, const int device)
 {
-    amrex::ignore_unused(p,sz,device);
-    // HIP does not support memory advise.
-#ifdef AMREX_USE_CUDA
-#ifndef AMREX_USE_HIP
+#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
     if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
-#endif
     {
-        AMREX_CUDA_SAFE_CALL(cudaMemAdvise(p, sz, cudaMemAdviseSetPreferredLocation, device));
+        AMREX_HIP_OR_CUDA
+            (AMREX_HIP_SAFE_CALL(
+                 hipMemAdvise(p, sz, hipMemAdviseSetPreferredLocation, device)),
+             AMREX_CUDA_SAFE_CALL(
+                 cudaMemAdvise(p, sz, cudaMemAdviseSetPreferredLocation, device)));
     }
 #elif defined(AMREX_USE_DPCPP)
+    amrex::ignore_unused(p,sz,device);
     // xxxxx DPCPP todo: mem_advise
     // if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
     // {
@@ -804,16 +805,17 @@ Device::mem_advise_set_preferred (void* p, const std::size_t sz, const int devic
 void
 Device::mem_advise_set_readonly (void* p, const std::size_t sz)
 {
-    amrex::ignore_unused(p,sz);
-    // HIP does not support memory advise.
-#ifdef AMREX_USE_CUDA
-#ifndef AMREX_USE_HIP
+#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
     if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
-#endif
     {
-        AMREX_CUDA_SAFE_CALL(cudaMemAdvise(p, sz, cudaMemAdviseSetReadMostly, cudaCpuDeviceId));
+        AMREX_HIP_OR_CUDA
+            (AMREX_HIP_SAFE_CALL(
+                 hipMemAdvise(p, sz, hipMemAdviseSetReadMostly, hipCpuDeviceId)),
+             AMREX_CUDA_SAFE_CALL(
+                 cudaMemAdvise(p, sz, cudaMemAdviseSetReadMostly, cudaCpuDeviceId)));
     }
 #elif defined(AMREX_USE_DPCPP)
+    amrex::ignore_unused(p,sz);
     // xxxxx DPCPP todo: mem_advise
     // if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
    // {
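
For context, the standalone HIP sketch below (not part of the patch; file, kernel, and variable names are hypothetical) shows the pattern these changes rely on for a gfx90a (MI200-series) GPU: a managed allocation is advised coarse-grained, as Arena::allocate_system now does, so that unsafeAtomicAdd, which the new Add_device overloads forward to, resolves to a hardware floating-point atomic; on fine-grained memory that hardware atomic may be dropped, which is what the comment in the Arena change refers to. It assumes ROCm >= 4.5 and a device/driver combination where hipMallocManaged memory is host-accessible.

// unsafe_atomic_sketch.cpp -- illustrative only; compile with hipcc for gfx90a.
#include <hip/hip_runtime.h>
#include <cstdio>

// Each thread adds its value into *sum. On gfx90a, unsafeAtomicAdd lowers to a
// hardware FP atomic, which requires the destination to be coarse-grained memory.
__global__ void sum_kernel (double const* v, double* sum, int n)
{
    int i = blockIdx.x*blockDim.x + threadIdx.x;
    if (i < n) {
#if defined(__gfx90a__)
        unsafeAtomicAdd(sum, v[i]);   // fast path, analogous to the new Add_device overloads
#else
        atomicAdd(sum, v[i]);         // portable fallback on other targets
#endif
    }
}

int main ()
{
    constexpr int n = 1024;
    int dev = 0;
    hipGetDevice(&dev);

    double* v   = nullptr;
    double* sum = nullptr;
    hipMallocManaged(&v, n*sizeof(double));
    hipMallocManaged(&sum, sizeof(double));

    // The step added to Arena::allocate_system: mark the managed allocations
    // coarse-grained so the hardware FP atomics actually update them.
    hipMemAdvise(v,   n*sizeof(double), hipMemAdviseSetCoarseGrain, dev);
    hipMemAdvise(sum, sizeof(double),   hipMemAdviseSetCoarseGrain, dev);

    for (int i = 0; i < n; ++i) { v[i] = 1.0; }
    *sum = 0.0;

    sum_kernel<<<(n+255)/256, 256>>>(v, sum, n);
    hipDeviceSynchronize();

    std::printf("sum = %g (expected %d)\n", *sum, n);

    hipFree(v);
    hipFree(sum);
    return 0;
}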