Skip to content

Commit

Permalink
HIP Memory Advise : Set managed memory to coarse grain
Browse files Browse the repository at this point in the history
For unsafe atomics to work on managed memory, we must set it to coarse
grain.

For gfx90a, we can explicitly use unsafeAtomicAdd.
  • Loading branch information
WeiqunZhang committed Jun 13, 2022
1 parent 1a2fc3d commit 53c1437
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 12 deletions.
5 changes: 5 additions & 0 deletions Src/Base/AMReX_Arena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,11 @@ Arena::allocate_system (std::size_t nbytes)
(AMREX_HIP_SAFE_CALL(hipMallocManaged(&p, nbytes));,
AMREX_CUDA_SAFE_CALL(cudaMallocManaged(&p, nbytes));,
p = sycl::malloc_shared(nbytes, Gpu::Device::syclDevice(), Gpu::Device::syclContext()));
#ifdef AMREX_USE_HIP
// Otherwise atomiAdd won't work because we instruct the compiler to do unsafe atomics
AMREX_HIP_SAFE_CALL(hipMemAdvise(p, nbytes, hipMemAdviseSetCoarseGrain,
Gpu::Device::deviceId()));
#endif
if (arena_info.device_set_readonly)
{
Gpu::Device::mem_advise_set_readonly(p, nbytes);
Expand Down
15 changes: 15 additions & 0 deletions Src/Base/AMReX_GpuAtomic.H
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,21 @@ namespace detail {
#endif
}

#if defined(AMREX_USE_HIP) && defined(__gfx90a__)
// https://github.com/ROCm-Developer-Tools/hipamd/blob/rocm-4.5.x/include/hip/amd_detail/amd_hip_unsafe_atomics.h
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
float Add_device (float* const sum, float const value) noexcept
{
return unsafeAtomicAdd(sum, value);
}

AMREX_GPU_DEVICE AMREX_FORCE_INLINE
double Add_device (double* const sum, double const value) noexcept
{
return unsafeAtomicAdd(sum, value);
}
#endif

#ifdef AMREX_USE_DPCPP

// Valid atomic types are available at
Expand Down
26 changes: 14 additions & 12 deletions Src/Base/AMReX_GpuDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -782,16 +782,17 @@ Device::executeGraph(const cudaGraphExec_t &graphExec, bool synch)
void
Device::mem_advise_set_preferred (void* p, const std::size_t sz, const int device)
{
amrex::ignore_unused(p,sz,device);
// HIP does not support memory advise.
#ifdef AMREX_USE_CUDA
#ifndef AMREX_USE_HIP
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
#endif
{
AMREX_CUDA_SAFE_CALL(cudaMemAdvise(p, sz, cudaMemAdviseSetPreferredLocation, device));
AMREX_HIP_OR_CUDA
(AMREX_HIP_SAFE_CALL(
hipMemAdvise(p, sz, hipMemAdviseSetPreferredLocation, device)),
AMREX_CUDA_SAFE_CALL(
cudaMemAdvise(p, sz, cudaMemAdviseSetPreferredLocation, device)));
}
#elif defined(AMREX_USE_DPCPP)
amrex::ignore_unused(p,sz,device);
// xxxxx DPCPP todo: mem_advise
// if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
// {
Expand All @@ -804,16 +805,17 @@ Device::mem_advise_set_preferred (void* p, const std::size_t sz, const int devic
void
Device::mem_advise_set_readonly (void* p, const std::size_t sz)
{
amrex::ignore_unused(p,sz);
// HIP does not support memory advise.
#ifdef AMREX_USE_CUDA
#ifndef AMREX_USE_HIP
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
#endif
{
AMREX_CUDA_SAFE_CALL(cudaMemAdvise(p, sz, cudaMemAdviseSetReadMostly, cudaCpuDeviceId));
AMREX_HIP_OR_CUDA
(AMREX_HIP_SAFE_CALL(
hipMemAdvise(p, sz, hipMemAdviseSetReadMostly, hipCpuDeviceId)),
AMREX_CUDA_SAFE_CALL(
cudaMemAdvise(p, sz, cudaMemAdviseSetReadMostly, cudaCpuDeviceId)));
}
#elif defined(AMREX_USE_DPCPP)
amrex::ignore_unused(p,sz);
// xxxxx DPCPP todo: mem_advise
// if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
// {
Expand Down

0 comments on commit 53c1437

Please sign in to comment.