-
Notifications
You must be signed in to change notification settings - Fork 368
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add capability for ParallelFor to safely do reduction using deviceReduceSum, Min, etc. The user passes Gpu::KernelInfo{}.setReduction(true) to notify ParallelFor that this is a parallel reduction, and gives ParallelFor a callable that takes Gpu::Handler. A Gpu::Handler is needed to call deviceReduceSum. Also add Gpu::Buffer class, whose data pointer can be used as a device destination for deviceReduceSum. It also has a copyToHost method to copy the device result back to the host. See Tutorials/GPU/ParallelReduce for examples of how to use ParallelFor for reduction. Also note that the reduction function is safe to use with OpenMP CPU threads. Thus the same code can run with OpenMP when it is not built for GPU.
- Loading branch information
1 parent
c8cdfa6
commit 66b0f32
Showing
20 changed files
with
905 additions
and
169 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
#ifndef AMREX_GPU_DEVICE_BUFFER_H_ | ||
#define AMREX_GPU_DEVICE_BUFFER_H_ | ||
#include <AMReX_Config.H> | ||
|
||
#include <AMReX_Arena.H> | ||
#include <AMReX_TypeTraits.H> | ||
#include <AMReX_GpuDevice.H> | ||
#include <cstring> | ||
#include <cstdlib> | ||
#include <initializer_list> | ||
#include <memory> | ||
|
||
namespace amrex { | ||
namespace Gpu { | ||
|
||
/**
 * \brief Fixed-size buffer of trivially copyable elements that keeps a host
 *        copy and, when available, a device copy of the same data.
 *
 * The host copy is always allocated for a non-empty buffer: from
 * The_Pinned_Arena() when AMREX_USE_GPU is defined, otherwise with
 * std::malloc.  When AMREX_USE_GPU is defined and Gpu::inLaunchRegion() is
 * true at construction, a second copy is allocated from The_Arena() and the
 * initial values are uploaded to it.
 *
 * The class owns its storage, releases it in the destructor, and is neither
 * copyable nor movable.
 *
 * \tparam T element type; must satisfy AMREX_IS_TRIVIALLY_COPYABLE because
 *           elements are moved around with memcpy-style copies.
 */
template <typename T, typename std::enable_if<AMREX_IS_TRIVIALLY_COPYABLE(T),int>::type = 0>
class Buffer
{
public:

    /**
     * \brief Build a buffer holding a copy of the values in \p init.
     *
     * Delegates to the pointer/count constructor so that both construction
     * paths share one allocation and upload sequence.  An empty list yields
     * an empty buffer with no storage.
     */
    Buffer (std::initializer_list<T> init)
        : Buffer(init.begin(), init.size())
    {}

    /**
     * \brief Build a buffer holding a copy of \p n elements read from \p h_p.
     *
     * \param h_p source of the initial values; it is read with std::memcpy,
     *            so it must point to at least \p n readable elements when
     *            \p n is non-zero.
     * \param n   number of elements.  If zero, nothing is allocated and both
     *            data pointers stay null.
     */
    Buffer (T const* h_p, const std::size_t n)
        : m_size(n)
    {
        if (m_size == 0) return;

        const std::size_t nbytes = m_size*sizeof(T);

#ifdef AMREX_USE_GPU
        h_data = static_cast<T*>(The_Pinned_Arena()->alloc(nbytes));
#else
        // NOTE(review): the result of std::malloc is not checked before use.
        h_data = static_cast<T*>(std::malloc(nbytes));
#endif
        std::memcpy(h_data, h_p, nbytes);

#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion())
        {
            d_data = static_cast<T*>(The_Arena()->alloc(nbytes));
            Gpu::htod_memcpy_async(d_data, h_data, nbytes);
#ifdef AMREX_USE_DPCPP
            // NOTE(review): presumably the asynchronous copy needs an explicit
            // synchronization on the null stream for DPC++ -- confirm.
            if (Gpu::onNullStream()) Gpu::synchronize();
#endif
        }
#endif
    }

    //! Release all storage held by the buffer.
    ~Buffer () { clear(); }

    // The buffer owns raw allocations, so copying and moving are disabled.
    Buffer (Buffer const&) = delete;
    Buffer (Buffer &&) = delete;
    void operator= (Buffer const&) = delete;
    void operator= (Buffer &&) = delete;

    //! Pointer to the device copy if one exists, otherwise to the host copy.
    T const* data () const noexcept { return (d_data != nullptr) ? d_data : h_data; }
    //! Pointer to the device copy if one exists, otherwise to the host copy.
    T* data () noexcept { return (d_data != nullptr) ? d_data : h_data; }

    //! Pointer to the host copy (null for an empty or cleared buffer).
    T const* hostData () const noexcept { return h_data; }
    //! Pointer to the host copy (null for an empty or cleared buffer).
    T* hostData () noexcept { return h_data; }
    //! Misspelled former name of the mutable hostData(); kept so that
    //! existing callers continue to compile.  Prefer hostData().
    T* hostDatat () noexcept { return hostData(); }

    //! Number of elements currently held.
    std::size_t size () const noexcept { return m_size; }

    /**
     * \brief Free the host and device storage and leave the buffer empty.
     *
     * Safe to call more than once: the pointers are reset to null after the
     * first call, and the size is reset to zero so that size() does not
     * report elements that no longer exist.
     */
    void clear ()
    {
#ifdef AMREX_USE_GPU
        if (d_data) The_Arena()->free(d_data);
        if (h_data) The_Pinned_Arena()->free(h_data);
#else
        std::free(h_data);
#endif
        d_data = nullptr;
        h_data = nullptr;
        m_size = 0;
    }

    /**
     * \brief Refresh the host copy from the device copy and return it.
     *
     * When a device copy exists, its contents are copied into the host copy
     * and the stream is synchronized before returning, so the returned data
     * is ready to read.  Without a device copy the host data is returned
     * unchanged.
     *
     * \return pointer to the host copy (null for an empty or cleared buffer).
     */
    T* copyToHost ()
    {
#ifdef AMREX_USE_GPU
        if (d_data)
        {
            Gpu::dtoh_memcpy_async(h_data, d_data, m_size*sizeof(T));
            Gpu::streamSynchronize();
        }
#endif
        return h_data;
    }

private:
    std::size_t m_size;     //!< number of elements
    T* d_data = nullptr;    //!< device copy; null when no device copy was made
    T* h_data = nullptr;    //!< host copy; null when the buffer is empty
};
|
||
} | ||
} | ||
|
||
#endif |
Oops, something went wrong.