diff --git a/Src/Base/AMReX_Box.H b/Src/Base/AMReX_Box.H index 2aa64633cc3..a063816a47c 100644 --- a/Src/Base/AMReX_Box.H +++ b/Src/Base/AMReX_Box.H @@ -1767,6 +1767,18 @@ Box makeSlab (Box const& b, int direction, int slab_index) noexcept return r; } +AMREX_GPU_HOST_DEVICE +AMREX_FORCE_INLINE +Box makeSingleCellBox (int i, int j, int k, IndexType typ = IndexType::TheCellType()) +{ +#if (AMREX_SPACEDIM == 1) + amrex::ignore_unused(j,k); +#elif (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); +#endif + return Box(IntVect(AMREX_D_DECL(i,j,k)),IntVect(AMREX_D_DECL(i,j,k)),typ); +} + } -#endif /*BL_BOX_H*/ +#endif /*AMREX_BOX_H*/ diff --git a/Src/Base/AMReX_FabArrayUtility.H b/Src/Base/AMReX_FabArrayUtility.H index f42dadd535a..04299b7a823 100644 --- a/Src/Base/AMReX_FabArrayUtility.H +++ b/Src/Base/AMReX_FabArrayUtility.H @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include namespace amrex { @@ -52,23 +52,13 @@ ReduceMF (FabArray const& fa, IntVect const& nghost, F&& f) using T = std::conditional_t::value || std::is_same::value, int, typename FAB::value_type>; - ReduceOps reduce_op; - ReduceData reduce_data(reduce_op); - using ReduceTuple = typename decltype(reduce_data)::Type; - - for (MFIter mfi(fa); mfi.isValid(); ++mfi) - { - const Box& bx = amrex::grow(mfi.validbox(),nghost); - const auto& arr = fa.const_array(mfi); - reduce_op.eval(bx, reduce_data, - [=] AMREX_GPU_DEVICE (Box const& b) -> ReduceTuple - { - return { static_cast(f(b, arr)) }; - }); - } - - ReduceTuple hv = reduce_data.value(reduce_op); - return amrex::get<0>(hv); + auto typ = fa.ixType(); + auto const& ma = fa.const_arrays(); + return ParReduce(TypeList{}, TypeList{}, fa, nghost, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple + { + return { static_cast(f(amrex::makeSingleCellBox(i,j,k,typ), ma[box_no])) }; + }); } template @@ -81,24 +71,15 @@ ReduceMF (FabArray const& fa1, FabArray const& fa2, IntVect const& n using T = std::conditional_t::value || std::is_same::value, int, typename FAB1::value_type>; - ReduceOps reduce_op; - ReduceData reduce_data(reduce_op); - using ReduceTuple = typename decltype(reduce_data)::Type; - - for (MFIter mfi(fa1); mfi.isValid(); ++mfi) - { - const Box& bx = amrex::grow(mfi.validbox(),nghost); - const auto& arr1 = fa1.const_array(mfi); - const auto& arr2 = fa2.const_array(mfi); - reduce_op.eval(bx, reduce_data, - [=] AMREX_GPU_DEVICE (Box const& b) -> ReduceTuple - { - return { static_cast(f(b, arr1, arr2)) }; - }); - } - - ReduceTuple hv = reduce_data.value(reduce_op); - return amrex::get<0>(hv); + auto typ = fa1.ixType(); + auto const& ma1 = fa1.const_arrays(); + auto const& ma2 = fa2.const_arrays(); + return ParReduce(TypeList{}, TypeList{}, fa1, nghost, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple + { + return { static_cast(f(amrex::makeSingleCellBox(i,j,k,typ), + ma1[box_no], ma2[box_no])) }; + }); } template @@ -112,25 +93,16 @@ ReduceMF (FabArray const& fa1, FabArray const& fa2, using T = std::conditional_t::value || std::is_same::value, int, typename FAB1::value_type>; - ReduceOps reduce_op; - ReduceData reduce_data(reduce_op); - using ReduceTuple = typename decltype(reduce_data)::Type; - - for (MFIter mfi(fa1); mfi.isValid(); ++mfi) - { - const Box& bx = amrex::grow(mfi.validbox(),nghost); - const auto& arr1 = fa1.const_array(mfi); - const auto& arr2 = fa2.const_array(mfi); - const auto& arr3 = fa3.const_array(mfi); - reduce_op.eval(bx, reduce_data, - [=] AMREX_GPU_DEVICE (Box const& b) -> ReduceTuple - { - return { static_cast(f(b, arr1, arr2, arr3)) }; - }); - } - - ReduceTuple hv = reduce_data.value(reduce_op); - return amrex::get<0>(hv); + auto typ = fa1.ixType(); + auto const& ma1 = fa1.const_arrays(); + auto const& ma2 = fa2.const_arrays(); + auto const& ma3 = fa3.const_arrays(); + return ParReduce(TypeList{}, TypeList{}, fa1, nghost, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple + { + return { static_cast(f(amrex::makeSingleCellBox(i,j,k,typ), + ma1[box_no], ma2[box_no], ma3[box_no])) }; + }); } template @@ -1449,14 +1421,13 @@ indexFromValue (FabArray const& mf, int comp, IntVect const& nghost, int* p = aa.data(); // This is a device ptr to 1+AMREX_SPACEDIM int zeros. // The first is used as an atomic bool and the others for intvect. - for (MFIter mfi(mf,MFItInfo().SetDeviceSync(false)); mfi.isValid(); ++mfi) { - const Box& bx = amrex::grow(mfi.validbox(), nghost); - auto const& arr = mf.const_array(mfi); - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + if (mf.isFusingCandidate()) { + auto const& ma = mf.const_arrays(); + ParallelFor(mf, nghost, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept { int* flag = p; if (*flag == 0) { - if (arr(i,j,k,comp) == value) { + if (ma[box_no](i,j,k,comp) == value) { if (Gpu::Atomic::Exch(flag,1) == 0) { AMREX_D_TERM(p[1] = i;, p[2] = j;, @@ -1465,6 +1436,24 @@ indexFromValue (FabArray const& mf, int comp, IntVect const& nghost, } } }); + } else { + for (MFIter mfi(mf,MFItInfo().SetDeviceSync(false)); mfi.isValid(); ++mfi) { + const Box& bx = amrex::grow(mfi.validbox(), nghost); + auto const& arr = mf.const_array(mfi); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int* flag = p; + if (*flag == 0) { + if (arr(i,j,k,comp) == value) { + if (Gpu::Atomic::Exch(flag,1) == 0) { + AMREX_D_TERM(p[1] = i;, + p[2] = j;, + p[3] = k;); + } + } + } + }); + } } int const* tmp = aa.copyToHost(); AMREX_D_TERM(loc[0] = tmp[1];,