diff --git a/Src/Base/AMReX_MultiFab.H b/Src/Base/AMReX_MultiFab.H index dfb75dacbf9..44c76d476f6 100644 --- a/Src/Base/AMReX_MultiFab.H +++ b/Src/Base/AMReX_MultiFab.H @@ -232,6 +232,13 @@ public: */ Real sum (int comp = 0, bool local = false) const; /** + * \brief Same as sum with local=false, but for non-cell-centered data, this + * skips non-unique points that are owned by multiple boxes. + */ + Real sum_unique (int comp = 0, + bool local = false, + const Periodicity& period = Periodicity::NonPeriodic()) const; + /** * \brief Adds the scalar value val to the value of each cell in the * specified subregion of the MultiFab. The subregion consists * of the num_comp components starting at component comp. diff --git a/Src/Base/AMReX_MultiFab.cpp b/Src/Base/AMReX_MultiFab.cpp index 9e2f37adf37..83664b307d4 100644 --- a/Src/Base/AMReX_MultiFab.cpp +++ b/Src/Base/AMReX_MultiFab.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef AMREX_MEM_PROFILING #include @@ -1586,6 +1587,58 @@ MultiFab::sum (int comp, bool local) const return sm; } +Real +MultiFab::sum_unique (int comp, + bool local, + const Periodicity& period) const +{ + BL_PROFILE("MultiFab::sum_unique()"); + + // no duplicatly distributed points if cell centered + if (ixType().cellCentered()) + return this->sum(comp, local); + + // Owner is the grid with the lowest grid number containing the data + std::unique_ptr owner_mask = OwnerMask(period); + + Real sm = Real(0.0); +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = this->const_arrays(); + auto const& msk = owner_mask->const_arrays(); + sm = ParReduce(TypeList{}, TypeList{}, *this, IntVect(0), + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> GpuTuple + { + return msk[box_no](i,j,k) ? ma[box_no](i,j,k,comp) : 0.0_rt; + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm) +#endif + for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) + { + Box const& bx = mfi.tilebox(); + Array4 const& a = this->const_array(mfi); + Array4 const& msk = owner_mask->const_array(mfi); + Real tmp = 0.0_rt; + AMREX_LOOP_3D(bx, i, j, k, + { + tmp += msk(i,j,k) ? a(i,j,k,comp) : 0.0_rt; + }); + sm += tmp; // Do it this way so that it does not break regression tests. + } + } + + if (!local) { + ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub()); + } + + return sm; +} + void MultiFab::minus (const MultiFab& mf, int strt_comp, int num_comp, int nghost) {