From ca17c460b9a731ccca3682bec452572d7cca6b7a Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 17 Oct 2024 15:57:09 -0700 Subject: [PATCH] Update test --- Src/FFT/AMReX_FFT.H | 16 +++--- Src/FFT/AMReX_FFT_Helper.H | 73 ++++++++++++++++++++++++++ Src/FFT/CMakeLists.txt | 1 + Src/FFT/Make.package | 2 +- Tests/FFT/Poisson/GNUmakefile | 2 +- Tests/FFT/Poisson/main.cpp | 98 +++++++++++++++-------------------- 6 files changed, 129 insertions(+), 63 deletions(-) create mode 100644 Src/FFT/AMReX_FFT_Helper.H diff --git a/Src/FFT/AMReX_FFT.H b/Src/FFT/AMReX_FFT.H index e9139c8e1d..97451dd5a8 100644 --- a/Src/FFT/AMReX_FFT.H +++ b/Src/FFT/AMReX_FFT.H @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -26,9 +27,6 @@ namespace amrex::FFT { -enum struct Scaling { full, symmetric, none }; -enum struct Direction { forward, backward }; - template class R2C { @@ -37,7 +35,7 @@ public: MultiFab, FabArray > >; using cMF = FabArray > >; - R2C (Box const& domain); + explicit R2C (Box const& domain, Info const& info = Info{}); ~R2C (); @@ -146,6 +144,8 @@ private: static void destroy_plan (P plan); static std::pair make_c2c_plans (cMF& inout); + Info m_info; + Box m_real_domain; Box m_spectral_domain_x; Box m_spectral_domain_y; @@ -183,8 +183,9 @@ private: }; template -R2C::R2C (Box const& domain) - : m_real_domain(domain), +R2C::R2C (Box const& domain, Info const& info) + : m_info(info), + m_real_domain(domain), m_spectral_domain_x(IntVect(0), IntVect(AMREX_D_DECL(domain.length(0)/2, domain.bigEnd(1), domain.bigEnd(2)))), @@ -197,6 +198,9 @@ R2C::R2C (Box const& domain) { static_assert(std::is_same_v || std::is_same_v); AMREX_ALWAYS_ASSERT(m_real_domain.smallEnd() == 0 && m_real_domain.cellCentered()); +#if (AMREX_SPACEDIM != 3) + AMREX_ALWAYS_ASSERT(false == m_info.batch_mode); +#endif int myproc = ParallelDescriptor::MyProc(); int nprocs = ParallelDescriptor::NProcs(); diff --git a/Src/FFT/AMReX_FFT_Helper.H b/Src/FFT/AMReX_FFT_Helper.H new file mode 100644 index 0000000000..3f1f38afab --- /dev/null +++ b/Src/FFT/AMReX_FFT_Helper.H @@ -0,0 +1,73 @@ +#ifndef AMREX_FFT_HELPER_H_ +#define AMREX_FFT_HELPER_H_ +#include + +#include +#include +#include +#include +#include + +namespace amrex::FFT +{ + +enum struct Scaling { full, symmetric, none }; +enum struct Direction { forward, backward }; + +struct Info +{ + //! Supported only in 3D. When batch_mode is true, FFT is performed on + //! the first two dimensions only and the third dimension size is the + //! batch size. + bool batch_mode = false; + + Info& setBatchMode (bool x) { batch_mode = x; return *this; } +}; + +template +struct PoissonSpectral +{ + PoissonSpectral (Geometry const& geom) + : fac({AMREX_D_DECL(T(2)*Math::pi()/T(geom.ProbLength(0)), + T(2)*Math::pi()/T(geom.ProbLength(1)), + T(2)*Math::pi()/T(geom.ProbLength(2)))}), + dx({AMREX_D_DECL(T(geom.CellSize(0)), + T(geom.CellSize(1)), + T(geom.CellSize(2)))}), + scale(T(1.0/geom.Domain().d_numPts())), + len(geom.Domain().length()) + { + static_assert(std::is_floating_point_v); + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void operator() (int i, int j, int k, GpuComplex& spectral_data) const + { + amrex::ignore_unused(i,j,k); + // the values in the upper-half of the spectral array in y and z + // are here interpreted as negative wavenumbers + AMREX_D_TERM(T a = fac[0]*i;, + T b = (j < len[1]/2) ? fac[1]*j : fac[1]*(len[1]-j);, + T c = (k < len[2]/2) ? fac[2]*k : fac[2]*(len[2]-k)); + T k2 = AMREX_D_TERM(T(2)*(std::cos(a*dx[0])-T(1))/(dx[0]*dx[0]), + +T(2)*(std::cos(b*dx[1])-T(1))/(dx[1]*dx[1]), + +T(2)*(std::cos(c*dx[2])-T(1))/(dx[2]*dx[2])); + if (k2 != T(0)) { + spectral_data /= k2; + } else { + // interpretation here is that the average value of the + // solution is zero + spectral_data = 0; + } + spectral_data *= scale; + }; + + GpuArray fac; + GpuArray dx; + T scale; + IntVect len; +}; + +} + +#endif diff --git a/Src/FFT/CMakeLists.txt b/Src/FFT/CMakeLists.txt index 509092da56..0f8826a733 100644 --- a/Src/FFT/CMakeLists.txt +++ b/Src/FFT/CMakeLists.txt @@ -5,6 +5,7 @@ foreach(D IN LISTS AMReX_SPACEDIM) PRIVATE AMReX_FFT.H AMReX_FFT.cpp + AMReX_FFT_Helper.H ) endforeach() diff --git a/Src/FFT/Make.package b/Src/FFT/Make.package index 54322566b7..1702840790 100644 --- a/Src/FFT/Make.package +++ b/Src/FFT/Make.package @@ -1,7 +1,7 @@ ifndef AMREX_FFT_MAKE AMREX_FFT_MAKE := 1 -CEXE_headers += AMReX_FFT.H +CEXE_headers += AMReX_FFT.H AMReX_FFT_Helper.H CEXE_sources += AMReX_FFT.cpp VPATH_LOCATIONS += $(AMREX_HOME)/Src/FFT diff --git a/Tests/FFT/Poisson/GNUmakefile b/Tests/FFT/Poisson/GNUmakefile index 4c15ced188..93376f4485 100644 --- a/Tests/FFT/Poisson/GNUmakefile +++ b/Tests/FFT/Poisson/GNUmakefile @@ -1,6 +1,6 @@ AMREX_HOME := ../../.. -DEBUG = TRUE +DEBUG = FALSE DIM = 3 diff --git a/Tests/FFT/Poisson/main.cpp b/Tests/FFT/Poisson/main.cpp index ee2175ffa9..a795436421 100644 --- a/Tests/FFT/Poisson/main.cpp +++ b/Tests/FFT/Poisson/main.cpp @@ -13,22 +13,22 @@ int main (int argc, char* argv[]) { BL_PROFILE("main"); - int n_cell_x = 64; - int n_cell_y = 64; - int n_cell_z = 64; + AMREX_D_TERM(int n_cell_x = 64;, + int n_cell_y = 64;, + int n_cell_z = 64); - Real prob_lo_x = 0.; - Real prob_lo_y = 0.; - Real prob_lo_z = 0.; - Real prob_hi_x = 1.; - Real prob_hi_y = 1.; - Real prob_hi_z = 1.; + AMREX_D_TERM(Real prob_lo_x = 0.;, + Real prob_lo_y = 0.;, + Real prob_lo_z = 0.); + AMREX_D_TERM(Real prob_hi_x = 1.;, + Real prob_hi_y = 1.;, + Real prob_hi_z = 1.); { ParmParse pp; - pp.query("n_cell_x", n_cell_x); - pp.query("n_cell_y", n_cell_y); - pp.query("n_cell_z", n_cell_z); + AMREX_D_TERM(pp.query("n_cell_x", n_cell_x);, + pp.query("n_cell_y", n_cell_y);, + pp.query("n_cell_z", n_cell_z)); } Box domain(IntVect(0),IntVect(AMREX_D_DECL(n_cell_x-1,n_cell_y-1,n_cell_z-1))); @@ -49,52 +49,34 @@ int main (int argc, char* argv[]) auto const& rhsma = rhs.arrays(); ParallelFor(rhs, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) { - Real x = (i+0.5_rt) * dx[0]; - Real y = (AMREX_SPACEDIM>=2) ? (j+0.5_rt) * dx[1] : 0._rt; - Real z = (AMREX_SPACEDIM==3) ? (k+0.5_rt) * dx[2] : 0._rt; - rhsma[b](i,j,k) = std::exp(-10._rt*((x-0.5_rt)*(x-0.5_rt)*1.05_rt + - (y-0.5_rt)*(y-0.5_rt)*0.90_rt + - (z-0.5_rt)*(z-0.5_rt))); + AMREX_D_TERM(Real x = (i+0.5_rt) * dx[0] - 0.5_rt;, + Real y = (j+0.5_rt) * dx[1] - 0.5_rt;, + Real z = (k+0.5_rt) * dx[2] - 0.5_rt); + rhsma[b](i,j,k) = std::exp(-10._rt* + (AMREX_D_TERM(x*x*1.05_rt, + y*y*0.90_rt, + z*z))); }); // Shift rhs so that its sum is zero. auto rhosum = rhs.sum(0); rhs.plus(-rhosum/geom.Domain().d_numPts(), 0, 1); - Real facx = 2._rt*Math::pi()/std::abs(prob_hi_x-prob_lo_x); - Real facy = 2._rt*Math::pi()/std::abs(prob_hi_y-prob_lo_y); - Real facz = 2._rt*Math::pi()/std::abs(prob_hi_z-prob_lo_z); - Real scale = 1._rt/(Real(n_cell_z)*Real(n_cell_y)*Real(n_cell_z)); - - auto post_forward = [=] AMREX_GPU_DEVICE (int i, int j, int k, - GpuComplex& spectral_data) - { - amrex::ignore_unused(j,k); - // the values in the upper-half of the spectral array in y and z - // are here interpreted as negative wavenumbers - AMREX_D_TERM(Real a = facx*i;, - Real b = (j < n_cell_y/2) ? facy*j : facy*(n_cell_y-j);, - Real c = (k < n_cell_z/2) ? facz*k : facz*(n_cell_z-k)); - Real k2 = AMREX_D_TERM(2._rt*(std::cos(a*dx[0])-1._rt)/(dx[0]*dx[0]), - +2._rt*(std::cos(b*dx[1])-1._rt)/(dx[1]*dx[1]), - +2._rt*(std::cos(c*dx[2])-1._rt)/(dx[2]*dx[2])); - if (k2 != 0._rt) { - spectral_data /= k2; - } else { - // interpretation here is that the average value of the - // solution is zero - spectral_data *= 0._rt; - } - spectral_data *= scale; - }; - auto t0 = amrex::second(); FFT::R2C fft(geom.Domain()); - fft.forwardThenBackward(rhs, soln, post_forward); + FFT::PoissonSpectral post_forward(geom); auto t1 = amrex::second(); - amrex::Print() << " AMReX FFT time: " << t1-t0 << "\n"; + + double tsolve; + for (int n = 0; n < 2; ++n) { + auto ta = amrex::second(); + fft.forwardThenBackward(rhs, soln, post_forward); + auto tb = amrex::second(); + tsolve = tb-ta; + } + + amrex::Print() << " AMReX FFT setup time: " << t1-t0 << ", solve time " + << tsolve << "\n"; { MultiFab phi(soln.boxArray(), soln.DistributionMap(), 1, 1); @@ -107,16 +89,22 @@ int main (int argc, char* argv[]) ParallelFor(res, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) { auto const& phia = phi_ma[b]; - auto lap = (phia(i-1,j,k)-2.*phia(i,j,k)+phia(i+1,j,k)) / (dx[0]*dx[0]) - + (phia(i,j-1,k)-2.*phia(i,j,k)+phia(i,j+1,k)) / (dx[1]*dx[1]) - + (phia(i,j,k-1)-2.*phia(i,j,k)+phia(i,j,k+1)) / (dx[2]*dx[2]); + auto lap = AMREX_D_TERM + (((phia(i-1,j,k)-2.*phia(i,j,k)+phia(i+1,j,k)) / (dx[0]*dx[0])), + + ((phia(i,j-1,k)-2.*phia(i,j,k)+phia(i,j+1,k)) / (dx[1]*dx[1])), + + ((phia(i,j,k-1)-2.*phia(i,j,k)+phia(i,j,k+1)) / (dx[2]*dx[2]))); res_ma[b](i,j,k) = rhs_ma[b](i,j,k) - lap; }); - amrex::Print() << " rhs.min & max: " << rhs.min(0) << " " << rhs.max(0) << "\n" - << " res.min & max: " << res.min(0) << " " << res.max(0) << "\n"; - VisMF::Write(soln, "soln"); - VisMF::Write(rhs, "rhs"); - VisMF::Write(res, "res"); + auto bnorm = rhs.norminf(); + auto rnorm = res.norminf(); + amrex::Print() << " rhs inf norm " << bnorm << "\n" + << " res inf norm " << rnorm << "\n"; +#ifdef AMREX_USE_FLOAT + auto eps = 1.e-3f; +#else + auto eps = 1.e-11; +#endif + AMREX_ALWAYS_ASSERT(rnorm < eps*bnorm); } } amrex::Finalize();