[fpe] Use memory pool reservations for temporary device buffers in the fpe solver

paranumal · noelchalmers · Nov 19, 2022 · Nov 17, 2022 · Nov 17, 2022 · Nov 17, 2022
commit a7eb5e22e7fd5d68c7239dbf23fed409316142f3
diff --git a/solvers/fokkerPlanck/fpe.hpp b/solvers/fokkerPlanck/fpe.hpp
@@ -89,11 +89,6 @@ class fpe_t: public solver_t {
   memory<dfloat> q;
   deviceMemory<dfloat> o_q;
 
-  deviceMemory<dfloat> o_Mq;
-
-  memory<dfloat> grad;
-  deviceMemory<dfloat> o_grad;
-
   //subcycling
   int Nsubcycles;
   timeStepper_t subStepper;

diff --git a/solvers/fokkerPlanck/src/fpeReport.cpp b/solvers/fokkerPlanck/src/fpeReport.cpp
@@ -31,9 +31,10 @@ void fpe_t::Report(dfloat time, int tstep){
   static int frame=0;
 
   //compute q.M*q
+  dlong Nentries = mesh.Nelements*mesh.Np;
+  deviceMemory<dfloat> o_Mq = platform.reserve<dfloat>(Nentries);
   mesh.MassMatrixApply(o_q, o_Mq);
 
-  dlong Nentries = mesh.Nelements*mesh.Np;
   dfloat norm2 = sqrt(platform.linAlg().innerProd(Nentries, o_q, o_Mq, mesh.comm));
 
   if(mesh.rank==0)

diff --git a/solvers/fokkerPlanck/src/fpeRun.cpp b/solvers/fokkerPlanck/src/fpeRun.cpp
@@ -66,9 +66,10 @@ void fpe_t::Run(){
   // output norm of final solution
   {
     //compute q.M*q
+    dlong Nentries = mesh.Nelements*mesh.Np;
+    deviceMemory<dfloat> o_Mq = platform.reserve<dfloat>(Nentries);
     mesh.MassMatrixApply(o_q, o_Mq);
 
-    dlong Nentries = mesh.Nelements*mesh.Np;
     dfloat norm2 = sqrt(platform.linAlg().innerProd(Nentries, o_q, o_Mq, mesh.comm));
 
     if(mesh.rank==0)

diff --git a/solvers/fokkerPlanck/src/fpeSetup.cpp b/solvers/fokkerPlanck/src/fpeSetup.cpp
@@ -141,16 +141,11 @@ void fpe_t::Setup(platform_t& _platform, mesh_t& _mesh,
   traceHalo = mesh.HaloTraceSetup(1); //one field
 
   // compute samples of q at interpolation nodes
-  q.malloc(Nlocal+Nhalo, 0.0);
-  o_q = platform.malloc<dfloat>(q);
+  q.malloc(Nlocal+Nhalo);
+  o_q = platform.malloc<dfloat>(Nlocal+Nhalo);
 
-  //storage for M*q during reporting
-  o_Mq = platform.malloc<dfloat>(q);
   mesh.MassMatrixKernelSetup(1); // mass matrix operator
 
-  grad.malloc((Nlocal+Nhalo)*4, 0.0);
-  o_grad  = platform.malloc<dfloat>(grad);
-
   // OCCA build stuff
   properties_t kernelInfo = mesh.props; //copy base occa properties
 

diff --git a/solvers/fokkerPlanck/src/fpeStep.cpp b/solvers/fokkerPlanck/src/fpeStep.cpp
@@ -28,8 +28,7 @@ SOFTWARE.
 
 dfloat fpe_t::MaxWaveSpeed(deviceMemory<dfloat>& o_Q, const dfloat T){
 
-  //Note: if this is on the critical path in the future, we should pre-allocate this
-  deviceMemory<dfloat> o_maxSpeed = platform.malloc<dfloat>(mesh.Nelements);
+  deviceMemory<dfloat> o_maxSpeed = platform.reserve<dfloat>(mesh.Nelements);
 
   maxWaveSpeedKernel(mesh.Nelements,
                      mesh.o_vgeo,
@@ -192,6 +191,9 @@ void fpe_t::Advection(deviceMemory<dfloat>& o_Q, deviceMemory<dfloat>& o_RHS, co
 
 void fpe_t::Diffusion(deviceMemory<dfloat>& o_Q, deviceMemory<dfloat>& o_RHS, const dfloat T) {
 
+  dlong Ntotal = (mesh.Nelements+mesh.totalHaloPairs)*mesh.Np;
+  deviceMemory<dfloat> o_grad = platform.reserve<dfloat>(4*Ntotal);
+
   //compute gradq and pack with q
   gradientKernel(mesh.Nelements,
                   mesh.o_vgeo,