Merge branch 'pgas' of github.com:BoxLib-Codes/BoxLib into pgas
WeiqunZhang committed Dec 9, 2015
2 parents 8008487 + 788ced4 commit 9d483e8
Showing 23 changed files with 676 additions and 62 deletions.
7 changes: 4 additions & 3 deletions MiniApps/PGAS_SMC/main.cpp
@@ -9,9 +9,10 @@ main (int argc, char* argv[])

BL_PROFILE_VAR("main()", pmain);

-    SMC smc;
-
-    smc.evolve();
+    {
+        SMC smc;
+        smc.evolve();
+    }

BL_PROFILE_VAR_STOP(pmain);

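The new braces give the SMC object its own scope, so its destructor runs before BL_PROFILE_VAR_STOP(pmain) and teardown is captured inside the profiled region, presumably the point of this change. A minimal C++ sketch of the pattern, with hypothetical Timer/Solver stand-ins rather than the BoxLib types:

```cpp
#include <iostream>

// Stand-ins for BL_PROFILE_VAR / BL_PROFILE_VAR_STOP and the SMC class
// (hypothetical names, not the BoxLib API).
struct Timer {
    const char* name;
    void stop() const { std::cout << name << " stopped\n"; }
};

struct Solver {
    ~Solver() { std::cout << "Solver destroyed\n"; }
    void evolve() { std::cout << "evolving\n"; }
};

int main() {
    Timer pmain{"main()"};
    {
        Solver smc;      // scoped so its destructor runs before the timer stops
        smc.evolve();
    }                    // smc destroyed here
    pmain.stop();        // profiling region closes after all teardown
}
```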
24 changes: 17 additions & 7 deletions Src/C_BaseLib/FabArray.H
@@ -427,6 +427,7 @@ public:
Array<int> indexMap;
Array<int> localIndexMap;
Array<Box> tileArray;
+    TileArray () : nuse(-1) {;}
};

const TileArray* getTileArray (const IntVect& tilesize) const;
@@ -509,9 +510,20 @@ protected:
++nuse;
}
void print () {
+        // in case some FabArrays have not been deleted.
+        for (TA_outer_map::const_iterator it_o = m_TheTileArrayCache.begin();
+             it_o != m_TheTileArrayCache.end(); ++it_o)
+        {
+            for (TA_inner_map::const_iterator it_i = it_o->second.begin();
+                 it_i != it_o->second.end(); ++it_i)
+            {
+                maxuse = std::max(maxuse, it_i->second.nuse);
+            }
+        }
std::cout << "TileArrayCache: tot # of builds: " << nbuild
<< ", tot # of erasures: " << nerase << ", tot # of uses: " << nuse << "\n"
<< " max size: " << maxsize << ", max # uses: " << maxuse
<< ", tot # of erasures: " << nerase << ",\n"
<< " tot # of uses: " << nuse
<< ", max size: " << maxsize << ", max # uses: " << maxuse
<< std::endl;
}
};
@@ -1667,13 +1679,11 @@ FabArray<FAB>::defineDoit (const BoxArray& bxs,
}

m_bdkey = getBDKey();
-    std::map<BDKey, int>::iterator it = m_BD_count.find(m_bdkey);
-    if (it == m_BD_count.end()) {
-        m_BD_count[m_bdkey] = 1;
+    int cnt = ++(m_BD_count[m_bdkey]);
+    if (cnt == 1) { // new one
        m_FA_stats.recordMaxNumBoxArrays(m_BD_count.size());
    } else {
-        ++(it->second);
-        m_FA_stats.recordMaxNumBAUse(it->second);
+        m_FA_stats.recordMaxNumBAUse(cnt);
}

if(alloc == Fab_allocate) {
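The rewritten bookkeeping in defineDoit leans on the fact that std::map::operator[] value-initializes a missing int entry to zero, so a single increment replaces the find/insert branch. A self-contained sketch of the idiom (string keys stand in for the real BDKey type):

```cpp
#include <iostream>
#include <map>
#include <string>

// operator[] creates a zero-valued entry on first access, so one increment
// handles both "new key" and "seen before" without an explicit find().
int main() {
    std::map<std::string, int> bd_count;   // stand-in for m_BD_count

    for (const std::string key : {"keyA", "keyB", "keyA"}) {
        int cnt = ++bd_count[key];
        if (cnt == 1) {   // new one
            std::cout << key << ": new, distinct keys = " << bd_count.size() << '\n';
        } else {
            std::cout << key << ": use #" << cnt << '\n';
        }
    }
}
```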
29 changes: 5 additions & 24 deletions Src/C_BaseLib/FabArray.cpp
@@ -924,41 +924,19 @@ FabArrayBase::getTileArray (const IntVect& tilesize) const
#pragma omp critical(gettilearray)
#endif
{
-    TA_outer_map& tao = FabArrayBase::m_TheTileArrayCache;
-
    BL_ASSERT(getBDKey() == m_bdkey);

-    TA_outer_map::iterator tao_it = tao.find(m_bdkey);
-    if (tao_it == tao.end())
-    {
-        std::pair<TA_outer_map::iterator,bool> ret =
-            tao.insert(std::make_pair(m_bdkey, TA_inner_map()));
-        tao_it = ret.first;
-    }
-
-    TA_inner_map& tai = tao_it->second;
-
-    TA_inner_map::iterator tai_it = tai.find(tilesize);
-    if (tai_it == tai.end())
-    {
-        std::pair<TA_inner_map::iterator,bool> ret =
-            tai.insert(std::make_pair(tilesize, TileArray()));
-        p = &(ret.first->second);
+    p = &FabArrayBase::m_TheTileArrayCache[m_bdkey][tilesize];
+    if (p->nuse == -1) {
        buildTileArray(tilesize, *p);
        m_TAC_stats.recordBuild();
    }
-    else
-    {
-        p = &(tai_it->second);
-    }
#ifdef _OPENMP
#pragma omp master
#endif
{
++(p->nuse);
m_TAC_stats.recordUse();
}

}

return p;
@@ -1275,4 +1253,7 @@ MFGhostIter::Initialize ()
lta.localIndexMap.push_back(alllocalindex[i+nskip]);
lta.tileArray.push_back(*bli++);
}

+    currentIndex = beginIndex = 0;
+    endIndex = lta.indexMap.size();
}
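getTileArray now relies on the same operator[] behavior together with the new TileArray constructor: a freshly default-constructed entry carries nuse == -1, which marks it as not yet built. A compact sketch of the lookup-or-build pattern using simplified stand-in types:

```cpp
#include <iostream>
#include <map>
#include <vector>

struct TileArray {
    int nuse = -1;               // -1 means "not built yet"
    std::vector<int> tiles;
};

using InnerMap = std::map<int, TileArray>;   // key: tile size
std::map<int, InnerMap> tile_cache;          // key: (BoxArray,DistributionMap) id

TileArray* get_tile_array(int bd_key, int tilesize) {
    // one line replaces the find+insert dance; the sentinel says if it is fresh
    TileArray* p = &tile_cache[bd_key][tilesize];
    if (p->nuse == -1) {
        p->tiles.assign(4, tilesize);        // stand-in for buildTileArray()
    }
    ++p->nuse;                               // first use moves the sentinel to 0
    return p;
}

int main() {
    get_tile_array(7, 8);
    std::cout << get_tile_array(7, 8)->nuse << '\n';  // prints 1: second use (counts from 0)
}
```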
20 changes: 15 additions & 5 deletions Src/C_BaseLib/MultiFab.cpp
@@ -1598,11 +1598,21 @@ MultiFab::SendMultiFabToSidecars (MultiFab *mf)
const int *box_index_type = box.type().getVect();
const int *smallEnd = box.smallEnd().getVect();
const int *bigEnd = box.bigEnd().getVect();
-      // getVect() requires a constant pointer, but MPI buffers require
-      // non-constant pointers. Sorry this is awful.
-      ParallelDescriptor::Bcast(const_cast<int*>(box_index_type), BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
-      ParallelDescriptor::Bcast(const_cast<int*>(smallEnd)      , BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
-      ParallelDescriptor::Bcast(const_cast<int*>(bigEnd)        , BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
+      // getVect() returns a const pointer, but MPI buffers require
+      // non-constant pointers. So we have to copy the data to these
+      // temporary arrays and use those as the buffers for MPI.
+      int box_index_type_MPI_buff[BL_SPACEDIM];
+      int smallEnd_MPI_buff[BL_SPACEDIM];
+      int bigEnd_MPI_buff[BL_SPACEDIM];
+      for (unsigned int i = 0; i < BL_SPACEDIM; ++i) {
+          box_index_type_MPI_buff[i] = box_index_type[i];
+          smallEnd_MPI_buff[i]       = smallEnd[i];
+          bigEnd_MPI_buff[i]         = bigEnd[i];
+      }
+      ParallelDescriptor::Bcast(&box_index_type_MPI_buff[0], BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
+      ParallelDescriptor::Bcast(&smallEnd_MPI_buff[0]      , BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
+      ParallelDescriptor::Bcast(&bigEnd_MPI_buff[0]        , BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
}

int nComp = mf->nComp();
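The broadcast fix replaces const_cast with explicit copies: MPI writes into the broadcast buffer on non-root ranks, so handing it a pointer into const storage is unsafe. A minimal stand-alone sketch with plain MPI_Bcast (get_extents is a hypothetical stand-in for Box::getVect()):

```cpp
#include <mpi.h>

// Pretend this is a const view into some object's internals, like Box::getVect().
const int* get_extents() {
    static const int ext[3] = {64, 64, 64};
    return ext;
}

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    // Copy the const data into a mutable staging buffer: receivers write into
    // it, so casting away const from truly read-only storage is undefined behavior.
    int buf[3];
    const int* src = get_extents();
    for (int i = 0; i < 3; ++i) buf[i] = src[i];

    MPI_Bcast(buf, 3, MPI_INT, /*root=*/0, MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}
```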
14 changes: 12 additions & 2 deletions Src/F_BaseLib/plotfile.f90
@@ -519,7 +519,11 @@ subroutine fab_bind(pf, i, j)
hi(1:pf%dim) = upb(pf%grids(i)%fabs(j)%bx)

ng = pf%grids(i)%fabs(j)%ng
-    allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng, nc))
+    if (lo(3) .eq. hi(3)) then
+       allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3):hi(3), nc))
+    else
+       allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng, nc))
+    endif
call fabio_read_d(fd, &
pf%grids(i)%fabs(j)%offset, &
pf%grids(i)%fabs(j)%p(:,:,:,:), &
@@ -553,7 +557,13 @@ subroutine fab_bind_comp_vec(pf, i, j, c)
lo(1:pf%dim) = lwb(pf%grids(i)%fabs(j)%bx)
hi(1:pf%dim) = upb(pf%grids(i)%fabs(j)%bx)
ng = pf%grids(i)%fabs(j)%ng
-    allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng, size(c)))
+
+    ! This handles the case where it really is 2D, so there are no ghost cells in the z-direction
+    if (lo(3) .eq. hi(3)) then
+       allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3):hi(3), size(c)))
+    else
+       allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng, size(c)))
+    end if
do n = 1, size(c)
call fabio_read_skip_d(fd, &
pf%grids(i)%fabs(j)%offset, &
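The allocation fix gives a degenerate z-direction (lo(3) == hi(3), i.e. a 2D dataset stored as 3D) no ghost cells while padding the other directions by ng on both sides. The same bounds logic in a short C++ sketch:

```cpp
#include <iostream>

// Allocated range [alo, ahi] in one dimension: pad by ng ghost cells, except
// that a degenerate direction (lo == hi, the z-extent of a 2D plotfile) gets none.
void alloc_bounds(int lo, int hi, int ng, int& alo, int& ahi) {
    if (lo == hi) { alo = lo;      ahi = hi;      }  // degenerate: no ghosts
    else          { alo = lo - ng; ahi = hi + ng; }
}

int main() {
    int alo, ahi;
    alloc_bounds(0, 63, 2, alo, ahi);
    std::cout << alo << ".." << ahi << '\n';   // -2..65
    alloc_bounds(0, 0, 2, alo, ahi);           // z-direction of a 2D dataset
    std::cout << alo << ".." << ahi << '\n';   // 0..0
}
```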
[file name not shown]
@@ -28,6 +28,13 @@ max_grid_size = 128
tol_rel = 1.e-10
tol_abs = 0.0

+# The Lp.maxorder flag sets the order of approximation used by the BoxLib_C solver
+# at Dirichlet boundaries.
+# The mg.maxorder flag sets the order of approximation used by the BoxLib_F solver
+# at Dirichlet boundaries.
+Lp.maxorder = 2
+mg.maxorder = 2
+
#For BoxLib_F:
# mg.v = 0 shows just the Run time.
# mg.v = 1 shows Initial rhs, Final Iter, and Run time
59 changes: 59 additions & 0 deletions Tests/LinearSolvers/ComparisonTest/inputs-rt-Dir-ord3
@@ -0,0 +1,59 @@
# solver_type = BoxLib_F
# solver_type = BoxLib_C
# solver_type = Hypre
solver_type = All

# bc_type = Neumann
# bc_type = Periodic
bc_type = Dirichlet

composite_solve = 1

a = 1.e-3
b = 1.0
# a case with constant diffusion coefficient
sigma = 1.0
w = 0.05
# a case with variable diffusion coefficient
# sigma = 10.0 # controls the size of jump
# w = 0.05 # controls the width of the jump

write_plot = 1 # write plotfile
comp_norm = 1 # compute 2 norm of the error

n_cell = 128
max_level = 2
max_grid_size = 128

tol_rel = 1.e-10
tol_abs = 0.0

# The Lp.maxorder flag sets the order of approximation used by the BoxLib_C solver
# at Dirichlet boundaries.
# The mg.maxorder flag sets the order of approximation used by the BoxLib_F solver
# at Dirichlet boundaries.
Lp.maxorder = 3
mg.maxorder = 3

#For BoxLib_F:
# mg.v = 0 shows just the Run time.
# mg.v = 1 shows Initial rhs, Final Iter, and Run time
# mg.v = 2 shows Initial rhs, Residual at each V-cycle, Final Iter, and Run time
#
#For BoxLib_C:
# mg.v = 0 shows just the Run time.
# mg.v = 1 shows Initial rhs, Final Iter, and Run time
# mg.v = 2 shows Initial rhs, Residual at each V-cycle, Final Iter, and Run time

mg.v = 2

# These are only relevant for the BoxLib_C solver --
# * set this to do a comparison with BoxLib_F and hypre
mg.use_Anorm_for_convergence = 0
#
# * set this to turn off verbosity of the BoxLib_C bottom solver
cg.v = 0

# These are only relevant for the hypre solver
hypre.kdim = 5 # dimension of Krylov subspace
hypre.verbose = 2
7 changes: 7 additions & 0 deletions Tests/LinearSolvers/ComparisonTest/inputs.3d
@@ -28,6 +28,13 @@ max_grid_size = 128
tol_rel = 1.e-10
tol_abs = 0.0

+# The Lp.maxorder flag sets the order of approximation used by the BoxLib_C solver
+# at Dirichlet boundaries.
+# The mg.maxorder flag sets the order of approximation used by the BoxLib_F solver
+# at Dirichlet boundaries.
+Lp.maxorder = 3
+mg.maxorder = 3
+
#For BoxLib_F:
# mg.v = 0 shows just the Run time.
# mg.v = 1 shows Initial rhs, Final Iter, and Run time
7 changes: 7 additions & 0 deletions Tests/LinearSolvers/ComparisonTest/solve_with_F90.cpp
@@ -20,13 +20,18 @@ void solve_with_F90(PArray<MultiFab>& soln, Real a, Real b,

int composite_solve = 0;
Real tolerance_rel, tolerance_abs;
+  int maxorder = 3;
{
ParmParse pp;
pp.query("composite_solve", composite_solve);

pp.get("tol_rel", tolerance_rel);
pp.get("tol_abs", tolerance_abs);
}
+  {
+      ParmParse pp("mg");
+      pp.query("maxorder", maxorder);
+  }

int nlevel = geom.size();

@@ -72,6 +77,7 @@ void solve_with_F90(PArray<MultiFab>& soln, Real a, Real b,
FMultiGrid fmg(geom);

fmg.set_bc(mg_bc, soln[0]);
+    fmg.set_maxorder(maxorder);

fmg.set_scalars(a, b);
fmg.set_coefficients(const_cast<PArray<MultiFab>&>(alph), bcoeffs);
@@ -90,6 +96,7 @@ void solve_with_F90(PArray<MultiFab>& soln, Real a, Real b,
} else {
fmg.set_bc(mg_bc, soln[ilev-1], soln[ilev]);
}
+      fmg.set_maxorder(maxorder);

fmg.set_scalars(a, b);
fmg.set_coefficients(const_cast<MultiFab&>(alph[ilev]), bcoeffs[ilev]);
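The new block reads mg.maxorder through a prefix-scoped ParmParse, which is how the Lp.maxorder/mg.maxorder lines in the inputs files above reach the solvers. A toy sketch of that query-with-default behavior (a stand-in to show the mechanics, not the real ParmParse implementation):

```cpp
#include <iostream>
#include <map>
#include <string>

// Prefix-scoped view over "key = value" inputs, mimicking ParmParse pp("mg").
struct Params {
    std::string prefix;
    const std::map<std::string, int>& table;

    // query() only overwrites the caller's default when the prefixed key exists.
    void query(const std::string& name, int& value) const {
        auto it = table.find(prefix + "." + name);
        if (it != table.end()) value = it->second;
    }
};

int main() {
    std::map<std::string, int> inputs = {{"mg.maxorder", 3}, {"Lp.maxorder", 3}};

    int maxorder = 2;                  // default used when the key is absent
    Params pp{"mg", inputs};
    pp.query("maxorder", maxorder);    // picks up "mg.maxorder = 3"

    std::cout << "maxorder = " << maxorder << '\n';   // prints 3
}
```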
5 changes: 4 additions & 1 deletion Tutorials/AMR_Adv_C/Exec/SingleVortex/inputs
@@ -11,7 +11,10 @@ amr.n_cell = 64 64 64

# TIME STEP CONTROL
adv.cfl = 0.7 # cfl number for hyperbolic system

+                   # In this test problem, the velocity is
+                   # time-dependent. We could use 0.9 in
+                   # the 3D test, but need to use 0.7 in 2D
+                   # to satisfy the CFL condition.
# VERBOSITY
adv.v = 1 # verbosity in Adv
amr.v = 1 # verbosity in Amr
2 changes: 2 additions & 0 deletions Tutorials/AMR_Adv_F/Exec/SingleVortex/inputs_2d
@@ -6,6 +6,8 @@
n_cell = 64 ! number of cells on each side of the domain at the base level
max_grid_size = 16 ! max number of cells on each side of an individual grid
cfl = 0.7d0 ! advective cfl
+! In this test problem, the velocity is time-dependent.
+! We need to use 0.7 to satisfy the CFL condition.

! The stopping criterion will be whichever of these is reached first:
nsteps = 1000 ! number of time steps
36 changes: 35 additions & 1 deletion Tutorials/AMR_Adv_F/Source/advance.f90
@@ -45,6 +45,8 @@ subroutine advance(mla,phi_old,phi_new,velocity,bndry_flx,dx,dt,time,the_bc_towe
! of refinement have been completed
integer :: num_steps_completed(mla%nlevel)

+    real(kind=dp_t) :: vmax

dm = mla%dim
nlevs = mla%nlevel
ng_p = phi_new(1)%ng
@@ -83,6 +85,23 @@

call set_velocity(mla,velocity,dx,time+0.5d0*dt(1))

+    ! make sure we are not violating cfl since the time step is based
+    ! on the velocity at t^n
+    do n=1,nlevs
+       vmax = -HUGE(1.d0)
+       do i=1,dm
+          vmax = max(vmax,norm_inf(velocity(n,i)))
+       end do
+       if (dt(n) .gt. dx(n)/vmax) then
+          if ( parallel_IOProcessor() ) then
+             print*,'Violating CFL at level n=',n
+             print*,'dt,vmax,dx',dt(n),vmax,dx
+             print*,'sigma=dt*vmax/dx',dt(n)*vmax/dx(n)
+          end if
+          call bl_error()
+       end if
+    end do

! Copy phi_new from the previous time step into phi_old for this time step
do n = 1, nlevs
call multifab_copy(mdst=phi_old(n),msrc=phi_new(n),ng=ng_p)
@@ -120,7 +139,7 @@ recursive subroutine update_level(n,mla,phi_old,phi_new,velocity,bndry_flx,&
integer , intent(in ) :: num_substeps
integer , intent(inout) :: num_steps_completed(:)

-    real(kind=dp_t) :: alpha, scale, tplushalf
+    real(kind=dp_t) :: alpha, scale, tplushalf, vmax
integer :: istep, i, dm, ng_p
! Array of edge-based multifabs; one for each direction
type(multifab) :: flux(mla%dim)
@@ -137,6 +156,21 @@ recursive subroutine update_level(n,mla,phi_old,phi_new,velocity,bndry_flx,&
! compute velocity at half-time level
call set_velocity(mla,velocity,dx,tplushalf)

+    ! make sure we are not violating cfl since the time step is based
+    ! on the velocity at t^n
+    vmax = -HUGE(1.d0)
+    do i=1,dm
+       vmax = max(vmax,norm_inf(velocity(n,i)))
+    end do
+    if (dt(n) .gt. dx(n)/vmax) then
+       if ( parallel_IOProcessor() ) then
+          print*,'Violating CFL at level n=',n
+          print*,'dt,vmax,dx',dt(n),vmax,dx
+          print*,'sigma=dt*vmax/dx',dt(n)*vmax/dx(n)
+       end if
+       call bl_error()
+    end if

! Copy phi_new from the previous time step into phi_old for this time step
call multifab_copy(mdst=phi_old(n),msrc=phi_new(n),ng=ng_p)

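The added guard re-checks the CFL condition dt <= dx/max|v| because dt was sized from the velocity at t^n while the advance uses the (possibly larger) half-time velocity. A small C++ sketch of the same check, with illustrative names; the real code takes per-level multifab inf-norms:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Abort if dt violates the CFL condition dt <= dx / max|v| on one level.
void check_cfl(double dt, double dx, const std::vector<double>& velocity) {
    double vmax = 0.0;
    for (double v : velocity) vmax = std::max(vmax, std::fabs(v));  // inf-norm

    if (dt > dx / vmax) {
        std::fprintf(stderr, "Violating CFL: dt=%g vmax=%g dx=%g sigma=%g\n",
                     dt, vmax, dx, dt * vmax / dx);
        std::abort();
    }
}

int main() {
    const double dx = 1.0 / 64;
    std::vector<double> vel = {0.3, -0.9, 0.5};
    check_cfl(0.7 * dx / 0.9, dx, vel);   // sigma = 0.7: passes
    std::puts("CFL check passed");
}
```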
4 changes: 2 additions & 2 deletions Tutorials/MultiGrid_C/COEF_2D.F
@@ -36,10 +36,10 @@ subroutine FORT_COMP_ASOL (

if (ibnd .eq. 0 .or. ibnd.eq. LO_NEUMANN) then
asol(i,j) = 1.d0 * cos(tpi*x) * cos(tpi*y)
-     $ + .25d0 * cos(fpi*x) * cos(fpi*y)
+     $ + .25d0 * cos(fpi*x) * cos(fpi*y)
else if (ibnd .eq. LO_DIRICHLET) then
asol(i,j) = 1.d0 * sin(tpi*x) * sin(tpi*y)
-     $ + .25d0 * sin(fpi*x) * sin(fpi*y)
+     $ + .25d0 * sin(fpi*x) * sin(fpi*y)
else
print *, 'FORT_COMP_ASOL: unknown boundary type'
stop
Expand Down