Merge branch 'pgas' of github.com:BoxLib-Codes/BoxLib into pgas
WeiqunZhang committed Dec 9, 2015
2 parents 8008487 + 788ced4 commit 9d483e8
Showing 23 changed files with 676 additions and 62 deletions.
7 changes: 4 additions & 3 deletions MiniApps/PGAS_SMC/main.cpp
@@ -9,9 +9,10 @@ main (int argc, char* argv[])

BL_PROFILE_VAR("main()", pmain);

-    SMC smc;
-
-    smc.evolve();
+    {
+        SMC smc;
+        smc.evolve();
+    }

BL_PROFILE_VAR_STOP(pmain);

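The new braces give the SMC object its own scope, so its destructor runs before BL_PROFILE_VAR_STOP(pmain) and teardown is captured inside the profiled region, presumably the point of this change. A minimal C++ sketch of the pattern, with hypothetical Timer/Solver stand-ins rather than the BoxLib types:

```cpp
#include <iostream>

// Stand-ins for BL_PROFILE_VAR / BL_PROFILE_VAR_STOP and the SMC class
// (hypothetical names, not the BoxLib API).
struct Timer {
    const char* name;
    void stop() const { std::cout << name << " stopped\n"; }
};

struct Solver {
    ~Solver() { std::cout << "Solver destroyed\n"; }
    void evolve() { std::cout << "evolving\n"; }
};

int main() {
    Timer pmain{"main()"};
    {
        Solver smc;      // scoped so its destructor runs before the timer stops
        smc.evolve();
    }                    // smc destroyed here
    pmain.stop();        // profiling region closes after all teardown
}
```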
24 changes: 17 additions & 7 deletions Src/C_BaseLib/FabArray.H
@@ -427,6 +427,7 @@ public:
Array<int> indexMap;
Array<int> localIndexMap;
Array<Box> tileArray;
+    TileArray () : nuse(-1) {;}
};

const TileArray* getTileArray (const IntVect& tilesize) const;
@@ -509,9 +510,20 @@ protected:
++nuse;
}
void print () {
+        // in case some FabArrays have not been deleted.
+        for (TA_outer_map::const_iterator it_o = m_TheTileArrayCache.begin();
+             it_o != m_TheTileArrayCache.end(); ++it_o)
+        {
+            for (TA_inner_map::const_iterator it_i = it_o->second.begin();
+                 it_i != it_o->second.end(); ++it_i)
+            {
+                maxuse = std::max(maxuse, it_i->second.nuse);
+            }
+        }
std::cout << "TileArrayCache: tot # of builds: " << nbuild
<< ", tot # of erasures: " << nerase << ", tot # of uses: " << nuse << "\n"
<< " max size: " << maxsize << ", max # uses: " << maxuse
<< ", tot # of erasures: " << nerase << ",\n"
<< " tot # of uses: " << nuse
<< ", max size: " << maxsize << ", max # uses: " << maxuse
<< std::endl;
}
};
@@ -1667,13 +1679,11 @@ FabArray<FAB>::defineDoit (const BoxArray& bxs,
}

m_bdkey = getBDKey();
-    std::map<BDKey, int>::iterator it = m_BD_count.find(m_bdkey);
-    if (it == m_BD_count.end()) {
-        m_BD_count[m_bdkey] = 1;
+    int cnt = ++(m_BD_count[m_bdkey]);
+    if (cnt == 1) { // new one
        m_FA_stats.recordMaxNumBoxArrays(m_BD_count.size());
    } else {
-        ++(it->second);
-        m_FA_stats.recordMaxNumBAUse(it->second);
+        m_FA_stats.recordMaxNumBAUse(cnt);
}

if(alloc == Fab_allocate) {
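The rewritten bookkeeping in defineDoit leans on the fact that std::map::operator[] value-initializes a missing int entry to zero, so a single increment replaces the find/insert branch. A self-contained sketch of the idiom (string keys stand in for the real BDKey type):

```cpp
#include <iostream>
#include <map>
#include <string>

// operator[] creates a zero-valued entry on first access, so one increment
// handles both "new key" and "seen before" without an explicit find().
int main() {
    std::map<std::string, int> bd_count;   // stand-in for m_BD_count

    for (const std::string key : {"keyA", "keyB", "keyA"}) {
        int cnt = ++bd_count[key];
        if (cnt == 1) {   // new one
            std::cout << key << ": new, distinct keys = " << bd_count.size() << '\n';
        } else {
            std::cout << key << ": use #" << cnt << '\n';
        }
    }
}
```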
29 changes: 5 additions & 24 deletions Src/C_BaseLib/FabArray.cpp
@@ -924,41 +924,19 @@ FabArrayBase::getTileArray (const IntVect& tilesize) const
#pragma omp critical(gettilearray)
#endif
{
-    TA_outer_map& tao = FabArrayBase::m_TheTileArrayCache;
-
    BL_ASSERT(getBDKey() == m_bdkey);

-    TA_outer_map::iterator tao_it = tao.find(m_bdkey);
-    if (tao_it == tao.end())
-    {
-        std::pair<TA_outer_map::iterator,bool> ret =
-            tao.insert(std::make_pair(m_bdkey, TA_inner_map()));
-        tao_it = ret.first;
-    }
-
-    TA_inner_map& tai = tao_it->second;
-
-    TA_inner_map::iterator tai_it = tai.find(tilesize);
-    if (tai_it == tai.end())
-    {
-        std::pair<TA_inner_map::iterator,bool> ret =
-            tai.insert(std::make_pair(tilesize, TileArray()));
-        p = &(ret.first->second);
+    p = &FabArrayBase::m_TheTileArrayCache[m_bdkey][tilesize];
+    if (p->nuse == -1) {
        buildTileArray(tilesize, *p);
        m_TAC_stats.recordBuild();
    }
-    else
-    {
-        p = &(tai_it->second);
-    }
#ifdef _OPENMP
#pragma omp master
#endif
{
++(p->nuse);
m_TAC_stats.recordUse();
}

}

return p;
@@ -1275,4 +1253,7 @@ MFGhostIter::Initialize ()
lta.localIndexMap.push_back(alllocalindex[i+nskip]);
lta.tileArray.push_back(*bli++);
}

+    currentIndex = beginIndex = 0;
+    endIndex = lta.indexMap.size();
}
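getTileArray now relies on the same operator[] behavior together with the new TileArray constructor: a freshly default-constructed entry carries nuse == -1, which marks it as not yet built. A compact sketch of the lookup-or-build pattern using simplified stand-in types:

```cpp
#include <iostream>
#include <map>
#include <vector>

struct TileArray {
    int nuse = -1;               // -1 means "not built yet"
    std::vector<int> tiles;
};

using InnerMap = std::map<int, TileArray>;   // key: tile size
std::map<int, InnerMap> tile_cache;          // key: (BoxArray,DistributionMap) id

TileArray* get_tile_array(int bd_key, int tilesize) {
    // one line replaces the find+insert dance; the sentinel says if it is fresh
    TileArray* p = &tile_cache[bd_key][tilesize];
    if (p->nuse == -1) {
        p->tiles.assign(4, tilesize);        // stand-in for buildTileArray()
    }
    ++p->nuse;                               // first use moves the sentinel to 0
    return p;
}

int main() {
    get_tile_array(7, 8);
    std::cout << get_tile_array(7, 8)->nuse << '\n';  // prints 1: second use (counts from 0)
}
```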
20 changes: 15 additions & 5 deletions Src/C_BaseLib/MultiFab.cpp
@@ -1598,11 +1598,21 @@ MultiFab::SendMultiFabToSidecars (MultiFab *mf)
const int *box_index_type = box.type().getVect();
const int *smallEnd = box.smallEnd().getVect();
const int *bigEnd = box.bigEnd().getVect();
-      // getVect() requires a constant pointer, but MPI buffers require
-      // non-constant pointers. Sorry this is awful.
-      ParallelDescriptor::Bcast(const_cast<int*>(box_index_type), BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
-      ParallelDescriptor::Bcast(const_cast<int*>(smallEnd)      , BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
-      ParallelDescriptor::Bcast(const_cast<int*>(bigEnd)        , BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
+      // getVect() returns a const pointer, but MPI buffers require
+      // non-constant pointers. So we have to copy the data to these
+      // temporary arrays and use those as the buffers for MPI.
+      int box_index_type_MPI_buff[BL_SPACEDIM];
+      int smallEnd_MPI_buff[BL_SPACEDIM];
+      int bigEnd_MPI_buff[BL_SPACEDIM];
+      for (unsigned int i = 0; i < BL_SPACEDIM; ++i) {
+          box_index_type_MPI_buff[i] = box_index_type[i];
+          smallEnd_MPI_buff[i]       = smallEnd[i];
+          bigEnd_MPI_buff[i]         = bigEnd[i];
+      }
+      ParallelDescriptor::Bcast(&box_index_type_MPI_buff[0], BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
+      ParallelDescriptor::Bcast(&smallEnd_MPI_buff[0]      , BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
+      ParallelDescriptor::Bcast(&bigEnd_MPI_buff[0]        , BL_SPACEDIM, MPI_IntraGroup_Broadcast_Rank, ParallelDescriptor::CommunicatorInter());
}

int nComp = mf->nComp();
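The broadcast fix replaces const_cast with explicit copies: MPI writes into the broadcast buffer on non-root ranks, so handing it a pointer into const storage is unsafe. A minimal stand-alone sketch with plain MPI_Bcast (get_extents is a hypothetical stand-in for Box::getVect()):

```cpp
#include <mpi.h>

// Pretend this is a const view into some object's internals, like Box::getVect().
const int* get_extents() {
    static const int ext[3] = {64, 64, 64};
    return ext;
}

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    // Copy the const data into a mutable staging buffer: receivers write into
    // it, so casting away const from truly read-only storage is undefined behavior.
    int buf[3];
    const int* src = get_extents();
    for (int i = 0; i < 3; ++i) buf[i] = src[i];

    MPI_Bcast(buf, 3, MPI_INT, /*root=*/0, MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}
```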
14 changes: 12 additions & 2 deletions Src/F_BaseLib/plotfile.f90
@@ -519,7 +519,11 @@ subroutine fab_bind(pf, i, j)
hi(1:pf%dim) = upb(pf%grids(i)%fabs(j)%bx)

ng = pf%grids(i)%fabs(j)%ng
-    allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng, nc))
+    if (lo(3) .eq. hi(3)) then
+       allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3):hi(3), nc))
+    else
+       allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng, nc))
+    endif
call fabio_read_d(fd, &
pf%grids(i)%fabs(j)%offset, &
pf%grids(i)%fabs(j)%p(:,:,:,:), &
@@ -553,7 +557,13 @@ subroutine fab_bind_comp_vec(pf, i, j, c)
lo(1:pf%dim) = lwb(pf%grids(i)%fabs(j)%bx)
hi(1:pf%dim) = upb(pf%grids(i)%fabs(j)%bx)
ng = pf%grids(i)%fabs(j)%ng
-    allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng, size(c)))
+
+    ! This handles the case where it really is 2D, so there are no ghost cells in the z-direction
+    if (lo(3) .eq. hi(3)) then
+       allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3):hi(3), size(c)))
+    else
+       allocate(pf%grids(i)%fabs(j)%p(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng, size(c)))
+    end if
do n = 1, size(c)
call fabio_read_skip_d(fd, &
pf%grids(i)%fabs(j)%offset, &
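The allocation fix gives a degenerate z-direction (lo(3) == hi(3), i.e. a 2D dataset stored as 3D) no ghost cells while padding the other directions by ng on both sides. The same bounds logic in a short C++ sketch:

```cpp
#include <iostream>

// Allocated range [alo, ahi] in one dimension: pad by ng ghost cells, except
// that a degenerate direction (lo == hi, the z-extent of a 2D plotfile) gets none.
void alloc_bounds(int lo, int hi, int ng, int& alo, int& ahi) {
    if (lo == hi) { alo = lo;      ahi = hi;      }  // degenerate: no ghosts
    else          { alo = lo - ng; ahi = hi + ng; }
}

int main() {
    int alo, ahi;
    alloc_bounds(0, 63, 2, alo, ahi);
    std::cout << alo << ".." << ahi << '\n';   // -2..65
    alloc_bounds(0, 0, 2, alo, ahi);           // z-direction of a 2D dataset
    std::cout << alo << ".." << ahi << '\n';   // 0..0
}
```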
[file name not shown]
@@ -28,6 +28,13 @@ max_grid_size = 128
tol_rel = 1.e-10
tol_abs = 0.0

+# The Lp.maxorder flag sets the order of approximation used by the BoxLib_C solver
+# at Dirichlet boundaries.
+# The mg.maxorder flag sets the order of approximation used by the BoxLib_F solver
+# at Dirichlet boundaries.
+Lp.maxorder = 2
+mg.maxorder = 2
+
#For BoxLib_F:
# mg.v = 0 shows just the Run time.
# mg.v = 1 shows Initial rhs, Final Iter, and Run time
59 changes: 59 additions & 0 deletions Tests/LinearSolvers/ComparisonTest/inputs-rt-Dir-ord3
@@ -0,0 +1,59 @@
# solver_type = BoxLib_F
# solver_type = BoxLib_C
# solver_type = Hypre
solver_type = All

# bc_type = Neumann
# bc_type = Periodic
bc_type = Dirichlet

composite_solve = 1

a = 1.e-3
b = 1.0
# a case with constant diffusion coefficient
sigma = 1.0
w = 0.05
# a case with variable diffusion coefficient
# sigma = 10.0 # controls the size of jump
# w = 0.05 # controls the width of the jump

write_plot = 1 # write plotfile
comp_norm = 1 # compute 2 norm of the error

n_cell = 128
max_level = 2
max_grid_size = 128

tol_rel = 1.e-10
tol_abs = 0.0

# The Lp.maxorder flag sets the order of approximation used by the BoxLib_C solver
# at Dirichlet boundaries.
# The mg.maxorder flag sets the order of approximation used by the BoxLib_F solver
# at Dirichlet boundaries.
Lp.maxorder = 3
mg.maxorder = 3

#For BoxLib_F:
# mg.v = 0 shows just the Run time.
# mg.v = 1 shows Initial rhs, Final Iter, and Run time
# mg.v = 2 shows Initial rhs, Residual at each V-cycle, Final Iter, and Run time
#
#For BoxLib_C:
# mg.v = 0 shows just the Run time.
# mg.v = 1 shows Initial rhs, Final Iter, and Run time
# mg.v = 2 shows Initial rhs, Residual at each V-cycle, Final Iter, and Run time

mg.v = 2

# These are only relevant for the BoxLib_C solver --
# * set this to do a comparison with BoxLib_F and hypre
mg.use_Anorm_for_convergence = 0
#
# * set this to turn off verbosity of the BoxLib_C bottom solver
cg.v = 0

# These are only relevant for the hypre solver
hypre.kdim = 5 # dimension of Krylov subspace
hypre.verbose = 2
7 changes: 7 additions & 0 deletions Tests/LinearSolvers/ComparisonTest/inputs.3d
@@ -28,6 +28,13 @@ max_grid_size = 128
tol_rel = 1.e-10
tol_abs = 0.0

+# The Lp.maxorder flag sets the order of approximation used by the BoxLib_C solver
+# at Dirichlet boundaries.
+# The mg.maxorder flag sets the order of approximation used by the BoxLib_F solver
+# at Dirichlet boundaries.
+Lp.maxorder = 3
+mg.maxorder = 3
+
#For BoxLib_F:
# mg.v = 0 shows just the Run time.
# mg.v = 1 shows Initial rhs, Final Iter, and Run time
7 changes: 7 additions & 0 deletions Tests/LinearSolvers/ComparisonTest/solve_with_F90.cpp
@@ -20,13 +20,18 @@ void solve_with_F90(PArray<MultiFab>& soln, Real a, Real b,

int composite_solve = 0;
Real tolerance_rel, tolerance_abs;
+  int maxorder = 3;
{
ParmParse pp;
pp.query("composite_solve", composite_solve);

pp.get("tol_rel", tolerance_rel);
pp.get("tol_abs", tolerance_abs);
}
+  {
+      ParmParse pp("mg");
+      pp.query("maxorder", maxorder);
+  }

int nlevel = geom.size();

@@ -72,6 +77,7 @@ void solve_with_F90(PArray<MultiFab>& soln, Real a, Real b,
FMultiGrid fmg(geom);

fmg.set_bc(mg_bc, soln[0]);
+    fmg.set_maxorder(maxorder);

fmg.set_scalars(a, b);
fmg.set_coefficients(const_cast<PArray<MultiFab>&>(alph), bcoeffs);
@@ -90,6 +96,7 @@ void solve_with_F90(PArray<MultiFab>& soln, Real a, Real b,
} else {
fmg.set_bc(mg_bc, soln[ilev-1], soln[ilev]);
}
+      fmg.set_maxorder(maxorder);

fmg.set_scalars(a, b);
fmg.set_coefficients(const_cast<MultiFab&>(alph[ilev]), bcoeffs[ilev]);
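The new block reads mg.maxorder through a prefix-scoped ParmParse, which is how the Lp.maxorder/mg.maxorder lines in the inputs files above reach the solvers. A toy sketch of that query-with-default behavior (a stand-in to show the mechanics, not the real ParmParse implementation):

```cpp
#include <iostream>
#include <map>
#include <string>

// Prefix-scoped view over "key = value" inputs, mimicking ParmParse pp("mg").
struct Params {
    std::string prefix;
    const std::map<std::string, int>& table;

    // query() only overwrites the caller's default when the prefixed key exists.
    void query(const std::string& name, int& value) const {
        auto it = table.find(prefix + "." + name);
        if (it != table.end()) value = it->second;
    }
};

int main() {
    std::map<std::string, int> inputs = {{"mg.maxorder", 3}, {"Lp.maxorder", 3}};

    int maxorder = 2;                  // default used when the key is absent
    Params pp{"mg", inputs};
    pp.query("maxorder", maxorder);    // picks up "mg.maxorder = 3"

    std::cout << "maxorder = " << maxorder << '\n';   // prints 3
}
```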
5 changes: 4 additions & 1 deletion Tutorials/AMR_Adv_C/Exec/SingleVortex/inputs
@@ -11,7 +11,10 @@ amr.n_cell = 64 64 64

# TIME STEP CONTROL
adv.cfl = 0.7 # cfl number for hyperbolic system

+                   # In this test problem, the velocity is
+                   # time-dependent. We could use 0.9 in
+                   # the 3D test, but need to use 0.7 in 2D
+                   # to satisfy the CFL condition.
# VERBOSITY
adv.v = 1 # verbosity in Adv
amr.v = 1 # verbosity in Amr
2 changes: 2 additions & 0 deletions Tutorials/AMR_Adv_F/Exec/SingleVortex/inputs_2d
@@ -6,6 +6,8 @@
n_cell = 64 ! number of cells on each side of the domain at the base level
max_grid_size = 16 ! max number of cells on each side of an individual grid
cfl = 0.7d0 ! advective cfl
+! In this test problem, the velocity is time-dependent.
+! We need to use 0.7 to satisfy the CFL condition.

! The stopping criterion will be whichever of these is reached first:
nsteps = 1000 ! number of time steps
36 changes: 35 additions & 1 deletion Tutorials/AMR_Adv_F/Source/advance.f90
@@ -45,6 +45,8 @@ subroutine advance(mla,phi_old,phi_new,velocity,bndry_flx,dx,dt,time,the_bc_towe
! of refinement have been completed
integer :: num_steps_completed(mla%nlevel)

+    real(kind=dp_t) :: vmax

dm = mla%dim
nlevs = mla%nlevel
ng_p = phi_new(1)%ng
@@ -83,6 +85,23 @@

call set_velocity(mla,velocity,dx,time+0.5d0*dt(1))

+    ! make sure we are not violating cfl since the time step is based
+    ! on the velocity at t^n
+    do n=1,nlevs
+       vmax = -HUGE(1.d0)
+       do i=1,dm
+          vmax = max(vmax,norm_inf(velocity(n,i)))
+       end do
+       if (dt(n) .gt. dx(n)/vmax) then
+          if ( parallel_IOProcessor() ) then
+             print*,'Violating CFL at level n=',n
+             print*,'dt,vmax,dx',dt(n),vmax,dx
+             print*,'sigma=dt*vmax/dx',dt(n)*vmax/dx(n)
+          end if
+          call bl_error()
+       end if
+    end do

! Copy phi_new from the previous time step into phi_old for this time step
do n = 1, nlevs
call multifab_copy(mdst=phi_old(n),msrc=phi_new(n),ng=ng_p)
@@ -120,7 +139,7 @@ recursive subroutine update_level(n,mla,phi_old,phi_new,velocity,bndry_flx,&
integer , intent(in ) :: num_substeps
integer , intent(inout) :: num_steps_completed(:)

-    real(kind=dp_t) :: alpha, scale, tplushalf
+    real(kind=dp_t) :: alpha, scale, tplushalf, vmax
integer :: istep, i, dm, ng_p
! Array of edge-based multifabs; one for each direction
type(multifab) :: flux(mla%dim)
@@ -137,6 +156,21 @@ recursive subroutine update_level(n,mla,phi_old,phi_new,velocity,bndry_flx,&
! compute velocity at half-time level
call set_velocity(mla,velocity,dx,tplushalf)

+    ! make sure we are not violating cfl since the time step is based
+    ! on the velocity at t^n
+    vmax = -HUGE(1.d0)
+    do i=1,dm
+       vmax = max(vmax,norm_inf(velocity(n,i)))
+    end do
+    if (dt(n) .gt. dx(n)/vmax) then
+       if ( parallel_IOProcessor() ) then
+          print*,'Violating CFL at level n=',n
+          print*,'dt,vmax,dx',dt(n),vmax,dx
+          print*,'sigma=dt*vmax/dx',dt(n)*vmax/dx(n)
+       end if
+       call bl_error()
+    end if

! Copy phi_new from the previous time step into phi_old for this time step
call multifab_copy(mdst=phi_old(n),msrc=phi_new(n),ng=ng_p)

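The added guard re-checks the CFL condition dt <= dx/max|v| because dt was sized from the velocity at t^n while the advance uses the (possibly larger) half-time velocity. A small C++ sketch of the same check, with illustrative names; the real code takes per-level multifab inf-norms:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Abort if dt violates the CFL condition dt <= dx / max|v| on one level.
void check_cfl(double dt, double dx, const std::vector<double>& velocity) {
    double vmax = 0.0;
    for (double v : velocity) vmax = std::max(vmax, std::fabs(v));  // inf-norm

    if (dt > dx / vmax) {
        std::fprintf(stderr, "Violating CFL: dt=%g vmax=%g dx=%g sigma=%g\n",
                     dt, vmax, dx, dt * vmax / dx);
        std::abort();
    }
}

int main() {
    const double dx = 1.0 / 64;
    std::vector<double> vel = {0.3, -0.9, 0.5};
    check_cfl(0.7 * dx / 0.9, dx, vel);   // sigma = 0.7: passes
    std::puts("CFL check passed");
}
```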
4 changes: 2 additions & 2 deletions Tutorials/MultiGrid_C/COEF_2D.F
@@ -36,10 +36,10 @@ subroutine FORT_COMP_ASOL (

if (ibnd .eq. 0 .or. ibnd.eq. LO_NEUMANN) then
asol(i,j) = 1.d0 * cos(tpi*x) * cos(tpi*y)
-     $ + .25d0 * cos(fpi*x) * cos(fpi*y)
+     $ + .25d0 * cos(fpi*x) * cos(fpi*y)
else if (ibnd .eq. LO_DIRICHLET) then
asol(i,j) = 1.d0 * sin(tpi*x) * sin(tpi*y)
-     $ + .25d0 * sin(fpi*x) * sin(fpi*y)
+     $ + .25d0 * sin(fpi*x) * sin(fpi*y)
else
print *, 'FORT_COMP_ASOL: unknown boundary type'
stop
Expand Down