Skip to content

Commit

Permalink
Silver-Mueller: Avoid Managed Memory (#2019)
Browse files Browse the repository at this point in the history
Avoid relying on managed memory usage in Silver-Mueller boundary
conditions. Previously, we initialized the coefficients on the
host, copied them to the device, and then accidentally accessed the
device memory from the host again when calculating some constants.

This change keeps the initial host memory around so we can use it
for host-only operations.
  • Loading branch information
ax3l authored Jun 17, 2021
1 parent f1a0d49 commit 495d052
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,17 @@ void FiniteDifferenceSolver::ApplySilverMuellerBoundary (
#ifdef WARPX_DIM_RZ
// Calculate relevant coefficients
amrex::Real const cdt = PhysConst::c*dt;
amrex::Real const cdt_over_dr = cdt*m_stencil_coefs_r[0];
amrex::Real const cdt_over_dr = cdt*m_h_stencil_coefs_r[0];
amrex::Real const coef1_r = (1._rt - cdt_over_dr)/(1._rt + cdt_over_dr);
amrex::Real const coef2_r = 2._rt*cdt_over_dr/(1._rt + cdt_over_dr) / PhysConst::c;
amrex::Real const coef3_r = cdt/(1._rt + cdt_over_dr) / PhysConst::c;
amrex::Real const cdt_over_dz = cdt*m_stencil_coefs_z[0];
amrex::Real const cdt_over_dz = cdt*m_h_stencil_coefs_z[0];
amrex::Real const coef1_z = (1._rt - cdt_over_dz)/(1._rt + cdt_over_dz);
amrex::Real const coef2_z = 2._rt*cdt_over_dz/(1._rt + cdt_over_dz) / PhysConst::c;

// Extract stencil coefficients
Real const * const AMREX_RESTRICT coefs_z = m_stencil_coefs_z.dataPtr();
int const n_coefs_z = m_stencil_coefs_z.size();
int const n_coefs_z = m_h_stencil_coefs_z.size();

// Extract cylindrical specific parameters
Real const dr = m_dr;
Expand Down Expand Up @@ -143,15 +143,15 @@ void FiniteDifferenceSolver::ApplySilverMuellerBoundary (
#else

// Calculate relevant coefficients
amrex::Real const cdt_over_dx = PhysConst::c*dt*m_stencil_coefs_x[0];
amrex::Real const cdt_over_dx = PhysConst::c*dt*m_h_stencil_coefs_x[0];
amrex::Real const coef1_x = (1._rt - cdt_over_dx)/(1._rt + cdt_over_dx);
amrex::Real const coef2_x = 2._rt*cdt_over_dx/(1._rt + cdt_over_dx) / PhysConst::c;
#ifdef WARPX_DIM_3D
amrex::Real const cdt_over_dy = PhysConst::c*dt*m_stencil_coefs_y[0];
amrex::Real const cdt_over_dy = PhysConst::c*dt*m_h_stencil_coefs_y[0];
amrex::Real const coef1_y = (1._rt - cdt_over_dy)/(1._rt + cdt_over_dy);
amrex::Real const coef2_y = 2._rt*cdt_over_dy/(1._rt + cdt_over_dy) / PhysConst::c;
#endif
amrex::Real const cdt_over_dz = PhysConst::c*dt*m_stencil_coefs_z[0];
amrex::Real const cdt_over_dz = PhysConst::c*dt*m_h_stencil_coefs_z[0];
amrex::Real const coef1_z = (1._rt - cdt_over_dz)/(1._rt + cdt_over_dz);
amrex::Real const coef2_z = 2._rt*cdt_over_dz/(1._rt + cdt_over_dz) / PhysConst::c;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,15 @@ class FiniteDifferenceSolver
#ifdef WARPX_DIM_RZ
amrex::Real m_dr, m_rmin;
int m_nmodes;
// host-only
amrex::Vector<amrex::Real> m_h_stencil_coefs_r, m_h_stencil_coefs_z;
// device copy after init
amrex::Gpu::DeviceVector<amrex::Real> m_stencil_coefs_r;
amrex::Gpu::DeviceVector<amrex::Real> m_stencil_coefs_z;
#else
// host-only
amrex::Vector<amrex::Real> m_h_stencil_coefs_x, m_h_stencil_coefs_y, m_h_stencil_coefs_z;
// device copy after init
amrex::Gpu::DeviceVector<amrex::Real> m_stencil_coefs_x;
amrex::Gpu::DeviceVector<amrex::Real> m_stencil_coefs_y;
amrex::Gpu::DeviceVector<amrex::Real> m_stencil_coefs_z;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,56 +36,52 @@ FiniteDifferenceSolver::FiniteDifferenceSolver (
m_nmodes = WarpX::GetInstance().n_rz_azimuthal_modes;
m_rmin = WarpX::GetInstance().Geom(0).ProbLo(0);
if (fdtd_algo == MaxwellSolverAlgo::Yee) {

amrex::Vector<amrex::Real> stencil_coefs_r, stencil_coefs_z;
CylindricalYeeAlgorithm::InitializeStencilCoefficients( cell_size,
stencil_coefs_r, stencil_coefs_z );
m_stencil_coefs_r.resize(stencil_coefs_r.size());
m_stencil_coefs_z.resize(stencil_coefs_z.size());
m_h_stencil_coefs_r, m_h_stencil_coefs_z );
m_stencil_coefs_r.resize(m_h_stencil_coefs_r.size());
m_stencil_coefs_z.resize(m_h_stencil_coefs_z.size());
amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
stencil_coefs_r.begin(), stencil_coefs_r.end(),
m_h_stencil_coefs_r.begin(), m_h_stencil_coefs_r.end(),
m_stencil_coefs_r.begin());
amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
stencil_coefs_z.begin(), stencil_coefs_z.end(),
m_h_stencil_coefs_z.begin(), m_h_stencil_coefs_z.end(),
m_stencil_coefs_z.begin());
amrex::Gpu::synchronize();
} else {
amrex::Abort("FiniteDifferenceSolver: Unknown algorithm");
}
#else
amrex::Vector<amrex::Real> stencil_coefs_x, stencil_coefs_y, stencil_coefs_z;

if (do_nodal) {

CartesianNodalAlgorithm::InitializeStencilCoefficients( cell_size,
stencil_coefs_x, stencil_coefs_y, stencil_coefs_z );
m_h_stencil_coefs_x, m_h_stencil_coefs_y, m_h_stencil_coefs_z );

} else if (fdtd_algo == MaxwellSolverAlgo::Yee) {

CartesianYeeAlgorithm::InitializeStencilCoefficients( cell_size,
stencil_coefs_x, stencil_coefs_y, stencil_coefs_z );
m_h_stencil_coefs_x, m_h_stencil_coefs_y, m_h_stencil_coefs_z );

} else if (fdtd_algo == MaxwellSolverAlgo::CKC) {

CartesianCKCAlgorithm::InitializeStencilCoefficients( cell_size,
stencil_coefs_x, stencil_coefs_y, stencil_coefs_z );
m_h_stencil_coefs_x, m_h_stencil_coefs_y, m_h_stencil_coefs_z );

} else {
amrex::Abort("FiniteDifferenceSolver: Unknown algorithm");
}

m_stencil_coefs_x.resize(stencil_coefs_x.size());
m_stencil_coefs_y.resize(stencil_coefs_y.size());
m_stencil_coefs_z.resize(stencil_coefs_z.size());
m_stencil_coefs_x.resize(m_h_stencil_coefs_x.size());
m_stencil_coefs_y.resize(m_h_stencil_coefs_y.size());
m_stencil_coefs_z.resize(m_h_stencil_coefs_z.size());

amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
stencil_coefs_x.begin(), stencil_coefs_x.end(),
m_h_stencil_coefs_x.begin(), m_h_stencil_coefs_x.end(),
m_stencil_coefs_x.begin());
amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
stencil_coefs_y.begin(), stencil_coefs_y.end(),
m_h_stencil_coefs_y.begin(), m_h_stencil_coefs_y.end(),
m_stencil_coefs_y.begin());
amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
stencil_coefs_z.begin(), stencil_coefs_z.end(),
m_h_stencil_coefs_z.begin(), m_h_stencil_coefs_z.end(),
m_stencil_coefs_z.begin());
amrex::Gpu::synchronize();
#endif
Expand Down

0 comments on commit 495d052

Please sign in to comment.