Skip to content

Consolidating OpenACC device-host memory transfers #1315

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 0 additions & 32 deletions src/core_atmosphere/dynamics/mpas_atm_boundaries.F
Original file line number Diff line number Diff line change
Expand Up @@ -310,18 +310,14 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t
nullify(tend)
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1)

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]')
if (associated(tend)) then
!$acc enter data copyin(tend)
else
call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1)
!$acc enter data copyin(tend_scalars)

! Ensure the integer pointed to by idx_ptr is copied to the gpu device
call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr)
idx = idx_ptr
end if
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]')

!$acc parallel default(present)
if (associated(tend)) then
Expand All @@ -341,13 +337,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t
end if
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]')
if (associated(tend)) then
!$acc exit data delete(tend)
else
!$acc exit data delete(tend_scalars)
end if
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]')

end subroutine mpas_atm_get_bdy_tend

Expand Down Expand Up @@ -448,9 +437,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
! query the field as a scalar constituent
!
if (associated(tend) .and. associated(state)) then
MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc enter data copyin(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(2)
Expand All @@ -461,20 +447,13 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc exit data delete(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
else
call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1)
call mpas_pool_get_array(lbc, 'lbc_scalars', state_scalars, 2)
call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr)

idx=idx_ptr ! Avoid non-array pointer for OpenACC

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc enter data copyin(tend_scalars, state_scalars)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(2)
do i=1, horizDim+1
Expand All @@ -484,9 +463,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc exit data delete(tend_scalars, state_scalars)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
end if

end subroutine mpas_atm_get_bdy_state_2d
Expand Down Expand Up @@ -567,10 +543,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim,
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1)
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2)

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
!$acc enter data copyin(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(3)
do i=1, horizDim+1
Expand All @@ -582,10 +554,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim,
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
!$acc exit data delete(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')

end subroutine mpas_atm_get_bdy_state_3d


Expand Down
14 changes: 14 additions & 0 deletions src/core_atmosphere/dynamics/mpas_atm_iau.F
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,20 @@ module mpas_atm_iau
use mpas_dmpar
use mpas_constants
use mpas_log, only : mpas_log_write
use mpas_timer

!public :: atm_compute_iau_coef, atm_add_tend_anal_incr


! Preprocessor helpers for timing OpenACC-related sections of this module.
!
! When MPAS_OPENACC is defined, MPAS_ACC_TIMER_START(X) and
! MPAS_ACC_TIMER_STOP(X) expand to calls to mpas_timer_start(X) and
! mpas_timer_stop(X), where X is the timer label passed at the call site.
! When MPAS_OPENACC is not defined, both macros expand to nothing, so the
! call sites produce no code in builds without OpenACC.
!
! Each macro expands to a complete call statement, so every invocation
! should sit on a line of its own, and each START should be paired with a
! STOP that uses the same label.
#ifdef MPAS_OPENACC
#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X)
#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X)
#else
#define MPAS_ACC_TIMER_START(X)
#define MPAS_ACC_TIMER_STOP(X)
#endif


contains

!==================================================================================================
Expand Down Expand Up @@ -137,6 +148,7 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten
call mpas_pool_get_array(state, 'scalars', scalars, 1)
call mpas_pool_get_array(state, 'rho_zz', rho_zz, 2)
call mpas_pool_get_array(diag , 'rho_edge', rho_edge)
!$acc update self(theta_m, scalars, rho_zz, rho_edge)

call mpas_pool_get_dimension(state, 'moist_start', moist_start)
call mpas_pool_get_dimension(state, 'moist_end', moist_end)
Expand All @@ -149,6 +161,8 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten
! call mpas_pool_get_array(tend, 'rho_zz', tend_rho)
! call mpas_pool_get_array(tend, 'theta_m', tend_theta)
call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars)
!$acc update self(tend_scalars)
MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer')

call mpas_pool_get_array(tend_iau, 'theta', theta_amb)
call mpas_pool_get_array(tend_iau, 'rho', rho_amb)
Expand Down
Loading