Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove INTEL20_WORKAROUND and port to mustang #462

Merged
merged 1 commit into from
Jun 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 2 additions & 134 deletions cicecore/cicedynB/dynamics/ice_transport_remap.F90
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ module ice_transport_remap

use ice_kinds_mod
use ice_blocks, only: nx_block, ny_block
use ice_communicate, only: my_task, ice_barrier
use ice_communicate, only: my_task
use ice_constants, only: c0, c1, c2, c12, p333, p4, p5, p6, &
eps13, eps16, &
field_loc_center, field_type_scalar, &
field_loc_NEcorner, field_type_vector
use ice_domain_size, only: max_blocks, ncat
use ice_fileunits, only: nu_diag, flush_fileunit
use ice_fileunits, only: nu_diag
use ice_exit, only: abort_ice
use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted
use icepack_intfc, only: icepack_query_parameters
Expand Down Expand Up @@ -381,127 +381,57 @@ subroutine horizontal_remap (dt, ntrace, &
ilo,ihi,jlo,jhi,&! beginning and end of physical domain
n, m ! ice category, tracer indices

! tcraig, the intel 20.0.1 compiler generates a segfault when entering this subroutine
! at runtime.
! This is probably a compiler bug and a workaround is to allocate the temporary data
! rather than define it statically. Initial results don't show any slowdown, but
! to keep the issue highlighted, an ifdef was created as a workaround.

#ifdef INTEL20_WORKAROUND
integer (kind=int_kind), dimension(:,:), allocatable :: &
#else
integer (kind=int_kind), dimension(0:ncat,max_blocks) :: &
#endif
icellsnc ! number of cells with ice

#ifdef INTEL20_WORKAROUND
integer (kind=int_kind), dimension(:,:), allocatable :: &
#else
integer (kind=int_kind), dimension(nx_block*ny_block,0:ncat) :: &
#endif
indxinc, indxjnc ! compressed i/j indices

#ifdef INTEL20_WORKAROUND
real (kind=dbl_kind), dimension(:,:), allocatable :: &
#else
real (kind=dbl_kind), dimension(nx_block,ny_block) :: &
#endif
edgearea_e ,&! area of departure regions for east edges
edgearea_n ! area of departure regions for north edges

#ifdef INTEL20_WORKAROUND
real (kind=dbl_kind), dimension (:,:,:), allocatable :: &
#else
real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: &
#endif
dpx ,&! x coordinates of departure points at cell corners
dpy ! y coordinates of departure points at cell corners

#ifdef INTEL20_WORKAROUND
real (kind=dbl_kind), dimension(:,:,:,:), allocatable :: &
#else
real (kind=dbl_kind), dimension(nx_block,ny_block,0:ncat,max_blocks) :: &
#endif
mc ,&! mass at geometric center of cell
mx, my ! limited derivative of mass wrt x and y

#ifdef INTEL20_WORKAROUND
real (kind=dbl_kind), dimension(:,:,:), allocatable :: &
#else
real (kind=dbl_kind), dimension(nx_block,ny_block,0:ncat) :: &
#endif
mmask ! = 1. if mass is present, = 0. otherwise

#ifdef INTEL20_WORKAROUND
real (kind=dbl_kind), dimension (:,:,:,:,:), allocatable :: &
#else
real (kind=dbl_kind), dimension (nx_block,ny_block,ntrace,ncat,max_blocks) :: &
#endif
tc ,&! tracer values at geometric center of cell
tx, ty ! limited derivative of tracer wrt x and y

#ifdef INTEL20_WORKAROUND
real (kind=dbl_kind), dimension (:,:,:,:), allocatable :: &
#else
real (kind=dbl_kind), dimension (nx_block,ny_block,ntrace,ncat) :: &
#endif
tmask ! = 1. if tracer is present, = 0. otherwise

#ifdef INTEL20_WORKAROUND
real (kind=dbl_kind), dimension (:,:,:), allocatable :: &
#else
real (kind=dbl_kind), dimension (nx_block,ny_block,0:ncat) :: &
#endif
mflxe, mflxn ! mass transports across E and N cell edges

#ifdef INTEL20_WORKAROUND
real (kind=dbl_kind), dimension (:,:,:,:), allocatable :: &
#else
real (kind=dbl_kind), dimension (nx_block,ny_block,ntrace,ncat) :: &
#endif
mtflxe, mtflxn ! mass*tracer transports across E and N cell edges

#ifdef INTEL20_WORKAROUND
real (kind=dbl_kind), dimension (:,:,:), allocatable :: &
#else
real (kind=dbl_kind), dimension (nx_block,ny_block,ngroups) :: &
#endif
triarea ! area of east-edge departure triangle

#ifdef INTEL20_WORKAROUND
real (kind=dbl_kind), dimension (:,:,:,:), allocatable :: &
#else
real (kind=dbl_kind), dimension (nx_block,ny_block,0:nvert,ngroups) :: &
#endif
xp, yp ! x and y coordinates of special triangle points
! (need 4 points for triangle integrals)
#ifdef INTEL20_WORKAROUND
integer (kind=int_kind), dimension (:,:,:), allocatable :: &
#else
integer (kind=int_kind), dimension (nx_block,ny_block,ngroups) :: &
#endif
iflux ,&! i index of cell contributing transport
jflux ! j index of cell contributing transport

#ifdef INTEL20_WORKAROUND
integer (kind=int_kind), dimension(:,:), allocatable :: &
#else
integer (kind=int_kind), dimension(ngroups,max_blocks) :: &
#endif
icellsng ! number of cells with ice

#ifdef INTEL20_WORKAROUND
integer (kind=int_kind), dimension(:,:), allocatable :: &
#else
integer (kind=int_kind), dimension(nx_block*ny_block,ngroups) :: &
#endif
indxing, indxjng ! compressed i/j indices

#ifdef INTEL20_WORKAROUND
integer (kind=int_kind), dimension(:,:,:), allocatable :: &
#else
integer (kind=int_kind), dimension(nx_block,ny_block,max_blocks) :: &
#endif
halomask ! temporary mask for fast halo updates

logical (kind=log_kind) :: &
Expand All @@ -525,37 +455,6 @@ subroutine horizontal_remap (dt, ntrace, &
!---! Remap the open water area (without tracers).
!---!-------------------------------------------------------------------

#ifdef INTEL20_WORKAROUND
allocate(icellsnc(0:ncat,max_blocks))
allocate(indxinc(nx_block*ny_block,0:ncat))
allocate(indxjnc(nx_block*ny_block,0:ncat))
allocate(edgearea_e(nx_block,ny_block))
allocate(edgearea_n(nx_block,ny_block))
allocate(dpx(nx_block,ny_block,max_blocks))
allocate(dpy(nx_block,ny_block,max_blocks))
allocate(mc(nx_block,ny_block,0:ncat,max_blocks))
allocate(mx(nx_block,ny_block,0:ncat,max_blocks))
allocate(my(nx_block,ny_block,0:ncat,max_blocks))
allocate(mmask(nx_block,ny_block,0:ncat))
allocate(tc(nx_block,ny_block,ntrace,ncat,max_blocks))
allocate(tx(nx_block,ny_block,ntrace,ncat,max_blocks))
allocate(ty(nx_block,ny_block,ntrace,ncat,max_blocks))
allocate(tmask(nx_block,ny_block,ntrace,ncat))
allocate(mflxe(nx_block,ny_block,0:ncat))
allocate(mflxn(nx_block,ny_block,0:ncat))
allocate(mtflxe(nx_block,ny_block,ntrace,ncat))
allocate(mtflxn(nx_block,ny_block,ntrace,ncat))
allocate(triarea(nx_block,ny_block,ngroups))
allocate(xp(nx_block,ny_block,0:nvert,ngroups))
allocate(yp(nx_block,ny_block,0:nvert,ngroups))
allocate(iflux(nx_block,ny_block,ngroups))
allocate(jflux(nx_block,ny_block,ngroups))
allocate(icellsng(ngroups,max_blocks))
allocate(indxing(nx_block*ny_block,ngroups))
allocate(indxjng(nx_block*ny_block,ngroups))
allocate(halomask(nx_block,ny_block,max_blocks))
#endif

!--- tcraig, tcx, this omp loop leads to a seg fault in gnu
!--- need to check private variables and debug further
!$TCXOMP PARALLEL DO PRIVATE(iblk,ilo,ihi,jlo,jhi,this_block,n,m, &
Expand Down Expand Up @@ -948,37 +847,6 @@ subroutine horizontal_remap (dt, ntrace, &
enddo ! iblk
!$TCXOMP END PARALLEL DO

#ifdef INTEL20_WORKAROUND
deallocate(icellsnc)
deallocate(indxinc)
deallocate(indxjnc)
deallocate(edgearea_e)
deallocate(edgearea_n)
deallocate(dpx)
deallocate(dpy)
deallocate(mc)
deallocate(mx)
deallocate(my)
deallocate(mmask)
deallocate(tc)
deallocate(tx)
deallocate(ty)
deallocate(tmask)
deallocate(mflxe)
deallocate(mflxn)
deallocate(mtflxe)
deallocate(mtflxn)
deallocate(triarea)
deallocate(xp)
deallocate(yp)
deallocate(iflux)
deallocate(jflux)
deallocate(icellsng)
deallocate(indxing)
deallocate(indxjng)
deallocate(halomask)
#endif

end subroutine horizontal_remap

!=======================================================================
Expand Down
3 changes: 2 additions & 1 deletion configuration/scripts/cice.batch.csh
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,15 @@ cat >> ${jobfile} << EOFB
#PBS -l walltime=${batchtime}
EOFB

else if (${ICE_MACHINE} =~ thunder* || ${ICE_MACHINE} =~ gordon* || ${ICE_MACHINE} =~ conrad* || ${ICE_MACHINE} =~ gaffney* || ${ICE_MACHINE} =~ koehr*) then
else if (${ICE_MACHINE} =~ thunder* || ${ICE_MACHINE} =~ gordon* || ${ICE_MACHINE} =~ conrad* || ${ICE_MACHINE} =~ gaffney* || ${ICE_MACHINE} =~ koehr* || ${ICE_MACHINE} =~ mustang) then
cat >> ${jobfile} << EOFB
#PBS -N ${shortcase}
#PBS -q ${queue}
#PBS -A ${acct}
#PBS -l select=${nnodes}:ncpus=${maxtpn}:mpiprocs=${taskpernode}
#PBS -l walltime=${batchtime}
#PBS -j oe
#PBS -W umask=022
###PBS -M username@domain.com
###PBS -m be
EOFB
Expand Down
2 changes: 1 addition & 1 deletion configuration/scripts/cice.launch.csh
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ EOFR
endif

#=======
else if (${ICE_MACHINE} =~ gaffney* || ${ICE_MACHINE} =~ koehr*) then
else if (${ICE_MACHINE} =~ gaffney* || ${ICE_MACHINE} =~ koehr* || ${ICE_MACHINE} =~ mustang*) then
if (${ICE_COMMDIR} =~ serial*) then
cat >> ${jobfile} << EOFR
./cice >&! \$ICE_RUNLOG_FILE
Expand Down
46 changes: 46 additions & 0 deletions configuration/scripts/machines/Macros.mustang_intel18
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#==============================================================================
# Macros file for AFRL mustang, intel compiler
#
# Sets the preprocessor, compiler, and linker variables for the build.
# The following variables are read here but defined outside this file:
#   ICE_CPPDEFS, ICE_BLDDEBUG, ICE_COMMDIR, ICE_THREADED, NETCDF_PATH
#==============================================================================

# Preprocessor command and definitions; ICE_CPPDEFS is appended as supplied.
CPP := fpp
CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS}
# C flags: compile only, optimized, precise floating-point model,
# tuned for the build host's instruction set (-xHost).
CFLAGS := -c -O2 -fp-model precise -xHost

# Source-form flags: -132 extends fixed-form line length, -FR selects free form.
FIXEDFLAGS := -132
FREEFLAGS := -FR
# Base Fortran flags shared by debug and optimized builds:
#   -convert big_endian / -assume byterecl : unformatted I/O byte order and
#                                            record-length units
#   -ftz                                   : flush denormals to zero
#   -traceback                             : source traceback on runtime errors
FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -traceback -xHost
# Unoptimized flag set; not referenced in this file
# (presumably consumed by the main Makefile -- TODO confirm).
FFLAGS_NOOPT:= -O0

# Debug builds disable optimization and enable runtime checks and
# floating-point exception trapping; all other builds use -O2.
ifeq ($(ICE_BLDDEBUG), true)
FFLAGS += -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created
else
FFLAGS += -O2
endif

# Serial (S*) and MPI (MPI*) compiler commands. The MPI variants are the same
# icc/ifort commands; no MPI wrapper is used, and the MPI library is linked
# explicitly through -lmpi in SLIBS below.
SCC := icc
SFC := ifort
MPICC := icc
MPIFC := ifort

# Select the compiler pair according to the communication directory setting.
ifeq ($(ICE_COMMDIR), mpi)
FC := $(MPIFC)
CC := $(MPICC)
else
FC := $(SFC)
CC := $(SCC)
endif
# Link with the selected Fortran compiler.
LD:= $(FC)

#defined by env
#NETCDF_PATH := $(NETCDF_PATH)

# NetCDF include and library locations derived from NETCDF_PATH.
# NOTE(review): -lmpi is added to SLIBS regardless of ICE_COMMDIR -- confirm
# this is intended for serial builds.
INCLDIR += -I$(NETCDF_PATH)/include
LIB_NETCDF := $(NETCDF_PATH)/lib
SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -lmpi

# Threaded builds add OpenMP to the link, C, and Fortran flags.
ifeq ($(ICE_THREADED), true)
LDFLAGS += -qopenmp
CFLAGS += -qopenmp
FFLAGS += -qopenmp
endif

46 changes: 46 additions & 0 deletions configuration/scripts/machines/Macros.mustang_intel19
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#==============================================================================
# Macros file for AFRL mustang, intel compiler
#
# Sets the preprocessor, compiler, and linker variables for the build.
# The following variables are read here but defined outside this file:
#   ICE_CPPDEFS, ICE_BLDDEBUG, ICE_COMMDIR, ICE_THREADED, NETCDF_PATH
#==============================================================================

# Preprocessor command and definitions; ICE_CPPDEFS is appended as supplied.
CPP := fpp
CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS}
# C flags: compile only, optimized, precise floating-point model,
# tuned for the build host's instruction set (-xHost).
CFLAGS := -c -O2 -fp-model precise -xHost

# Source-form flags: -132 extends fixed-form line length, -FR selects free form.
FIXEDFLAGS := -132
FREEFLAGS := -FR
# Base Fortran flags shared by debug and optimized builds:
#   -convert big_endian / -assume byterecl : unformatted I/O byte order and
#                                            record-length units
#   -ftz                                   : flush denormals to zero
#   -traceback                             : source traceback on runtime errors
FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -traceback -xHost
# Unoptimized flag set; not referenced in this file
# (presumably consumed by the main Makefile -- TODO confirm).
FFLAGS_NOOPT:= -O0

# Debug builds disable optimization and enable runtime checks and
# floating-point exception trapping; all other builds use -O2.
ifeq ($(ICE_BLDDEBUG), true)
FFLAGS += -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created
else
FFLAGS += -O2
endif

# Serial (S*) and MPI (MPI*) compiler commands. The MPI variants are the same
# icc/ifort commands; no MPI wrapper is used, and the MPI library is linked
# explicitly through -lmpi in SLIBS below.
SCC := icc
SFC := ifort
MPICC := icc
MPIFC := ifort

# Select the compiler pair according to the communication directory setting.
ifeq ($(ICE_COMMDIR), mpi)
FC := $(MPIFC)
CC := $(MPICC)
else
FC := $(SFC)
CC := $(SCC)
endif
# Link with the selected Fortran compiler.
LD:= $(FC)

#defined by env
#NETCDF_PATH := $(NETCDF_PATH)

# NetCDF include and library locations derived from NETCDF_PATH.
# NOTE(review): -lmpi is added to SLIBS regardless of ICE_COMMDIR -- confirm
# this is intended for serial builds.
INCLDIR += -I$(NETCDF_PATH)/include
LIB_NETCDF := $(NETCDF_PATH)/lib
SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -lmpi

# Threaded builds add OpenMP to the link, C, and Fortran flags.
ifeq ($(ICE_THREADED), true)
LDFLAGS += -qopenmp
CFLAGS += -qopenmp
FFLAGS += -qopenmp
endif

46 changes: 46 additions & 0 deletions configuration/scripts/machines/Macros.mustang_intel20
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#==============================================================================
# Macros file for AFRL mustang, intel compiler
#
# Sets the preprocessor, compiler, and linker variables for the build.
# The following variables are read here but defined outside this file:
#   ICE_CPPDEFS, ICE_BLDDEBUG, ICE_COMMDIR, ICE_THREADED, NETCDF_PATH
#==============================================================================

# Preprocessor command and definitions; ICE_CPPDEFS is appended as supplied.
CPP := fpp
CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS}
# C flags: compile only, optimized, precise floating-point model,
# tuned for the build host's instruction set (-xHost).
CFLAGS := -c -O2 -fp-model precise -xHost

# Source-form flags: -132 extends fixed-form line length, -FR selects free form.
FIXEDFLAGS := -132
FREEFLAGS := -FR
# Base Fortran flags shared by debug and optimized builds:
#   -convert big_endian / -assume byterecl : unformatted I/O byte order and
#                                            record-length units
#   -ftz                                   : flush denormals to zero
#   -traceback                             : source traceback on runtime errors
FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -traceback -xHost
# Unoptimized flag set; not referenced in this file
# (presumably consumed by the main Makefile -- TODO confirm).
FFLAGS_NOOPT:= -O0

# Debug builds disable optimization and enable runtime checks and
# floating-point exception trapping; all other builds use -O2.
ifeq ($(ICE_BLDDEBUG), true)
FFLAGS += -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created
else
FFLAGS += -O2
endif

# Serial (S*) and MPI (MPI*) compiler commands. The MPI variants are the same
# icc/ifort commands; no MPI wrapper is used, and the MPI library is linked
# explicitly through -lmpi in SLIBS below.
SCC := icc
SFC := ifort
MPICC := icc
MPIFC := ifort

# Select the compiler pair according to the communication directory setting.
ifeq ($(ICE_COMMDIR), mpi)
FC := $(MPIFC)
CC := $(MPICC)
else
FC := $(SFC)
CC := $(SCC)
endif
# Link with the selected Fortran compiler.
LD:= $(FC)

#defined by env
#NETCDF_PATH := $(NETCDF_PATH)

# NetCDF include and library locations derived from NETCDF_PATH.
# NOTE(review): -lmpi is added to SLIBS regardless of ICE_COMMDIR -- confirm
# this is intended for serial builds.
INCLDIR += -I$(NETCDF_PATH)/include
LIB_NETCDF := $(NETCDF_PATH)/lib
SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -lmpi

# Threaded builds add OpenMP to the link, C, and Fortran flags.
ifeq ($(ICE_THREADED), true)
LDFLAGS += -qopenmp
CFLAGS += -qopenmp
FFLAGS += -qopenmp
endif

2 changes: 1 addition & 1 deletion configuration/scripts/machines/env.izumi_intel
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ endif
setenv ICE_MACHINE_ENVNAME izumi
setenv ICE_MACHINE_COMPILER intel
setenv ICE_MACHINE_MAKE gmake
setenv ICE_MACHINE_CPPDEFS '"-DINTEL20_WORKAROUND"'
setenv ICE_MACHINE_CPPDEFS ""
setenv ICE_MACHINE_WKDIR /scratch/cluster/$user/CICE_RUNS
setenv ICE_MACHINE_INPUTDATA /fs/cgd/csm/inputdata
setenv ICE_MACHINE_BASELINE /scratch/cluster/$user/CICE_BASELINE
Expand Down
Loading