From 2b1727d51f6f92ab0cb952babea41ebc200147c3 Mon Sep 17 00:00:00 2001 From: Tony Craig Date: Sun, 7 Jun 2020 10:03:16 -0700 Subject: [PATCH] update izumi and implement intel20 workaround (#460) --- cice.setup | 13 ++ .../cicedynB/dynamics/ice_transport_remap.F90 | 163 ++++++++++++++++-- cicecore/shared/ice_init_column.F90 | 2 +- configuration/scripts/machines/env.izumi_gnu | 2 +- .../scripts/machines/env.izumi_intel | 3 +- configuration/scripts/machines/env.izumi_nag | 3 +- configuration/scripts/machines/env.izumi_pgi | 2 +- icepack | 2 +- 8 files changed, 166 insertions(+), 24 deletions(-) diff --git a/cice.setup b/cice.setup index 9038b5b93..a964478f2 100755 --- a/cice.setup +++ b/cice.setup @@ -572,6 +572,13 @@ EOF continue endif + # unset env variables that might not exist in env machine file + # to avoid any carry over during multi compiler suites + unsetenv ICE_MACHINE_MAXTHREADS + unsetenv ICE_MACHINE_MAXPES + unsetenv ICE_MACHINE_QUIETMODE + unsetenv ICE_MACHINE_CPPDEFS + unsetenv ICE_MACHINE_QSTAT source ${ICE_SCRIPTS}/machines/env.${machcomp} -nomodules || exit 2 # Obtain the test name, sets, grid, and PE information from .ts file @@ -782,6 +789,11 @@ EOF set quietmode = ${ICE_MACHINE_QUIETMODE} endif + set cppdefs = "" + if ($?ICE_MACHINE_CPPDEFS) then + set cppdefs = ${ICE_MACHINE_CPPDEFS} + endif + if (${acct} == ${spval}) then if (-e ~/.cice_proj) then set acct = `head -1 ~/.cice_proj` @@ -884,6 +896,7 @@ setenv ICE_BASELINE ${basedir_tmp} setenv ICE_BASEGEN ${baseGen} setenv ICE_BASECOM ${baseCom} setenv ICE_SPVAL ${spval} +setenv ICE_CPPDEFS ${cppdefs} setenv ICE_QUIETMODE ${quietmode} setenv ICE_TEST ${test} setenv ICE_TESTNAME ${testname_noid} diff --git a/cicecore/cicedynB/dynamics/ice_transport_remap.F90 b/cicecore/cicedynB/dynamics/ice_transport_remap.F90 index ff48f1497..2c333e3a8 100644 --- a/cicecore/cicedynB/dynamics/ice_transport_remap.F90 +++ b/cicecore/cicedynB/dynamics/ice_transport_remap.F90 @@ -30,13 +30,14 @@ module 
ice_transport_remap use ice_kinds_mod - use ice_communicate, only: my_task + use ice_blocks, only: nx_block, ny_block + use ice_communicate, only: my_task, ice_barrier use ice_constants, only: c0, c1, c2, c12, p333, p4, p5, p6, & eps13, eps16, & field_loc_center, field_type_scalar, & field_loc_NEcorner, field_type_vector use ice_domain_size, only: max_blocks, ncat - use ice_fileunits, only: nu_diag + use ice_fileunits, only: nu_diag, flush_fileunit use ice_exit, only: abort_ice use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted use icepack_intfc, only: icepack_query_parameters @@ -254,7 +255,6 @@ module ice_transport_remap subroutine init_remap use ice_domain, only: nblocks - use ice_blocks, only: nx_block, ny_block use ice_grid, only: xav, yav, xxav, yyav ! dxt, dyt, xyav, & ! xxxav, xxyav, xyyav, yyyav @@ -324,7 +324,7 @@ subroutine horizontal_remap (dt, ntrace, & use ice_boundary, only: ice_halo, ice_HaloMask, ice_HaloUpdate, & ice_HaloDestroy use ice_domain, only: nblocks, blocks_ice, halo_info, maskhalo_remap - use ice_blocks, only: block, get_block, nghost, nx_block, ny_block + use ice_blocks, only: block, get_block, nghost use ice_grid, only: HTE, HTN, dxu, dyu, & tarear, hm, & xav, yav, xxav, yyav @@ -381,62 +381,129 @@ subroutine horizontal_remap (dt, ntrace, & ilo,ihi,jlo,jhi,&! beginning and end of physical domain n, m ! ice category, tracer indices +! tcraig, the intel 20.0.1 compiler generates a segfault when entering this subroutine +! at runtime. +! This is probably a compiler bug and a workaround is to allocate the temporary arrays +! rather than declare them as automatic arrays. Initial results don't show any slowdown, but +! to keep the issue highlighted, the workaround is guarded by the INTEL20_WORKAROUND ifdef. + +#ifdef INTEL20_WORKAROUND + integer (kind=int_kind), dimension(:,:), allocatable :: & +#else integer (kind=int_kind), dimension(0:ncat,max_blocks) :: & +#endif icellsnc ! 
number of cells with ice - integer (kind=int_kind), & - dimension(nx_block*ny_block,0:ncat) :: & +#ifdef INTEL20_WORKAROUND + integer (kind=int_kind), dimension(:,:), allocatable :: & +#else + integer (kind=int_kind), dimension(nx_block*ny_block,0:ncat) :: & +#endif indxinc, indxjnc ! compressed i/j indices +#ifdef INTEL20_WORKAROUND + real (kind=dbl_kind), dimension(:,:), allocatable :: & +#else real (kind=dbl_kind), dimension(nx_block,ny_block) :: & +#endif edgearea_e ,&! area of departure regions for east edges edgearea_n ! area of departure regions for north edges +#ifdef INTEL20_WORKAROUND + real (kind=dbl_kind), dimension (:,:,:), allocatable :: & +#else real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & +#endif dpx ,&! x coordinates of departure points at cell corners dpy ! y coordinates of departure points at cell corners +#ifdef INTEL20_WORKAROUND + real (kind=dbl_kind), dimension(:,:,:,:), allocatable :: & +#else real (kind=dbl_kind), dimension(nx_block,ny_block,0:ncat,max_blocks) :: & +#endif mc ,&! mass at geometric center of cell mx, my ! limited derivative of mass wrt x and y +#ifdef INTEL20_WORKAROUND + real (kind=dbl_kind), dimension(:,:,:), allocatable :: & +#else real (kind=dbl_kind), dimension(nx_block,ny_block,0:ncat) :: & +#endif mmask ! = 1. if mass is present, = 0. otherwise - real (kind=dbl_kind), & - dimension (nx_block,ny_block,ntrace,ncat,max_blocks) :: & +#ifdef INTEL20_WORKAROUND + real (kind=dbl_kind), dimension (:,:,:,:,:), allocatable :: & +#else + real (kind=dbl_kind), dimension (nx_block,ny_block,ntrace,ncat,max_blocks) :: & +#endif tc ,&! tracer values at geometric center of cell tx, ty ! limited derivative of tracer wrt x and y - real (kind=dbl_kind), & - dimension (nx_block,ny_block,ntrace,ncat) :: & +#ifdef INTEL20_WORKAROUND + real (kind=dbl_kind), dimension (:,:,:,:), allocatable :: & +#else + real (kind=dbl_kind), dimension (nx_block,ny_block,ntrace,ncat) :: & +#endif tmask ! = 1. if tracer is present, = 0. 
otherwise +#ifdef INTEL20_WORKAROUND + real (kind=dbl_kind), dimension (:,:,:), allocatable :: & +#else real (kind=dbl_kind), dimension (nx_block,ny_block,0:ncat) :: & +#endif mflxe, mflxn ! mass transports across E and N cell edges +#ifdef INTEL20_WORKAROUND + real (kind=dbl_kind), dimension (:,:,:,:), allocatable :: & +#else real (kind=dbl_kind), dimension (nx_block,ny_block,ntrace,ncat) :: & +#endif mtflxe, mtflxn ! mass*tracer transports across E and N cell edges +#ifdef INTEL20_WORKAROUND + real (kind=dbl_kind), dimension (:,:,:), allocatable :: & +#else real (kind=dbl_kind), dimension (nx_block,ny_block,ngroups) :: & +#endif triarea ! area of east-edge departure triangle +#ifdef INTEL20_WORKAROUND + real (kind=dbl_kind), dimension (:,:,:,:), allocatable :: & +#else real (kind=dbl_kind), dimension (nx_block,ny_block,0:nvert,ngroups) :: & +#endif xp, yp ! x and y coordinates of special triangle points ! (need 4 points for triangle integrals) - - integer (kind=int_kind), & - dimension (nx_block,ny_block,ngroups) :: & +#ifdef INTEL20_WORKAROUND + integer (kind=int_kind), dimension (:,:,:), allocatable :: & +#else + integer (kind=int_kind), dimension (nx_block,ny_block,ngroups) :: & +#endif iflux ,&! i index of cell contributing transport jflux ! j index of cell contributing transport +#ifdef INTEL20_WORKAROUND + integer (kind=int_kind), dimension(:,:), allocatable :: & +#else integer (kind=int_kind), dimension(ngroups,max_blocks) :: & +#endif icellsng ! number of cells with ice - integer (kind=int_kind), & - dimension(nx_block*ny_block,ngroups) :: & +#ifdef INTEL20_WORKAROUND + integer (kind=int_kind), dimension(:,:), allocatable :: & +#else + integer (kind=int_kind), dimension(nx_block*ny_block,ngroups) :: & +#endif indxing, indxjng ! compressed i/j indices +#ifdef INTEL20_WORKAROUND + integer (kind=int_kind), dimension(:,:,:), allocatable :: & +#else + integer (kind=int_kind), dimension(nx_block,ny_block,max_blocks) :: & +#endif + halomask ! 
temporary mask for fast halo updates + logical (kind=log_kind) :: & l_stop ! if true, abort the model @@ -446,9 +513,6 @@ subroutine horizontal_remap (dt, ntrace, & character (len=char_len) :: & edge ! 'north' or 'east' - integer (kind=int_kind), & - dimension(nx_block,ny_block,max_blocks) :: halomask - type (ice_halo) :: halo_info_tracer type (block) :: & @@ -461,6 +525,37 @@ subroutine horizontal_remap (dt, ntrace, & !---! Remap the open water area (without tracers). !---!------------------------------------------------------------------- +#ifdef INTEL20_WORKAROUND + allocate(icellsnc(0:ncat,max_blocks)) + allocate(indxinc(nx_block*ny_block,0:ncat)) + allocate(indxjnc(nx_block*ny_block,0:ncat)) + allocate(edgearea_e(nx_block,ny_block)) + allocate(edgearea_n(nx_block,ny_block)) + allocate(dpx(nx_block,ny_block,max_blocks)) + allocate(dpy(nx_block,ny_block,max_blocks)) + allocate(mc(nx_block,ny_block,0:ncat,max_blocks)) + allocate(mx(nx_block,ny_block,0:ncat,max_blocks)) + allocate(my(nx_block,ny_block,0:ncat,max_blocks)) + allocate(mmask(nx_block,ny_block,0:ncat)) + allocate(tc(nx_block,ny_block,ntrace,ncat,max_blocks)) + allocate(tx(nx_block,ny_block,ntrace,ncat,max_blocks)) + allocate(ty(nx_block,ny_block,ntrace,ncat,max_blocks)) + allocate(tmask(nx_block,ny_block,ntrace,ncat)) + allocate(mflxe(nx_block,ny_block,0:ncat)) + allocate(mflxn(nx_block,ny_block,0:ncat)) + allocate(mtflxe(nx_block,ny_block,ntrace,ncat)) + allocate(mtflxn(nx_block,ny_block,ntrace,ncat)) + allocate(triarea(nx_block,ny_block,ngroups)) + allocate(xp(nx_block,ny_block,0:nvert,ngroups)) + allocate(yp(nx_block,ny_block,0:nvert,ngroups)) + allocate(iflux(nx_block,ny_block,ngroups)) + allocate(jflux(nx_block,ny_block,ngroups)) + allocate(icellsng(ngroups,max_blocks)) + allocate(indxing(nx_block*ny_block,ngroups)) + allocate(indxjng(nx_block*ny_block,ngroups)) + allocate(halomask(nx_block,ny_block,max_blocks)) +#endif + !--- tcraig, tcx, this omp loop leads to a seg fault in gnu !--- need to 
check private variables and debug further !$TCXOMP PARALLEL DO PRIVATE(iblk,ilo,ihi,jlo,jhi,this_block,n,m, & @@ -515,6 +610,7 @@ subroutine horizontal_remap (dt, ntrace, & mmask (:,:,0) ) ! ice categories + do n = 1, ncat call construct_fields(nx_block, ny_block, & @@ -852,6 +948,37 @@ subroutine horizontal_remap (dt, ntrace, & enddo ! iblk !$TCXOMP END PARALLEL DO +#ifdef INTEL20_WORKAROUND + deallocate(icellsnc) + deallocate(indxinc) + deallocate(indxjnc) + deallocate(edgearea_e) + deallocate(edgearea_n) + deallocate(dpx) + deallocate(dpy) + deallocate(mc) + deallocate(mx) + deallocate(my) + deallocate(mmask) + deallocate(tc) + deallocate(tx) + deallocate(ty) + deallocate(tmask) + deallocate(mflxe) + deallocate(mflxn) + deallocate(mtflxe) + deallocate(mtflxn) + deallocate(triarea) + deallocate(xp) + deallocate(yp) + deallocate(iflux) + deallocate(jflux) + deallocate(icellsng) + deallocate(indxing) + deallocate(indxjng) + deallocate(halomask) +#endif + end subroutine horizontal_remap !======================================================================= diff --git a/cicecore/shared/ice_init_column.F90 b/cicecore/shared/ice_init_column.F90 index fbcc8413b..b41e71aa1 100644 --- a/cicecore/shared/ice_init_column.F90 +++ b/cicecore/shared/ice_init_column.F90 @@ -1937,6 +1937,7 @@ subroutine count_tracers nbtrcr = 0 nbtrcr_sw = 0 + nt_zbgc_frac = 0 ! vectors of size icepack_max_algae nlt_bgc_N(:) = 0 @@ -2184,7 +2185,6 @@ subroutine count_tracers enddo ! mm endif ! 
tr_zaero - nt_zbgc_frac = 0 if (nbtrcr > 0) then nt_zbgc_frac = ntrcr + 1 ntrcr = ntrcr + nbtrcr diff --git a/configuration/scripts/machines/env.izumi_gnu b/configuration/scripts/machines/env.izumi_gnu index 9f9938d68..b49be991a 100755 --- a/configuration/scripts/machines/env.izumi_gnu +++ b/configuration/scripts/machines/env.izumi_gnu @@ -10,7 +10,7 @@ if ("$inp" != "-nomodules") then source /usr/share/Modules/init/csh module purge -module load compiler/gnu/8.2.0 +module load compiler/gnu/9.3.0 setenv OMP_STACKSIZE 64M diff --git a/configuration/scripts/machines/env.izumi_intel b/configuration/scripts/machines/env.izumi_intel index 218d388be..4c7c7a648 100755 --- a/configuration/scripts/machines/env.izumi_intel +++ b/configuration/scripts/machines/env.izumi_intel @@ -10,7 +10,7 @@ if ("$inp" != "-nomodules") then source /usr/share/Modules/init/csh module purge -module load compiler/intel/19.0.2 +module load compiler/intel/20.0.1 setenv OMP_STACKSIZE 64M @@ -19,6 +19,7 @@ endif setenv ICE_MACHINE_ENVNAME izumi setenv ICE_MACHINE_COMPILER intel setenv ICE_MACHINE_MAKE gmake +setenv ICE_MACHINE_CPPDEFS '"-DINTEL20_WORKAROUND"' setenv ICE_MACHINE_WKDIR /scratch/cluster/$user/CICE_RUNS setenv ICE_MACHINE_INPUTDATA /fs/cgd/csm/inputdata setenv ICE_MACHINE_BASELINE /scratch/cluster/$user/CICE_BASELINE diff --git a/configuration/scripts/machines/env.izumi_nag b/configuration/scripts/machines/env.izumi_nag index 785cc410a..f87740e8e 100755 --- a/configuration/scripts/machines/env.izumi_nag +++ b/configuration/scripts/machines/env.izumi_nag @@ -10,7 +10,8 @@ if ("$inp" != "-nomodules") then source /usr/share/Modules/init/csh module purge -module load compiler/nag/6.2 +#module load compiler/nag/7.0 +module load compiler/nag/6.2-8.1.0 setenv OMP_STACKSIZE 64M diff --git a/configuration/scripts/machines/env.izumi_pgi b/configuration/scripts/machines/env.izumi_pgi index b89eafeb8..11de0e8cf 100755 --- a/configuration/scripts/machines/env.izumi_pgi +++ 
b/configuration/scripts/machines/env.izumi_pgi @@ -10,7 +10,7 @@ if ("$inp" != "-nomodules") then source /usr/share/Modules/init/csh module purge -module load compiler/pgi/18.10 +module load compiler/pgi/20.1 setenv OMP_STACKSIZE 64M diff --git a/icepack b/icepack index 09a5e19f0..2b27a78aa 160000 --- a/icepack +++ b/icepack @@ -1 +1 @@ -Subproject commit 09a5e19f006f62f60f6b940a4385feb47451368e +Subproject commit 2b27a78aaecb3635d14b94464d918a67df750ff0