Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add machine defined limits on maximum processors and batch wall clock time #349

Merged
merged 4 commits into from
Aug 23, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 58 additions & 32 deletions cice.setup
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@ if (${doabort} == true) then
exit -1
endif

# Create a new sets_base variable to store sets passed to cice.setup
set sets_base = "${sets}"
set bfbcomp_base = "$bfbcomp"
foreach compiler ( $ncompilers )
Expand All @@ -458,13 +459,14 @@ EOF
continue
endif

source ${ICE_SCRIPTS}/machines/env.${machcomp} -nomodules || exit 2

# Obtain the test name, sets, grid, and PE information from .ts file
set test = `echo $line | cut -d' ' -f1`
set grid = `echo $line | cut -d' ' -f2`
set pesx = `echo $line | cut -d' ' -f3`
set sets_tmp = `echo $line | cut -d' ' -f4`
set bfbcomp_tmp = `echo $line | cut -d' ' -f5`
# Create a new sets_base variable to store sets passed to cice.setup

# Append sets from .ts file to the $sets variable
set sets = "$sets_base,$sets_tmp"
Expand All @@ -482,6 +484,61 @@ EOF
set fbfbcomp = ${machcomp}_${bfbcomp}
endif

#------------------------------------------------------------
# Parse pesx with strict checking, limit pes for machine
#
# Accepted forms of ${pesx} (all fields are unsigned integers):
#   [m]x[n]                 MPI tasks x threads
#   [m]x[n]x[bx]x[by]       ... plus block size in x and y
#   [m]x[n]x[bx]x[by]x[mb]  ... plus max blocks
# Fields that are not supplied are set to 0.
#
# If ICE_MACHINE_MAXPES is defined and tasks*threads exceeds it, the task
# count is reduced to fit and (5-field form only) the max blocks value is
# scaled up to compensate.  ${pesx} is then rebuilt in its original form
# from the (possibly reduced) values.

set blckx = 0
set blcky = 0
set mblck = 0

# pesform records how many fields ${pesx} has (5, 4 or 2); 0 means invalid.
# The comparisons are quoted so that an empty or multi-word ${pesx} reaches
# the error message below instead of raising a csh expression syntax error.
set pesform = 0
set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'`
if ("${chck}" == "OK") then
  set pesform = 5
else
  set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'`
  if ("${chck}" == "OK") then
    set pesform = 4
  else
    set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*$/OK/'`
    if ("${chck}" == "OK") then
      set pesform = 2
    endif
  endif
endif

if (${pesform} == 0) then
  echo "${0}: ERROR in -p argument, ${pesx}, must be [m]x[n], [m]x[n]x[bx]x[by], or [m]x[n]x[bx]x[by]x[mb] "
  exit -1
endif

# Split out the individual fields
set task = `echo ${pesx} | cut -d x -f 1`
set thrd = `echo ${pesx} | cut -d x -f 2`
if (${pesform} >= 4) then
  set blckx = `echo ${pesx} | cut -d x -f 3`
  set blcky = `echo ${pesx} | cut -d x -f 4`
endif
if (${pesform} == 5) then
  set mblck = `echo ${pesx} | cut -d x -f 5`
endif

# Apply the machine limit on total pes (tasks * threads), if one is defined
if ($?ICE_MACHINE_MAXPES) then
  @ pesreq = ${task} * ${thrd}
  if (${pesreq} > ${ICE_MACHINE_MAXPES}) then
    @ task = ${ICE_MACHINE_MAXPES} / ${thrd}
    # Integer division gives 0 when thrd alone exceeds the limit;
    # a decomposition with 0 tasks is invalid, so keep at least one task.
    if (${task} < 1) then
      set task = 1
    endif
    # Fewer tasks means more blocks per task; only the 5-field form
    # carries an explicit max blocks value to scale.
    if (${pesform} == 5) then
      @ mblck = ${mblck} * ((${pesreq} / ${ICE_MACHINE_MAXPES}) + 1)
    endif
  endif
endif

# Rebuild pesx in the same form it was given
set pesx = ${task}x${thrd}
if (${pesform} >= 4) then
  set pesx = ${pesx}x${blckx}x${blcky}
endif
if (${pesform} == 5) then
  set pesx = ${pesx}x${mblck}
endif

set testname_noid = ${spval}
# create case for test cases
if (${docase} == 0) then
Expand Down Expand Up @@ -560,7 +617,6 @@ EOF
end

cd ${casedir}
source ./env.${machcomp} -nomodules || exit 2

set quietmode = false
if ($?ICE_MACHINE_QUIETMODE) then
Expand All @@ -586,36 +642,6 @@ EOF
#------------------------------------------------------------
# Compute a default blocksize

set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'`
if (${chck} == OK) then
set task = `echo ${pesx} | cut -d x -f 1`
set thrd = `echo ${pesx} | cut -d x -f 2`
set blckx = `echo ${pesx} | cut -d x -f 3`
set blcky = `echo ${pesx} | cut -d x -f 4`
set mblck = `echo ${pesx} | cut -d x -f 5`
else
set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'`
if (${chck} == OK) then
set task = `echo ${pesx} | cut -d x -f 1`
set thrd = `echo ${pesx} | cut -d x -f 2`
set blckx = `echo ${pesx} | cut -d x -f 3`
set blcky = `echo ${pesx} | cut -d x -f 4`
set mblck = 0
else
set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*$/OK/'`
if (${chck} == OK) then
set task = `echo ${pesx} | cut -d x -f 1`
set thrd = `echo ${pesx} | cut -d x -f 2`
set blckx = 0
set blcky = 0
set mblck = 0
else
echo "${0}: ERROR in -p argument, ${pesx}, must be [m]x[n], [m]x[n]x[bx]x[by], or [m]x[n]x[bx]x[by]x[mb] "
exit -1
endif
endif
endif

setenv ICE_DECOMP_GRID ${grid}
setenv ICE_DECOMP_NTASK ${task}
setenv ICE_DECOMP_NTHRD ${thrd}
Expand Down
25 changes: 16 additions & 9 deletions configuration/scripts/cice.batch.csh
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,24 @@ if (${taskpernodelimit} > ${ntasks}) set taskpernodelimit = ${ntasks}
set ptile = $taskpernode
if ($ptile > ${maxtpn} / 2) @ ptile = ${maxtpn} / 2

# Start from the requested run length (ICE_RUNLENGTH) and, when the machine
# env file defines ICE_MACHINE_MAXRUNLENGTH, cap it at that value.
# The capped value is kept in the local variable runlength; ICE_RUNLENGTH
# itself is not modified here.
set runlength = ${ICE_RUNLENGTH}
if ($?ICE_MACHINE_MAXRUNLENGTH) then
if (${runlength} > ${ICE_MACHINE_MAXRUNLENGTH}) then
set runlength = ${ICE_MACHINE_MAXRUNLENGTH}
endif
endif

set queue = "${ICE_QUEUE}"
set batchtime = "00:15:00"
if (${ICE_RUNLENGTH} > 0) set batchtime = "00:29:00"
if (${ICE_RUNLENGTH} == 1) set batchtime = "00:59:00"
if (${ICE_RUNLENGTH} == 2) set batchtime = "2:00:00"
if (${ICE_RUNLENGTH} == 3) set batchtime = "3:00:00"
if (${ICE_RUNLENGTH} == 4) set batchtime = "4:00:00"
if (${ICE_RUNLENGTH} == 5) set batchtime = "5:00:00"
if (${ICE_RUNLENGTH} == 6) set batchtime = "6:00:00"
if (${ICE_RUNLENGTH} == 7) set batchtime = "7:00:00"
if (${ICE_RUNLENGTH} >= 8) set batchtime = "8:00:00"
if (${runlength} == 0) set batchtime = "00:29:00"
if (${runlength} == 1) set batchtime = "00:59:00"
if (${runlength} == 2) set batchtime = "2:00:00"
if (${runlength} == 3) set batchtime = "3:00:00"
if (${runlength} == 4) set batchtime = "4:00:00"
if (${runlength} == 5) set batchtime = "5:00:00"
if (${runlength} == 6) set batchtime = "6:00:00"
if (${runlength} == 7) set batchtime = "7:00:00"
if (${runlength} >= 8) set batchtime = "8:00:00"

set shortcase = `echo ${ICE_CASENAME} | cut -c1-15`

Expand Down
2 changes: 1 addition & 1 deletion configuration/scripts/cice.settings
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ setenv ICE_BASEGEN undefined
setenv ICE_BASECOM undefined
setenv ICE_BFBCOMP undefined
setenv ICE_SPVAL undefined
setenv ICE_RUNLENGTH 0
setenv ICE_RUNLENGTH -1
setenv ICE_ACCOUNT undefined
setenv ICE_QUEUE undefined

Expand Down
4 changes: 3 additions & 1 deletion configuration/scripts/machines/env.conrad_intel
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ setenv ICE_MACHINE_BASELINE $WORKDIR/CICE_BASELINE
setenv ICE_MACHINE_SUBMIT "qsub "
setenv ICE_MACHINE_ACCT P00000000
setenv ICE_MACHINE_QUEUE "debug"
setenv ICE_MACHINE_TPNODE 32 # tasks per node
setenv ICE_MACHINE_TPNODE 32 # tasks per node
setenv ICE_MACHINE_MAXPES 8000 # maximum total pes (tasks * threads) available
setenv ICE_MACHINE_MAXRUNLENGTH 168 # maximum batch wall time limit in hours (integer)
setenv ICE_MACHINE_BLDTHRDS 4
setenv ICE_MACHINE_QSTAT "qstat "
4 changes: 3 additions & 1 deletion configuration/scripts/machines/env.travisCI_gnu
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ setenv ICE_MACHINE_WKDIR ~/CICE_RUNS
setenv ICE_MACHINE_INPUTDATA ~
setenv ICE_MACHINE_BASELINE ~/CICE_BASELINE
setenv ICE_MACHINE_SUBMIT " "
setenv ICE_MACHINE_TPNODE 4
setenv ICE_MACHINE_TPNODE 4 # maximum tasks per node
setenv ICE_MACHINE_MAXPES 4 # maximum total pes (tasks * threads) available
setenv ICE_MACHINE_MAXRUNLENGTH 1 # maximum batch wall time limit in hours (integer)
setenv ICE_MACHINE_ACCT P0000000
setenv ICE_MACHINE_QUEUE "default"
setenv ICE_MACHINE_BLDTHRDS 1
Expand Down
30 changes: 30 additions & 0 deletions doc/source/user_guide/ug_running.rst
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,35 @@ directory back to **configuration/scripts/machines/** and update
the **configuration/scripts/cice.batch.csh** file, retest,
and then add and commit the updated machine files to the repository.

.. _machvars:

Machine variables
~~~~~~~~~~~~~~~~~~~~~~

There are several machine-specific variables defined in the **env.$[machine]_$[compiler]** file. These
variables are used to generate working cases for a given machine, compiler, and batch
system. Some variables are optional.

.. csv-table:: *Machine Settings*
:header: "variable", "format", "description"
:widths: 15, 15, 25

"ICE_MACHINE_ENVNAME", "string", "machine name"
"ICE_MACHINE_COMPILER", "string", "compiler"
"ICE_MACHINE_MAKE", "string", "make command"
"ICE_MACHINE_WKDIR", "string", "root work directory"
"ICE_MACHINE_INPUTDATA", "string", "root input data directory"
"ICE_MACHINE_BASELINE", "string", "root regression baseline directory"
"ICE_MACHINE_SUBMIT", "string", "batch job submission command"
"ICE_MACHINE_TPNODE", "integer", "machine maximum MPI tasks per node"
"ICE_MACHINE_MAXPES", "integer", "machine maximum total processors per job (optional)"
"ICE_MACHINE_MAXRUNLENGTH", "integer", "batch wall time limit in hours (optional)"
"ICE_MACHINE_ACCT", "string", "batch default account"
"ICE_MACHINE_QUEUE", "string", "batch default queue"
"ICE_MACHINE_BLDTHRDS", "integer", "number of threads used during build"
"ICE_MACHINE_QSTAT", "string", "batch job status command (optional)"
"ICE_MACHINE_QUIETMODE", "true/false", "flag to reduce build output (optional)"

.. _cross_compiling:

Cross-compiling
Expand Down Expand Up @@ -370,3 +399,4 @@ should be rebuilt before being resubmitted. It is always recommended that users
modify the scripts and input settings in the case directory, NOT the run directory.
In general, files in the run directory are overwritten by versions in the case
directory when the model is built, submitted, and run.