-
Notifications
You must be signed in to change notification settings - Fork 177
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Jenkins Declarative Pipeline for CI with gfs/gefs multibuilds (#2246)
Adding top level Jenkins file for CI tests running on Jenkins Controller: - Declarative Multi-branch Pipeline (has enhanced restart capabilities on a per-section basis) - Starts Pipeline from Label PR same as BASH system (for now) - Progress and restarts can be managed with CAC Login at [EPIC OAR Jenkins](https://jenkins.epic.oarcloud.noaa.gov) - Has logic for multi **gfs/gefs** system builds (arguments based on a configuration file `ci/cases/yamls/build.yaml`) - Any number of **systems** may be added by manually adding an element to the matrix in the Jenkinsfile - _It may be possible to dynamically add matrix values with a specialty plug-in_ - Currently only runs on **Orion** and **Hera** using `mterry` account Resolves #2119 Resolves #2118
- Loading branch information
1 parent
43429e2
commit 54daa31
Showing
6 changed files
with
315 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
// Pipeline-wide state shared between stages; every value starts as the placeholder 'none'. | ||
// Machine: machine name with its first letter capitalized, used to build the "CI-<Machine>-..." PR label names. | ||
def Machine = 'none' | ||
// machine: lower-case machine name selected from the PR labels, used to build the "<machine>-emc" agent label. | ||
def machine = 'none' | ||
// HOME: assigned "${WORKSPACE}/TESTDIR" in the 'Get Common Workspace' stage; builds and RUNTESTS live under it. | ||
// NOTE(review): this Groovy variable shares its name with the shell's HOME environment variable — confirm that is intended. | ||
def HOME = 'none' | ||
// localworkspace: assigned env.WORKSPACE in the 'Get Machine' stage (which runs on the 'built-in' agent). | ||
def localworkspace = 'none' | ||
// commonworkspace: assigned "${WORKSPACE}" in the 'Get Common Workspace' stage (which runs on the "<machine>-emc" agent). | ||
def commonworkspace = 'none' | ||
|
||
pipeline { | ||
agent { label 'built-in' } | ||
|
||
options { | ||
skipDefaultCheckout() | ||
buildDiscarder(logRotator(numToKeepStr: '2')) | ||
} | ||
|
||
stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR | ||
// which is used to designate the Nodes in the Jenkins Controller by the agent label | ||
// Each Jenkins Node is connected to said machine via a Java agent over an ssh tunnel | ||
|
||
stage('Get Machine') {
    agent { label 'built-in' }
    steps {
        script {
            // Record the workspace of the built-in node.
            localworkspace = env.WORKSPACE

            // Each "ready" PR label selects exactly one machine.
            def readyLabelToMachine = [
                'CI-Hera-Ready'    : 'hera',
                'CI-Orion-Ready'   : 'orion',
                'CI-Hercules-Ready': 'hercules',
            ]

            // Walk every label on the PR; when several ready labels are present
            // the last one seen determines the machine.
            machine = 'none'
            for (prLabel in pullRequest.labels) {
                echo "Label: ${prLabel}"
                if (readyLabelToMachine.containsKey(prLabel)) {
                    machine = readyLabelToMachine[prLabel]
                }
            }

            // Keep a second copy of the machine name with its first letter
            // capitalized, because that is how it is spelled in the GitHub labels.
            def firstLetter = machine.substring(0, 1).toUpperCase()
            Machine = firstLetter + machine.substring(1)
        }
    }
}
|
||
stage('Get Common Workspace') {
    agent { label "${machine}-emc" }
    steps {
        script {
            // Job-level node parameter listing the nodes this job may be run on.
            def nodeParameter = [
                $class         : 'NodeParameterDefinition',
                allowedSlaves  : ['built-in', 'Hera-EMC', 'Orion-EMC'],
                defaultSlaves  : ['built-in'],
                name           : '',
                nodeEligibility: [$class: 'AllNodeEligibility'],
                triggerIfResult: 'allCases',
            ]
            properties([parameters([nodeParameter])])

            // Everything for this PR is built and run under <workspace>/TESTDIR.
            commonworkspace = "${WORKSPACE}"
            HOME = "${commonworkspace}/TESTDIR"
            sh(script: "mkdir -p ${HOME}/RUNTESTS", returnStatus: true)

            // Move the PR from "Ready" to "Building" for this machine.
            def buildingLabel = "CI-${Machine}-Building"
            def readyLabel = "CI-${Machine}-Ready"
            pullRequest.addLabel(buildingLabel)
            def hasReadyLabel = pullRequest.labels.any { prLabel -> prLabel.matches(readyLabel) }
            if (hasReadyLabel) {
                pullRequest.removeLabel(readyLabel)
            }
        }
    }
}
|
||
stage('Build System') {
    // Builds every system listed on the "system" axis in its own directory
    // under the common workspace HOME (one matrix cell per system).
    matrix {
        agent { label "${machine}-emc" }
        //options {
        //    throttle(['global_matrix_build'])
        //}
        axes {
            axis {
                name "system"
                values "gfs", "gefs"
            }
        }
        stages {
            stage("build system") {
                steps {
                    script {
                        // local HOMEgfs is used to build the system on a per-system basis under the common workspace HOME
                        def HOMEgfs = "${HOME}/${system}"
                        sh(script: "mkdir -p ${HOMEgfs}", returnStatus: true)
                        ws(HOMEgfs) {
                            // MACHINE_ID is used in the build scripts to determine the machine and is added to the shell environment
                            env.MACHINE_ID = machine
                            if (fileExists("${HOMEgfs}/sorc/BUILT_semaphor")) {
                                // The system is already built: skip the build in the case of re-runs.
                                // TODO: add user configurable control to manage the build semaphore
                                sh(script: "cat ${HOMEgfs}/sorc/BUILT_semaphor", returnStdout: true).trim()
                                ws(commonworkspace) { pullRequest.comment("Cloned PR already built (or build skipped) on ${machine} in directory ${HOMEgfs}") }
                            } else {
                                checkout scm
                                sh(script: "source workflow/gw_setup.sh;which git;git --version;git submodule update --init --recursive", returnStatus: true)

                                // 'builds' is a list of single-entry maps; indexing the list with the
                                // system name yields one value per entry (null for the entries of other
                                // systems), hence the join followed by stripping the literal "null".
                                def builds_file = readYaml file: "ci/cases/yamls/build.yaml"
                                def build_args_list = builds_file['builds']
                                def build_args = build_args_list[system].join(" ").trim().replaceAll("null", "")

                                dir("${HOMEgfs}/sorc") {
                                    // Only link the workflow after a successful build, and only write the
                                    // semaphore after both succeeded; otherwise a failed build would be
                                    // treated as "already built" on every re-run.
                                    def build_status = sh(script: "${build_args}", returnStatus: true)
                                    def link_status = (build_status == 0) ? sh(script: "./link_workflow.sh", returnStatus: true) : -1
                                    if (build_status != 0 || link_status != 0) {
                                        if (pullRequest.labels.any { value -> value.matches("CI-${Machine}-Building") }) {
                                            pullRequest.removeLabel("CI-${Machine}-Building")
                                        }
                                        pullRequest.addLabel("CI-${Machine}-Failed")
                                        pullRequest.comment("**FAILURE** building system ${system} on ${Machine} in directory ${HOMEgfs}")
                                        error("Failed to build system ${system} on ${Machine}")
                                    }
                                    sh(script: "echo ${HOMEgfs} > BUILT_semaphor", returnStatus: true)
                                }
                            }
                            // Move the PR from "Building" to "Running" for this machine.
                            if (pullRequest.labels.any { value -> value.matches("CI-${Machine}-Building") }) {
                                pullRequest.removeLabel("CI-${Machine}-Building")
                            }
                            pullRequest.addLabel("CI-${Machine}-Running")
                        }
                    }
                }
            }
        }
    }
}
|
||
stage('Run Tests') {
    // Creates and runs one experiment per value of the "Case" axis.
    matrix {
        agent { label "${machine}-emc" }
        axes {
            axis {
                name "Case"
                values "C48_ATM", "C48_S2SWA_gefs", "C48_S2SW", "C96_atm3DVar" // TODO add dynamic list of cases from env vars (needs additional plugins)
            }
        }
        stages {
            stage('Create Experiment') {
                steps {
                    script {
                        // Write a copy of the case YAML without the lines that contain "{...}"
                        // and parse that copy to find out which system the case belongs to.
                        sh(script: "sed -n '/{.*}/!p' ${HOME}/gfs/ci/cases/pr/${Case}.yaml > ${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp", returnStatus: true)
                        def yaml_case = readYaml file: "${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp"
                        // Declared with 'def' so that matrix cells running in parallel do not
                        // overwrite each other's value through the shared script binding.
                        def case_system = yaml_case.experiment.system
                        // local HOMEgfs is used to populate the XML on a per-system basis
                        def HOMEgfs = "${HOME}/${case_system}"
                        env.RUNTESTS = "${HOME}/RUNTESTS"
                        sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml", returnStatus: true)
                    }
                }
            }
            stage('Run Experiments') {
                steps {
                    script {
                        // common HOMEgfs is used to launch the scripts that run the experiments
                        def HOMEgfs = "${HOME}/gfs"
                        // Re-read the system of this case from the filtered YAML written by the
                        // 'Create Experiment' stage instead of relying on a shared global.
                        def yaml_case = readYaml file: "${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp"
                        def case_system = yaml_case.experiment.system
                        ws(HOMEgfs) {
                            // 'def' keeps pslot private to this matrix cell; a shared value could be
                            // replaced by another cell between looking it up and using it.
                            def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${HOME}/RUNTESTS ${Case}", returnStdout: true).trim()
                            pullRequest.comment("**Running experiments: ${Case} on ${Machine}**<br>Built against system **${case_system}** in directory:<br>`${HOMEgfs}`<br>With the experiment in directory:<br>`${HOME}/RUNTESTS/${pslot}`")
                            try {
                                // No returnStatus here: a non-zero exit code must raise an exception
                                // so that the catch block below actually reports the failure.
                                sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOME} ${pslot}")
                            } catch (Exception e) {
                                pullRequest.comment("**FAILURE** running experiments: ${Case} on ${Machine}")
                                error("Failed to run experiments ${Case} on ${Machine}")
                            }
                            pullRequest.comment("**SUCCESS** running experiments: ${Case} on ${Machine}")
                        }
                    }
                }
                post {
                    always {
                        script {
                            ws("${HOME}/gfs") {
                                // Collect the labels first and remove them afterwards, so the
                                // label collection is not modified while it is being iterated.
                                def machine_labels = []
                                for (label in pullRequest.labels) {
                                    if (label.contains("${Machine}")) {
                                        machine_labels.add(label)
                                    }
                                }
                                for (label in machine_labels) {
                                    pullRequest.removeLabel(label)
                                }
                            }
                        }
                    }
                    success {
                        script {
                            ws("${HOME}/gfs") {
                                pullRequest.addLabel("CI-${Machine}-Passed")
                                def timestamp = new Date().format("MM dd HH:mm:ss", TimeZone.getTimeZone('America/New_York'))
                                pullRequest.comment("**CI SUCCESS** ${Machine} at ${timestamp}\n\nBuilt and ran in directory `${HOME}`")
                            }
                        }
                    }
                    failure {
                        script {
                            ws("${HOME}/gfs") {
                                pullRequest.addLabel("CI-${Machine}-Failed")
                                def timestamp = new Date().format("MM dd HH:mm:ss", TimeZone.getTimeZone('America/New_York'))
                                pullRequest.comment("**CI FAILED** ${Machine} at ${timestamp}<br>Built and ran in directory `${HOME}`")
                                // Double quotes are required: a single-quoted Groovy string is not
                                // interpolated and would look for a path literally named '${HOME}/...'.
                                def ci_log = "${HOME}/RUNTESTS/ci.log"
                                if (fileExists(ci_log)) {
                                    def fileContent = readFile(ci_log)
                                    // Archive every line of ci.log that names a ".log" file.
                                    // NOTE(review): archiveArtifacts resolves patterns relative to the
                                    // workspace — confirm the paths listed in ci.log are reachable from it.
                                    for (line in fileContent.split('\r?\n')) {
                                        if (line.contains(".log")) {
                                            archiveArtifacts artifacts: "${line}", fingerprint: true
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Build command line per system: each list entry maps one system name to the command that builds it. | ||
# NOTE(review): presumably this is the ci/cases/yamls/build.yaml that the Jenkinsfile 'Build System' stage reads and runs from the sorc directory — confirm. | ||
builds: | ||
- gefs: './build_all.sh' | ||
- gfs: './build_all.sh -gu' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,120 @@ | ||
#!/bin/env bash | ||
|
||
function cancel_slurm_jobs() { | ||
function determine_scheduler() {
  # Print the batch scheduler found on this host:
  #   "slurm"   when an sbatch command is available,
  #   "torque"  when (only) a qsub command is available,
  #   "unknown" otherwise.
  local detected="unknown"

  if command -v sbatch > /dev/null 2>&1; then
    detected="slurm"
  elif command -v qsub > /dev/null 2>&1; then
    detected="torque"
  fi

  echo "${detected}"
}
|
||
# Usage: cancel_batch_jobs <substring>
# Example: cancel_batch_jobs "C48_ATM_3c4e7f74"
function cancel_batch_jobs() {
  # Cancel all batch jobs of the current user that have the given substring
  # in their name. So like in the example all jobs with "C48_ATM_3c4e7f74"
  # in their name will be canceled.
  #
  # Arguments:
  #   $1  substring to look for in the job names (must not be empty)
  # Returns:
  #   1 when no substring is given; exits the shell with status 1 when the
  #   scheduler reported by determine_scheduler is neither torque nor slurm.

  local substring="${1:-}"
  local scheduler job_ids job_id job_name

  # An empty pattern matches every job name, which would cancel ALL of the
  # user's jobs; refuse to do that.
  if [[ -z "${substring}" ]]; then
    echo "FATAL: cancel_batch_jobs requires a non-empty job name substring" >&2
    return 1
  fi

  scheduler=$(determine_scheduler)

  if [[ "${scheduler}" == "torque" ]]; then

    # Keep only rows whose first column starts with a digit so that the
    # header rows printed by qstat are not treated as job ids.
    job_ids=$(qstat -u "${USER}" | awk '$1 ~ /^[0-9]/ {print $1}') || true

    for job_id in ${job_ids}; do
      job_name=$(qstat -f "${job_id}" | grep Job_Name | awk '{print $3}') || true
      if [[ "${job_name}" =~ ${substring} ]]; then
        echo "Canceling PBS Job ${job_name} with: qdel ${job_id}"
        qdel "${job_id}"
      fi
    done

  elif [[ "${scheduler}" == "slurm" ]]; then

    job_ids=$(squeue -u "${USER}" -h -o "%i")

    for job_id in ${job_ids}; do
      # The third line of the sacct output is taken as the job name;
      # all whitespace is stripped from it.
      job_name=$(sacct -j "${job_id}" --format=JobName%100 | head -3 | tail -1 | sed -r 's/\s+//g') || true
      if [[ "${job_name}" =~ ${substring} ]]; then
        echo "Canceling Slurm Job ${job_name} with: scancel ${job_id}"
        scancel "${job_id}"
      fi
    done

  else
    echo "FATAL: Unknown/unsupported job scheduler"
    exit 1
  fi
}
|
||
|
||
function get_pr_case_list () {

  #############################################################
  # Loop over every yaml file in ${HOMEgfs}/ci/cases/pr and
  # print its case name (the file name without ".yaml"),
  # one name per line. Prints nothing when there are no yaml files.
  #############################################################
  local yaml_config case_name
  for yaml_config in "${HOMEgfs}/ci/cases/pr/"*.yaml; do
    # When nothing matches, the glob is passed through literally; skip it
    # instead of reporting a case named "*".
    [[ -e "${yaml_config}" ]] || continue
    case_name="${yaml_config##*/}"
    echo "${case_name%.yaml}"
  done
}
|
||
function get_pslot_list () {

  #############################################################
  # Loop over the entries of <RUNTESTS>/EXPDIR and print the
  # name of each one (the pslot including its hash tag), one per
  # line. Prints nothing when EXPDIR has no entries.
  #
  # Arguments:
  #   $1  path of the RUNTESTS directory
  #############################################################
  local RUNTESTS="${1}"
  # Loop variables are local so that sourcing this library does not
  # overwrite a pslot variable of the caller.
  local pslot_dir

  for pslot_dir in "${RUNTESTS}/EXPDIR/"*; do
    # When nothing matches, the glob is passed through literally; skip it
    # instead of reporting a pslot named "*".
    [[ -e "${pslot_dir}" ]] || continue
    echo "${pslot_dir##*/}"
  done

}
|
||
function get_pslot () {

  #############################################################
  # Loop over the entries of <RUNTESTS>/EXPDIR and print the
  # name of the first pslot (with its tag) that matches the case.
  # An entry matches when its name, with the last "_<tag>" part
  # removed, equals the case name. Prints nothing when no entry
  # matches.
  #
  # Arguments:
  #   $1  path of the RUNTESTS directory
  #   $2  case name, e.g. "C48_ATM"
  #############################################################
  local RUNTESTS="${1}"
  local case_name="${2}"
  # Loop variables are local so that sourcing this library does not
  # overwrite a pslot variable of the caller.
  local pslot_dir pslot_name

  for pslot_dir in "${RUNTESTS}/EXPDIR/"*; do
    pslot_name="${pslot_dir##*/}"
    # ${pslot_name%_*} drops the shortest trailing "_<tag>" without
    # spawning any external process.
    if [[ "${pslot_name%_*}" == "${case_name}" ]]; then
      echo "${pslot_name}"
      break
    fi
  done

}
|
||
function create_experiment () {

  #############################################################
  # Create the experiment described by a case yaml file.
  # Exports pslot as "<case>_<short git hash of HOMEgfs HEAD>",
  # sources the platform configuration for MACHINE_ID and the
  # workflow setup, then runs workflow/create_experiment.py.
  #
  # Arguments:
  #   $1  path of the case yaml file
  # Reads:
  #   HOMEgfs, MACHINE_ID
  #############################################################
  local yaml_config="${1}"
  # Local, but still visible to the files sourced below.
  local pr_sha case

  cd "${HOMEgfs}" || exit 1
  pr_sha=$(git rev-parse --short HEAD)
  case=$(basename "${yaml_config}" .yaml) || true
  export pslot=${case}_${pr_sha}

  source "${HOMEgfs}/ci/platforms/config.${MACHINE_ID}"
  source "${HOMEgfs}/workflow/gw_setup.sh"

  # The script is addressed directly under HOMEgfs. The previous path
  # contained "${system}", which is never assigned in this function, so it
  # only resolved while that variable happened to be empty.
  "${HOMEgfs}/workflow/create_experiment.py" --overwrite --yaml "${yaml_config}"

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/usr/bin/env bash

# Command-line wrapper around the functions defined in ci/scripts/utils/ci_utils.sh.
#
# Usage: <this script> <function name> [arguments for the function ...]
#
# HOMEgfs is derived from the location of this script (three directories up),
# then ush/detect_machine.sh and ci_utils.sh are sourced and the requested
# function is called with the remaining arguments.
# NOTE(review): presumably detect_machine.sh provides MACHINE_ID — confirm.

HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )"
source "${HOMEgfs}/ush/detect_machine.sh"

utility_function="${1:-}"

source "${HOMEgfs}/ci/scripts/utils/ci_utils.sh"

# Fail loudly instead of silently running nothing (no argument given) or an
# arbitrary command (name that is not one of the sourced functions).
if [[ -z "${utility_function}" ]] || ! declare -F "${utility_function}" > /dev/null; then
  echo "FATAL: '${utility_function}' is not a known CI utility function" >&2
  exit 1
fi

"${utility_function}" "${@:2}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters