Skip to content

Commit

Permalink
Merge pull request #2377 from CliMA/gb/up_deps
Browse files Browse the repository at this point in the history
Use ClimaCore 0.11 and MPITrampoline
  • Loading branch information
Sbozzolo authored Nov 23, 2023
2 parents 914a844 + ff74ef7 commit 4a55565
Show file tree
Hide file tree
Showing 12 changed files with 381 additions and 352 deletions.
13 changes: 3 additions & 10 deletions .buildkite/JuliaProject.toml
Original file line number Diff line number Diff line change
@@ -1,22 +1,15 @@
[extras]
CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
CUDA_Driver_jll = "4ee394cb-3365-5eb0-8335-949819d2adfc"
HDF5_jll = "0234f1f7-429e-5d53-9886-15a909be8d59"
MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"

[preferences.CUDA_Driver_jll]
compat = false

[preferences.CUDA_Runtime_jll]
version = "local"

[preferences.HDF5_jll]
libhdf5_path = "libhdf5"
libhdf5_hl_path = "libhdf5_hl"
version = "12.2"
local = "true"

[preferences.MPIPreferences]
_format = "1.0"
abi = "OpenMPI"
binary = "system"
libmpi = "libmpi"
mpiexec = "mpiexec"
binary = "MPItrampoline_jll"
13 changes: 10 additions & 3 deletions .buildkite/gpu_pipeline/pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
agents:
queue: clima
slurm_mem: 8G
modules: julia/1.9.3 cuda/julia-pref openmpi/4.1.5
modules: julia/1.9.4 cuda/julia-pref openmpi/4.1.5-mpitrampoline

env:
OPENBLAS_NUM_THREADS: 1
JULIA_CUDA_MEMORY_POOL: none
JULIA_MPI_HAS_CUDA: "true"
JULIA_NVTX_CALLBACKS: gc
JULIA_MAX_NUM_PRECOMPILE_FILES: 100
GPU_CONFIG_PATH: "config/gpu_configs/"
OPENBLAS_NUM_THREADS: 1
OMPI_MCA_opal_warn_on_missing_libcuda: 0
SLURM_KILL_BAD_EXIT: 1
SLURM_GPU_BIND: none # https://github.com/open-mpi/ompi/issues/11949#issuecomment-1737712291
CONFIG_PATH: "config/model_configs"
GPU_CONFIG_PATH: "config/gpu_configs/"
PERF_CONFIG_PATH: "config/perf_configs"
MPI_CONFIG_PATH: "config/mpi_configs"

steps:
- label: "init :GPU:"
Expand Down
23 changes: 16 additions & 7 deletions .buildkite/longruns/pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
agents:
queue: central
slurm_mem_per_cpu: 8G
modules: julia/1.9.3 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 hdf5/1.12.2-ompi415 nsight-systems/2023.3.1
modules: julia/1.9.4 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 nsight-systems/2023.3.1

env:
JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
OPENBLAS_NUM_THREADS: 1
JULIA_NVTX_CALLBACKS: gc
OMPI_MCA_opal_warn_on_missing_libcuda: 0
JULIA_MAX_NUM_PRECOMPILE_FILES: 100
JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/cpu"
JULIA_CPU_TARGET: 'broadwell;skylake'
CONFIG_PATH: "config/longrun_configs"
CONFIG_PATH: "config/model_configs"
GPU_CONFIG_PATH: "config/gpu_configs/"
PERF_CONFIG_PATH: "config/perf_configs"
MPI_CONFIG_PATH: "config/mpi_configs"
SLURM_KILL_BAD_EXIT: 1
JULIA_NVTX_CALLBACKS: gc
JULIA_CUDA_MEMORY_POOL: none
JULIA_MPI_HAS_CUDA: "true"
MPITRAMPOLINE_LIB: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/lib64/libmpiwrapper.so"
MPITRAMPOLINE_MPIEXEC: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/bin/mpiwrapperexec"

timeout_in_minutes: 1440

Expand All @@ -23,7 +32,7 @@ steps:
- "julia --project -e 'using Pkg; Pkg.instantiate(;verbose=true)'"
- "julia --project -e 'using Pkg; Pkg.precompile()'"
- "julia --project -e 'using Pkg; Pkg.status()'"

- echo "--- Configure CUDA"
# force the initialization of the CUDA runtime as it is lazily loaded by default
- "julia --project=cuda_env -e 'using Pkg; Pkg.resolve(); Pkg.instantiate(;verbose=true);using CUDA; CUDA.precompile_runtime()'"
Expand Down Expand Up @@ -94,7 +103,7 @@ steps:
slurm_time: 24:00:00
env:
JOB_NAME: "longrun_ssp_bw_rhoe_equil_highres"

- label: ":computer: held-suarez, dry, high-topped (55km), high-sponge (35km), helem_16 np_3"
command:
- srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml
Expand Down Expand Up @@ -122,7 +131,7 @@ steps:
slurm_ntasks: 64
slurm_mem_per_cpu: 16GB
slurm_time: 24:00:00

- label: ":computer: aquaplanet, equilmoist, high-topped (55km), gray-radiation, vertdiff, high-sponge (35km), helem_16 np_3"
command:
- srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml
Expand Down Expand Up @@ -160,7 +169,7 @@ steps:
slurm_time: 24:00:00
env:
JOB_NAME: "longrun_aquaplanet_rhoe_equilmoist_nz63_0M_55km_rs35km_clearsky_tvinsolation_earth"

- label: ":computer: baroclinic wave (ρe_tot) equilmoist high resolution topography (earth)"
command:
- srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml
Expand Down Expand Up @@ -213,7 +222,7 @@ steps:
slurm_time: 24:00:00
env:
JOB_NAME: "longrun_sphere_hydrostatic_balance_rhoe"


- group: "Experimental Long runs"

Expand Down
13 changes: 9 additions & 4 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,30 +1,35 @@
agents:
queue: central
slurm_mem: 8G
modules: julia/1.9.3 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 hdf5/1.12.2-ompi415 nsight-systems/2023.3.1
modules: julia/1.9.4 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 nsight-systems/2023.3.1

env:
JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
OPENBLAS_NUM_THREADS: 1
JULIA_NVTX_CALLBACKS: gc
OMPI_MCA_opal_warn_on_missing_libcuda: 0
JULIA_MAX_NUM_PRECOMPILE_FILES: 100
JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/cpu"
JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/default"
JULIA_CPU_TARGET: 'broadwell;skylake'
CONFIG_PATH: "config/model_configs"
GPU_CONFIG_PATH: "config/gpu_configs/"
PERF_CONFIG_PATH: "config/perf_configs"
MPI_CONFIG_PATH: "config/mpi_configs"
SLURM_KILL_BAD_EXIT: 1
JULIA_NVTX_CALLBACKS: gc
JULIA_CUDA_MEMORY_POOL: none
JULIA_MPI_HAS_CUDA: "true"
MPITRAMPOLINE_LIB: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/lib64/libmpiwrapper.so"
MPITRAMPOLINE_MPIEXEC: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/bin/mpiwrapperexec"

steps:
- label: "init :computer:"
key: "init_cpu_env"
command:
- "echo $$JULIA_DEPOT_PATH"

- echo "--- Configure MPI"
- julia -e 'using Pkg; Pkg.add("MPIPreferences"); using MPIPreferences; use_system_binary()'
- echo "--- Remove MPIPreferences"
- "rm -f ${JULIA_DEPOT_PATH}/environments/v1.9/LocalPreferences.toml"

- echo "--- Instantiate project"
- "julia --project -e 'using Pkg; Pkg.instantiate(;verbose=true)'"
Expand Down
13 changes: 9 additions & 4 deletions .buildkite/scaling/pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ for i in "${!resolutions[@]}"; do
folder_name="${resolution}_res_Float32"
mkdir -p "$parent_folder/$folder_name"
filepath="$parent_folder/$folder_name/$filename"

echo "job_id: sphere_held_suarez_${resolution}_res_rhoe_${nprocs}" > "$filepath"
echo "forcing: held_suarez" >> "$filepath"
echo "FLOAT_TYPE: $FT" >> "$filepath"
echo "tracer_upwinding: none" >> "$filepath"

case "$resolution" in
"low")
echo -e "$low_resolution_lines" >> "$filepath"
Expand All @@ -68,7 +68,7 @@ done
cat << 'EOM'
agents:
queue: central
modules: julia/1.9.3 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 hdf5/1.12.2-ompi415 nsight-systems/2023.3.1
modules: julia/1.9.4 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 nsight-systems/2023.3.1
env:
JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
Expand All @@ -78,6 +78,11 @@ env:
JULIA_MAX_NUM_PRECOMPILE_FILES: 100
JULIA_CPU_TARGET: 'broadwell;skylake'
SLURM_KILL_BAD_EXIT: 1
JULIA_NVTX_CALLBACKS: gc
JULIA_CUDA_MEMORY_POOL: none
JULIA_MPI_HAS_CUDA: "true"
MPITRAMPOLINE_LIB: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/lib64/libmpiwrapper.so"
MPITRAMPOLINE_MPIEXEC: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/bin/mpiwrapperexec"
steps:
- label: "init :computer:"
Expand Down Expand Up @@ -209,7 +214,7 @@ cat << EOM
- wait: ~
continue_on_failure: true
- label: ":broom: clean up config files"
- label: ":broom: clean up config files"
command: "rm -rf $parent_folder"
EOM
14 changes: 7 additions & 7 deletions .dev/Manifest.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file is machine-generated - editing it directly is not advised

julia_version = "1.9.3"
julia_version = "1.9.4"
manifest_format = "2.0"
project_hash = "b280bcf3b481823c97fa1a62c1b5e65114b8fa18"

Expand Down Expand Up @@ -86,12 +86,12 @@ version = "1.0.42"
[[deps.LibCURL]]
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
version = "0.6.3"
version = "0.6.4"

[[deps.LibCURL_jll]]
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
version = "7.84.0+0"
version = "8.4.0+0"

[[deps.LibGit2]]
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
Expand All @@ -100,7 +100,7 @@ uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
[[deps.LibSSH2_jll]]
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
version = "1.10.2+0"
version = "1.11.0+1"

[[deps.Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Expand Down Expand Up @@ -129,9 +129,9 @@ uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
version = "1.2.0"

[[deps.OrderedCollections]]
git-tree-sha1 = "2e73fe17cac3c62ad1aebe70d44c963c3cfdc3e3"
git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.6.2"
version = "1.6.3"

[[deps.Parsers]]
deps = ["Dates", "PrecompileTools", "UUIDs"]
Expand Down Expand Up @@ -213,7 +213,7 @@ version = "1.2.13+0"
[[deps.nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
version = "1.48.0+0"
version = "1.52.0+1"

[[deps.p7zip_jll]]
deps = ["Artifacts", "Libdl"]
Expand Down
4 changes: 1 addition & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
HDF5_jll = "0234f1f7-429e-5d53-9886-15a909be8d59"
ImageFiltering = "6a3955dd-da59-5b1f-98d4-e7296123deb5"
Insolation = "e98cc03f-d57e-4e3c-b70c-8d51efe9e0d8"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
Expand Down Expand Up @@ -51,7 +50,7 @@ Artifacts = "1"
AtmosphericProfilesLibrary = "0.1"
CLIMAParameters = "0.7.25"
ClimaComms = "0.5.6"
ClimaCore = "0.10.55"
ClimaCore = "0.11.0"
ClimaTimeSteppers = "0.7.14"
CloudMicrophysics = "0.15.0"
Colors = "0.12"
Expand All @@ -62,7 +61,6 @@ DiffEqBase = "6"
DiffEqCallbacks = "2"
DocStringExtensions = "0.8, 0.9"
FastGaussQuadrature = "0.4, 0.5, 1"
HDF5_jll = "~1.12"
ImageFiltering = "0.7"
Insolation = "0.8"
Interpolations = "0.14"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ ClimaAtmos.jl is the atmosphere components of the CliMA software stack. We striv

## Installation instructions

Recommended Julia: Stable release v1.9.3
Recommended Julia: Stable release v1.9.4

ClimaAtmos.jl is a [registered Julia package](https://julialang.org/packages/). To install

Expand Down
Loading

0 comments on commit 4a55565

Please sign in to comment.