Skip to content

Commit

Permalink
Use ClimaCore 0.11 and MPITrampoline
Browse files Browse the repository at this point in the history
  • Loading branch information
Sbozzolo committed Nov 21, 2023
1 parent 765bbda commit 8eb839c
Show file tree
Hide file tree
Showing 9 changed files with 318 additions and 287 deletions.
9 changes: 1 addition & 8 deletions .buildkite/JuliaProject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,6 @@ compat = false
[preferences.CUDA_Runtime_jll]
version = "local"

[preferences.HDF5_jll]
libhdf5_path = "libhdf5"
libhdf5_hl_path = "libhdf5_hl"

[preferences.MPIPreferences]
_format = "1.0"
abi = "OpenMPI"
binary = "system"
libmpi = "libmpi"
mpiexec = "mpiexec"
binary = "MPItrampoline_jll"
13 changes: 13 additions & 0 deletions .buildkite/gpu_pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,23 @@ agents:
modules: julia/1.9.3 cuda/julia-pref openmpi/4.1.5

env:
JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
OPENBLAS_NUM_THREADS: 1
JULIA_NVTX_CALLBACKS: gc
OMPI_MCA_opal_warn_on_missing_libcuda: 0
JULIA_MAX_NUM_PRECOMPILE_FILES: 100
JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/cpu"
JULIA_CPU_TARGET: 'broadwell;skylake'
CONFIG_PATH: "config/model_configs"
GPU_CONFIG_PATH: "config/gpu_configs/"
PERF_CONFIG_PATH: "config/perf_configs"
MPI_CONFIG_PATH: "config/mpi_configs"
SLURM_KILL_BAD_EXIT: 1
JULIA_NVTX_CALLBACKS: gc
JULIA_CUDA_MEMORY_POOL: none
JULIA_MPI_HAS_CUDA: "true"
MPITRAMPOLINE_LIB: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/lib64/libmpiwrapper.so"
MPITRAMPOLINE_MPIEXEC: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/bin/mpiwrapperexec"
SLURM_GPU_BIND: none # https://github.com/open-mpi/ompi/issues/11949#issuecomment-1737712291

steps:
Expand Down
23 changes: 16 additions & 7 deletions .buildkite/longruns/pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
agents:
queue: central
slurm_mem_per_cpu: 8G
modules: julia/1.9.3 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 hdf5/1.12.2-ompi415 nsight-systems/2023.3.1
modules: julia/1.9.3 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 nsight-systems/2023.3.1

env:
JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
OPENBLAS_NUM_THREADS: 1
JULIA_NVTX_CALLBACKS: gc
OMPI_MCA_opal_warn_on_missing_libcuda: 0
JULIA_MAX_NUM_PRECOMPILE_FILES: 100
JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/cpu"
JULIA_CPU_TARGET: 'broadwell;skylake'
CONFIG_PATH: "config/longrun_configs"
CONFIG_PATH: "config/model_configs"
GPU_CONFIG_PATH: "config/gpu_configs/"
PERF_CONFIG_PATH: "config/perf_configs"
MPI_CONFIG_PATH: "config/mpi_configs"
SLURM_KILL_BAD_EXIT: 1
JULIA_NVTX_CALLBACKS: gc
JULIA_CUDA_MEMORY_POOL: none
JULIA_MPI_HAS_CUDA: "true"
MPITRAMPOLINE_LIB: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/lib64/libmpiwrapper.so"
MPITRAMPOLINE_MPIEXEC: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/bin/mpiwrapperexec"

timeout_in_minutes: 1440

Expand All @@ -23,7 +32,7 @@ steps:
- "julia --project -e 'using Pkg; Pkg.instantiate(;verbose=true)'"
- "julia --project -e 'using Pkg; Pkg.precompile()'"
- "julia --project -e 'using Pkg; Pkg.status()'"

- echo "--- Configure CUDA"
# force the initialization of the CUDA runtime as it is lazily loaded by default
- "julia --project=cuda_env -e 'using Pkg; Pkg.resolve(); Pkg.instantiate(;verbose=true);using CUDA; CUDA.precompile_runtime()'"
Expand Down Expand Up @@ -94,7 +103,7 @@ steps:
slurm_time: 24:00:00
env:
JOB_NAME: "longrun_ssp_bw_rhoe_equil_highres"

- label: ":computer: held-suarez, dry, high-topped (55km), high-sponge (35km), helem_16 np_3"
command:
- srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml
Expand Down Expand Up @@ -122,7 +131,7 @@ steps:
slurm_ntasks: 64
slurm_mem_per_cpu: 16GB
slurm_time: 24:00:00

- label: ":computer: aquaplanet, equilmoist, high-topped (55km), gray-radiation, vertdiff, high-sponge (35km), helem_16 np_3"
command:
- srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml
Expand Down Expand Up @@ -160,7 +169,7 @@ steps:
slurm_time: 24:00:00
env:
JOB_NAME: "longrun_aquaplanet_rhoe_equilmoist_nz63_0M_55km_rs35km_clearsky_tvinsolation_earth"

- label: ":computer: baroclinic wave (ρe_tot) equilmoist high resolution topography (earth)"
command:
- srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml
Expand Down Expand Up @@ -213,7 +222,7 @@ steps:
slurm_time: 24:00:00
env:
JOB_NAME: "longrun_sphere_hydrostatic_balance_rhoe"


- group: "Experimental Long runs"

Expand Down
7 changes: 6 additions & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
agents:
queue: central
slurm_mem: 8G
modules: julia/1.9.3 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 hdf5/1.12.2-ompi415 nsight-systems/2023.3.1
modules: julia/1.9.3 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 nsight-systems/2023.3.1

env:
JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
Expand All @@ -16,6 +16,11 @@ env:
PERF_CONFIG_PATH: "config/perf_configs"
MPI_CONFIG_PATH: "config/mpi_configs"
SLURM_KILL_BAD_EXIT: 1
JULIA_NVTX_CALLBACKS: gc
JULIA_CUDA_MEMORY_POOL: none
JULIA_MPI_HAS_CUDA: "true"
MPITRAMPOLINE_LIB: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/lib64/libmpiwrapper.so"
MPITRAMPOLINE_MPIEXEC: "/groups/esm/software/MPIwrapper/ompi4.1.5_cuda-12.2/bin/mpiwrapperexec"

steps:
- label: "init :computer:"
Expand Down
4 changes: 1 addition & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
HDF5_jll = "0234f1f7-429e-5d53-9886-15a909be8d59"
ImageFiltering = "6a3955dd-da59-5b1f-98d4-e7296123deb5"
Insolation = "e98cc03f-d57e-4e3c-b70c-8d51efe9e0d8"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
Expand Down Expand Up @@ -51,7 +50,7 @@ Artifacts = "1"
AtmosphericProfilesLibrary = "0.1"
CLIMAParameters = "0.7.25"
ClimaComms = "0.5.6"
ClimaCore = "0.10.55"
ClimaCore = "0.11.0"
ClimaTimeSteppers = "0.7.14"
CloudMicrophysics = "0.15.0"
Colors = "0.12"
Expand All @@ -62,7 +61,6 @@ DiffEqBase = "6"
DiffEqCallbacks = "2"
DocStringExtensions = "0.8, 0.9"
FastGaussQuadrature = "0.4, 0.5, 1"
HDF5_jll = "~1.12"
ImageFiltering = "0.7"
Insolation = "0.8"
Interpolations = "0.14"
Expand Down
Loading

0 comments on commit 8eb839c

Please sign in to comment.