Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .play_minio.json

This file was deleted.

96 changes: 49 additions & 47 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,43 +16,50 @@
# run_conda run conda backend with seeds + repeats
# run_oras run oras backend with seeds + repeats
# run_envs run envmodules backend with seeds + repeats
# knit_report generate RMarkdown reports - not fully tested
# knit_report generate RMarkdown reports and an aggregated CSV - not fully tested
#
# Environment:
# - MAX_CORES controls parallelism (default: 50).
# - MAX_CORES controls the number of concurrent rules (passed to `ob run` as --cores).
# - EASYBUILD_PREFIX needs to be tuned to access the envmodules built extending EESSI <--------------!!!!
# see: https://github.com/omnibenchmark/clustering_example/pull/43
#
# ============================================================


# Cores handed to `ob run benchmark --cores`; override with `make MAX_CORES=N`.
# (Only one default is kept: with `?=` an earlier assignment would win.)
MAX_CORES ?= 250

# EasyBuild installation prefix (imallona; edit accordingly) ## <------------------------------------!!!!
# Exported so that recipes and the tools they start can read it from the environment.
EASYBUILD_PREFIX ?= /data/imallona/.local/easybuild
export EASYBUILD_PREFIX

# omnibenchmark command template
# (recursive `=` on purpose: MAX_CORES is expanded when the command is used)
OB_CMD = ob run benchmark --local-storage --cores ${MAX_CORES}

# actual benchmark plan repository - to be pinned (the commit/tag)
CLUSTERING_REPO = https://github.com/omnibenchmark/clustering_example
# branch of CLUSTERING_REPO that clone_yamls checks out
CLUSTERING_BRANCH = longer_yamls
# local checkout directory of CLUSTERING_REPO
CLUSTERING_DIR = clustering_example

# legacy reports in the wrong repository; to be moved to this one
REPORTS_REPO = https://github.com/imallona/clustering_report
REPORTS_DIR = clustering_report

## seeds to explore
SEEDS := 2 54 546 744 1443

## repeated runs per seed
RUNS := 1 2 3

# None of these targets creates a file named after itself, so declare them
# phony: otherwise a stray file or directory called e.g. `run_conda` would make
# the target look up to date and silently skip its recipe.
.PHONY: all clone_yamls clone_reports run_conda run_oras run_envs knit_report

# Full pipeline: clone inputs, run the three backends, then build the reports.
# NOTE: the left-to-right order is only guaranteed for a serial make (no -j).
all: clone_yamls clone_reports run_conda run_oras run_envs knit_report

# clone the clustering_example repo (branch CLUSTERING_BRANCH) if not already
# present; otherwise fetch and update the existing checkout on that branch.
# The clone destination is passed explicitly so the directory tested with
# `-d` is always the one git clones into, even when CLUSTERING_DIR is overridden.
clone_yamls:
	@if [ ! -d "$(CLUSTERING_DIR)" ]; then \
		echo "Cloning clustering_example repo..."; \
		git clone --branch "$(CLUSTERING_BRANCH)" "$(CLUSTERING_REPO)" "$(CLUSTERING_DIR)"; \
	else \
		echo "clustering_example repo already present, pulling latest..."; \
		cd "$(CLUSTERING_DIR)" && git fetch && git checkout "$(CLUSTERING_BRANCH)" && git pull; \
	fi

# clone the clustering_report repo (mark branch) if not already present
Expand All @@ -66,57 +73,52 @@ clone_reports:
fi

# run_conda: run the conda backend for every seed in SEEDS, once per entry in RUNS.
# For each seed, Clustering_conda.yml is copied to a temporary YAML and the number
# following `--seed",` is rewritten to that seed. Every run writes to
# results/out_conda_seed_<seed>_run_<i> and a copy of the YAML used is stored there.
# `set -e` stops at the first failing command (with `;` chaining a failed
# `ob run` went unnoticed); the EXIT trap removes the temporary YAML on both
# success and failure.
run_conda: clone_yamls
	@mkdir -p results
	@set -e; \
	tmp_yml="$(CLUSTERING_DIR)/Clustering_conda_tmp.yml"; \
	trap 'rm -f "$$tmp_yml"' EXIT; \
	for seed in $(SEEDS); do \
		echo "Running conda benchmark with seed $$seed..."; \
		cp "$(CLUSTERING_DIR)/Clustering_conda.yml" "$$tmp_yml"; \
		sed -i "s/--seed\",[[:space:]]*[0-9]\+/--seed\", $$seed/" "$$tmp_yml"; \
		for i in $(RUNS); do \
			dest="results/out_conda_seed_$${seed}_run_$${i}"; \
			echo "  Run $$i for seed $$seed..."; \
			echo "DEST: $$dest"; \
			$(OB_CMD) -b "$$tmp_yml" --out-dir "$$dest"; \
			cp "$$tmp_yml" "$$dest/"; \
		done; \
	done
Comment on lines 75 to 87
Copy link

Copilot AI Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The temporary YAML files created in each run_* target (Clustering_conda_tmp.yml, Clustering_oras_tmp.yml, Clustering_envmodules_tmp.yml) are never cleaned up. These should be removed after all runs complete to avoid leaving temporary files in the repository. Consider adding cleanup commands at the end of each target loop.

Copilot uses AI. Check for mistakes.

# run_oras: run the oras backend for every seed in SEEDS, once per entry in RUNS.
# For each seed, Clustering_oras.yml is copied to a temporary YAML and the number
# following `--seed",` is rewritten to that seed. Every run writes to
# results/out_oras_seed_<seed>_run_<i> and a copy of the YAML used is stored there.
# `set -e` stops at the first failing command (with `;` chaining a failed
# `ob run` went unnoticed); the EXIT trap removes the temporary YAML on both
# success and failure.
run_oras: clone_yamls
	@mkdir -p results
	@set -e; \
	tmp_yml="$(CLUSTERING_DIR)/Clustering_oras_tmp.yml"; \
	trap 'rm -f "$$tmp_yml"' EXIT; \
	for seed in $(SEEDS); do \
		echo "Running oras benchmark with seed $$seed..."; \
		cp "$(CLUSTERING_DIR)/Clustering_oras.yml" "$$tmp_yml"; \
		sed -i "s/--seed\",[[:space:]]*[0-9]\+/--seed\", $$seed/" "$$tmp_yml"; \
		for i in $(RUNS); do \
			dest="results/out_oras_seed_$${seed}_run_$${i}"; \
			echo "  Run $$i for seed $$seed..."; \
			$(OB_CMD) -b "$$tmp_yml" --out-dir "$$dest"; \
			cp "$$tmp_yml" "$$dest/"; \
		done; \
	done

# run_envs: run the envmodules backend for every seed in SEEDS, once per entry in RUNS.
# The whole loop runs inside one `bash -c` so that the EESSI/lmod initialisation
# and the MODULEPATH extended with EASYBUILD_PREFIX stay in effect for every run.
# For each seed, Clustering_envmodules.yml is copied to a temporary YAML and the
# number following `--seed",` is rewritten to that seed. Every run writes to
# results/out_envmodules_seed_<seed>_run_<i> and a copy of the YAML used is stored there.
# The module setup aborts the recipe if any step fails; `set -e` is enabled only
# afterwards so it does not apply to the sourced init script. The EXIT trap
# removes the temporary YAML on both success and failure.
# No single quotes may appear inside the script: it is itself single-quoted.
run_envs: clone_yamls
	@mkdir -p results
	@bash -c '\
	source /cvmfs/software.eessi.io/versions/2025.06/init/lmod/bash && \
	module load EESSI-extend/2025.06-easybuild && \
	export MODULEPATH="$(EASYBUILD_PREFIX)/software/modules/all:$$MODULEPATH" && \
	module use $$MODULEPATH && \
	echo $$MODULEPATH || exit 1; \
	set -e; \
	tmp_yml="$(CLUSTERING_DIR)/Clustering_envmodules_tmp.yml"; \
	trap "rm -f \"$$tmp_yml\"" EXIT; \
	for seed in $(SEEDS); do \
		echo "Running envmodules benchmark with seed $$seed..."; \
		cp "$(CLUSTERING_DIR)/Clustering_envmodules.yml" "$$tmp_yml"; \
		sed -i "s/--seed\",[[:space:]]*[0-9]\+/--seed\", $$seed/" "$$tmp_yml"; \
		for i in $(RUNS); do \
			dest="results/out_envmodules_seed_$${seed}_run_$${i}"; \
			echo "  Run $$i for seed $$seed..."; \
			$(OB_CMD) -b "$$tmp_yml" --out-dir "$$dest"; \
			cp "$$tmp_yml" "$$dest/"; \
		done; \
	done \
	'

# knit_report: aggregate the benchmark results and render the analysis report.
# The stdout of parse_results.py is written to a temporary file and only moved
# to aggregated_results.json on success, so a failed or interrupted parse never
# leaves a truncated JSON behind.
#
# Legacy reports from REPORTS_DIR, currently disabled:
## R -e 'rmarkdown::render("$(REPORTS_DIR)/07_metrics_across_backends.Rmd", params = list(performance_bn = "performance-results.rds", metrics_bn = "metrics-results.rds", clustering_dir = "."))'
## R -e 'rmarkdown::render("$(REPORTS_DIR)/08_performances_across_backends.Rmd", params = list(performance_bn = "performance-results.rds", metrics_bn = "metrics-results.rds", clustering_dir = "."))'
knit_report: clone_reports
	python parse_results.py > aggregated_results.json.tmp || { rm -f aggregated_results.json.tmp; exit 1; }
	mv -f aggregated_results.json.tmp aggregated_results.json
	R -e 'rmarkdown::render("analyze_results_izaskun.Rmd")'
Loading