Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
013682f
plugged actions in
Aug 24, 2025
3ac539c
try modifying snakemake rule order
Aug 24, 2025
9bb8caa
try to avoid building container calling explicitly for caching
Aug 24, 2025
139d846
try to avoid building container calling explicitly for caching
Aug 24, 2025
715e441
try to avoid building container calling explicitly for caching
Aug 24, 2025
1e2a24a
try to avoid building container calling explicitly for caching
Aug 24, 2025
0084635
move additional config in the root workflow configuration and clean d…
Aug 24, 2025
199a439
removed file shared.yaml tout-court
Aug 24, 2025
0dd1ce2
fixed folder name (cannot be taken from job name)
Aug 24, 2025
c351519
compiled model must be named compiled_model.c otherwise gcc fails
Aug 25, 2025
8e0cc5c
cleaned-up (commenting for now) parts moved to scikinC
Aug 25, 2025
242a886
fixed SHARED_OBJECT env variable name
Aug 25, 2025
f024e21
shortened training for gha testing purpose
Aug 25, 2025
fdd53b5
attempt parallel processing of training
Aug 25, 2025
7ed055a
fixed report for training, fixed validation
Aug 25, 2025
52cd87b
rolled-back to single-job pipeline for comparison
Aug 25, 2025
2fb8e67
try to go parallel
Aug 25, 2025
586bd05
training two jobs in parallel takes twice the time
Aug 25, 2025
d1bfb74
test zip report
Aug 25, 2025
a8fd8f4
test at full steam
Aug 25, 2025
9bce8d2
a super-short run just creating the reports in zip format rather than…
Aug 25, 2025
8acbd22
full prod
Aug 25, 2025
274422f
quick test on releasing
Aug 26, 2025
17ab5b6
full prod
Aug 26, 2025
061c63d
added custom model prefix (was trk) and re-added validation
Aug 26, 2025
280dcbe
reduced training time
Aug 26, 2025
4218407
restored quantile transformer for rich preprocessing
Aug 27, 2025
fd863d5
shrunk ProbNN preprocessing
Aug 27, 2025
72a4f63
fix a stupid typo breaking ProbNN preprocessing deployment
Aug 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/workflows/train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# CI workflow: retrains the PID models whenever a pull request targets main.
name: Train models

# Trigger only on pull requests into the main branch.
on:
  pull_request:
    branches:
      - main

# One active run per PR branch: a new push cancels the in-flight training run.
concurrency:
  group: pull_request-${{ github.head_ref }}
  cancel-in-progress: true


jobs:
  # Trains/validates the 2016 MagUp Sim10b PID sample via the shared
  # reusable release workflow.
  pid-2016MagUp-Sim10b:
    uses: LamarrSim/release-models/.github/workflows/release.yaml@main
    # Permissions forwarded to the reusable workflow (release commits + PR comments).
    permissions:
      contents: write
      pull-requests: write
    with:
      # Release name is branch-specific to keep per-PR artifacts separate.
      name: pp-2016-MU-Sim10b-${{ github.head_ref }}
      # Directory containing the Snakemake workflow to run.
      snakemake_dir: pidgan
      # Space-separated Snakemake targets: cache the container, then validate.
      target: cache_container validate_all
      model_prefix: pid
      # Extra config merged into the workflow configuration.
      additional_config: |
        storage_folder: pid-2016MagUp-Sim10b

      # Extra Snakemake profile settings (single job — parallel training was slower).
      additional_profile: |
        jobs: 1

      # Must match the filename the deploy rule emits (gcc requires this name).
      generated_model_path: /tmp/compiled_model.c
    secrets:
      SECRETS_YAML: ${{ secrets.STORAGE_SECRETS }}

5 changes: 2 additions & 3 deletions pidgan/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ apptainer_cached_image: /tmp/lb-pidsim-train.sif
sample: 2016MU
storage_folder: tests
temp_dir: /tmp
training_time_limit_seconds: 60 #0
entries_for_validation: 10000 #00
training_time_limit_seconds: 600
entries_for_validation: 1000000

nbconvert_args:
- --allow-errors
Expand All @@ -19,7 +19,6 @@ training_data:
- training-data/2016MU-sim10b/LamarrTraining-j109.10.root
- training-data/2016MU-sim10b/LamarrTraining-j109.100.root
- training-data/2016MU-sim10b/LamarrTraining-j109.101.root
other:

- training-data/2016MU-sim10b/LamarrTraining-j109.102.root
- training-data/2016MU-sim10b/LamarrTraining-j109.103.root
Expand Down
38 changes: 20 additions & 18 deletions pidgan/workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,13 @@ PARTICLES = ['muon', 'pion', 'kaon', 'proton']
ITEMS = ['model.keras', 'tX.pkl', 'tY.pkl']


rule build_container:
rule validate_all:
input: smkstore(expand("validate-{particle}.html", particle=PARTICLES), 's3reports')


rule build_apptainer_image:
input:
definition_file="workflow/envs/pidgan.def"
definition_file=ancient("workflow/envs/pidgan.def")

output:
sif=smkstore("lhcb-pidsim.sif", 's3images')
Expand All @@ -38,7 +42,7 @@ rule build_container:


rule cache_container:
input: ancient(smkstore("lhcb-pidsim.sif", 's3images'))
input: smkstore("lhcb-pidsim.sif", 's3images')
output: config['apptainer_cached_image']
shell: "cp {input} {output}"

Expand All @@ -59,7 +63,7 @@ rule preprocessing:
report=report(
smkstore("{model}-{particle}-preprocessing.html", 's3reports'),
category="Preprocessing",
labels=dict(model="{model}", particle="{particle}")
labels=dict(model="{model}", particle="{particle}", type='html')
)

log: smkstore("{model}-{particle}-preprocessing.html", 's3reports')
Expand Down Expand Up @@ -120,16 +124,16 @@ rule train:
history_csv=report(
smkstore("history/generator-{model}-{particle}.csv", 's3reports'),
category="Training",
labels=dict(model="{model}", particle="{particle}")
labels=dict(model="{model}", particle="{particle}", type='csv')
),

output_validation_set=smkstore("ready2validate/{model}-{particle}.npz", 's3reports'),
output_model=smkstore("{model}/{particle}/model.keras", "s3models"),

report=report(
smkstore("{model}-{particle}-train.html", 's3reports'),
category="Preprocessing",
labels=dict(model="{model}", particle="{particle}")
category="Training",
labels=dict(model="{model}", particle="{particle}", type='html')
)

log: smkstore("{model}-{particle}-train.html", 's3reports')
Expand All @@ -138,8 +142,8 @@ rule train:


resources:
cpu=8,
mem_mb=32000,
cpu=4,
mem_mb=15000,
gpu=1

params:
Expand Down Expand Up @@ -193,12 +197,12 @@ rule deploy:

output:
hook=touch("/tmp/deploy.touch"),
generated_c_file="/tmp/compiled_pid_model.c",
generated_c_file="/tmp/compiled_model.c",
generated_library=smkstore("pid_compiled_model/generated.so", 's3models'),
report=report(
smkstore("deploy.html", 's3reports'),
category="Deploy",
labels=dict(model="ALL", particle="ALL")
labels=dict(model="ALL", particle="ALL", type='html')
)

params:
Expand Down Expand Up @@ -243,8 +247,8 @@ rule validate:

report=report(
smkstore("validate-{particle}.html", 's3reports'),
category="Preprocessing",
labels=dict(model="ALL", particle="{particle}")
category="Validation",
labels=dict(model="ALL", particle="{particle}", type="html")
)

log: smkstore("validate-{particle}.html", 's3reports')
Expand All @@ -259,15 +263,15 @@ rule validate:
max_entries=config['entries_for_validation'],

resources:
cpu=8,
mem_mb=32000,
cpu=4,
mem_mb=15000,
gpu=0

shell:
"""
INPUT_FILES='{params.data_files}' \
PARTICLE='{wildcards.particle}' \
GENERATED_LIBRARY='{input.generated_library}' \
SHARED_OBJECT='{input.generated_library}' \
MAX_ENTRIES='{params.max_entries}' \
SAMPLE='{params.sample}' \
jupyter nbconvert --to html --execute \
Expand All @@ -278,7 +282,5 @@ rule validate:
{input.notebook} --output {log}
"""

rule validate_all:
input: smkstore(expand("validate-{particle}.html", particle=PARTICLES), 's3reports')


Loading
Loading