@@ -9,147 +9,147 @@ This workflow demonstrates Snakemake features with a simple pandas-based analysi
99
1010Usage:
1111 # Run full workflow
12- snakemake --cores 1 --config output_dir=/path/to/output
12+ snakemake --cores 1 -d /path/to/output
13+
14+ # Run with container (requires apptainer/singularity)
15+ snakemake --cores 1 --sdm apptainer -d /path/to/output
1316
1417 # Dry run
15- snakemake -n --config output_dir=/path/to/output
18+ snakemake -n -d /path/to/output
1619
1720 # Generate report
18- snakemake --report report.html --config output_dir=/path/to/output
21+ snakemake --report report.html -d /path/to/output
1922"""
2023
21- from pathlib import Path
22-
2324from snakemake .utils import min_version
2425
2526min_version ("8.0" )
2627
2728
29+ # Base directory (where Snakefile is located)
30+ BASEDIR = workflow .basedir
31+
32+
2833# Load configuration
29- configfile : " config/config.yaml"
34+ configfile : f" { BASEDIR } / config/config.yaml"
3035
3136
3237# Global report
33- report : "report/workflow.rst"
34-
38+ report : f"{ BASEDIR } /report/workflow.rst"
3539
36- # Validate required config
37- if config .get ("output_dir" ) is None :
38- raise ValueError ("output_dir must be provided via --config output_dir=/path/to/output" )
3940
40- OUTPUT_DIR = Path (config ["output_dir" ])
41- DATA_DIR = OUTPUT_DIR / "data"
42- RESULTS_DIR = OUTPUT_DIR / "results"
43- FIGURES_DIR = OUTPUT_DIR / "figures"
44- LOGS_DIR = OUTPUT_DIR / "logs"
41+ # Container image for all rules (used with --sdm apptainer)
42+ container : config ["container" ]
4543
4644
47- # Create output directories at workflow start
48- onstart :
49- shell (f"mkdir -p { DATA_DIR } { RESULTS_DIR } { FIGURES_DIR } { LOGS_DIR } " )
45+ # Output directories (relative to working directory set via -d)
46+ DATA_DIR = "data"
47+ RESULTS_DIR = "results"
48+ FIGURES_DIR = "figures"
49+ LOGS_DIR = "logs"
5050
5151
5252# Default target
5353rule all :
5454 input :
55- FIGURES_DIR / " correlation_heatmap.png" ,
55+ f" { FIGURES_DIR } / correlation_heatmap.png" ,
5656
5757
5858# Step 1a: Download meaningful variables data
5959rule download_meaningful_variables :
6060 output :
61- DATA_DIR / " meaningful_variables.csv" ,
61+ f" { DATA_DIR } / meaningful_variables.csv" ,
6262 params :
6363 url = config ["meaningful_variables_url" ],
6464 log :
65- OUTPUT_DIR / "logs" / " download_meaningful_variables.log" ,
65+ f" { LOGS_DIR } / download_meaningful_variables.log" ,
6666 conda :
67- " envs/simple.yml"
67+ f" { BASEDIR } / envs/simple.yml"
6868 script :
69- " scripts/download_data.py"
69+ f" { BASEDIR } / scripts/download_data.py"
7070
7171
7272# Step 1b: Download demographics data
7373rule download_demographics :
7474 output :
75- DATA_DIR / " demographics.csv" ,
75+ f" { DATA_DIR } / demographics.csv" ,
7676 params :
7777 url = config ["demographics_url" ],
7878 log :
79- OUTPUT_DIR / "logs" / " download_demographics.log" ,
79+ f" { LOGS_DIR } / download_demographics.log" ,
8080 conda :
81- " envs/simple.yml"
81+ f" { BASEDIR } / envs/simple.yml"
8282 script :
83- " scripts/download_data.py"
83+ f" { BASEDIR } / scripts/download_data.py"
8484
8585
8686# Step 2a: Filter meaningful variables to numerical columns
8787rule filter_meaningful_variables :
8888 input :
89- DATA_DIR / " meaningful_variables.csv" ,
89+ f" { DATA_DIR } / meaningful_variables.csv" ,
9090 output :
91- DATA_DIR / " meaningful_variables_numerical.csv" ,
91+ f" { DATA_DIR } / meaningful_variables_numerical.csv" ,
9292 log :
93- OUTPUT_DIR / "logs" / " filter_meaningful_variables.log" ,
93+ f" { LOGS_DIR } / filter_meaningful_variables.log" ,
9494 conda :
95- " envs/simple.yml"
95+ f" { BASEDIR } / envs/simple.yml"
9696 script :
97- " scripts/filter_data.py"
97+ f" { BASEDIR } / scripts/filter_data.py"
9898
9999
100100# Step 2b: Filter demographics to numerical columns
101101rule filter_demographics :
102102 input :
103- DATA_DIR / " demographics.csv" ,
103+ f" { DATA_DIR } / demographics.csv" ,
104104 output :
105- DATA_DIR / " demographics_numerical.csv" ,
105+ f" { DATA_DIR } / demographics_numerical.csv" ,
106106 log :
107- OUTPUT_DIR / "logs" / " filter_demographics.log" ,
107+ f" { LOGS_DIR } / filter_demographics.log" ,
108108 conda :
109- " envs/simple.yml"
109+ f" { BASEDIR } / envs/simple.yml"
110110 script :
111- " scripts/filter_data.py"
111+ f" { BASEDIR } / scripts/filter_data.py"
112112
113113
114114# Step 3: Join the two datasets
115115rule join_datasets :
116116 input :
117- meaningful_vars = DATA_DIR / " meaningful_variables_numerical.csv" ,
118- demographics = DATA_DIR / " demographics_numerical.csv" ,
117+ meaningful_vars = f" { DATA_DIR } / meaningful_variables_numerical.csv" ,
118+ demographics = f" { DATA_DIR } / demographics_numerical.csv" ,
119119 output :
120- DATA_DIR / " joined_data.csv" ,
120+ f" { DATA_DIR } / joined_data.csv" ,
121121 log :
122- OUTPUT_DIR / "logs" / " join_datasets.log" ,
122+ f" { LOGS_DIR } / join_datasets.log" ,
123123 conda :
124- " envs/simple.yml"
124+ f" { BASEDIR } / envs/simple.yml"
125125 script :
126- " scripts/join_data.py"
126+ f" { BASEDIR } / scripts/join_data.py"
127127
128128
129129# Step 4: Compute correlation matrix
130130rule compute_correlation :
131131 input :
132- DATA_DIR / " joined_data.csv" ,
132+ f" { DATA_DIR } / joined_data.csv" ,
133133 output :
134- RESULTS_DIR / " correlation_matrix.csv" ,
134+ f" { RESULTS_DIR } / correlation_matrix.csv" ,
135135 params :
136136 method = config ["correlation_method" ],
137137 log :
138- OUTPUT_DIR / "logs" / " compute_correlation.log" ,
138+ f" { LOGS_DIR } / compute_correlation.log" ,
139139 conda :
140- " envs/simple.yml"
140+ f" { BASEDIR } / envs/simple.yml"
141141 script :
142- " scripts/compute_correlation.py"
142+ f" { BASEDIR } / scripts/compute_correlation.py"
143143
144144
145145# Step 5: Generate clustered heatmap
146146rule generate_heatmap :
147147 input :
148- RESULTS_DIR / " correlation_matrix.csv" ,
148+ f" { RESULTS_DIR } / correlation_matrix.csv" ,
149149 output :
150150 report (
151- FIGURES_DIR / " correlation_heatmap.png" ,
152- caption = " report/heatmap.rst" ,
151+ f" { FIGURES_DIR } / correlation_heatmap.png" ,
152+ caption = f" { BASEDIR } / report/heatmap.rst" ,
153153 category = "Results" ,
154154 ),
155155 params :
@@ -158,8 +158,8 @@ rule generate_heatmap:
158158 vmin = config ["heatmap" ]["vmin" ],
159159 vmax = config ["heatmap" ]["vmax" ],
160160 log :
161- OUTPUT_DIR / "logs" / " generate_heatmap.log" ,
161+ f" { LOGS_DIR } / generate_heatmap.log" ,
162162 conda :
163- " envs/simple.yml"
163+ f" { BASEDIR } / envs/simple.yml"
164164 script :
165- " scripts/generate_heatmap.py"
165+ f" { BASEDIR } / scripts/generate_heatmap.py"
0 commit comments