Skip to content

Commit affcccc

Browse files
committed
add conda and apptainer support
1 parent 71865db commit affcccc

File tree

9 files changed

+101
-112
lines changed

9 files changed

+101
-112
lines changed

.dockerignore

Lines changed: 0 additions & 41 deletions
This file was deleted.

snakemake_workflow/Makefile

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,44 @@
11
OUTPUT_DIR := /Users/poldrack/data_unsynced/BCBS/simple_workflow/wf_snakemake
2+
DOCKER_IMAGE := poldrack/simple-workflow
3+
DOCKER_TAG := latest
24

3-
.PHONY: run report graph
5+
.PHONY: run run-conda run-apptainer report graph clean docker-build docker-push
46

57
clean:
68
-rm -rf .snakemake
79
-rm -rf $(OUTPUT_DIR)/*
810

911
run:
10-
uv run snakemake --cores 1 --config output_dir=$(OUTPUT_DIR)
12+
uv run snakemake --cores 1 -d $(OUTPUT_DIR)
1113

1214
run-conda:
13-
uv run snakemake --cores 1 --sdm conda --config output_dir=$(OUTPUT_DIR)
15+
uv run snakemake --cores 1 --sdm conda -d $(OUTPUT_DIR)
16+
17+
run-apptainer:
18+
uv run snakemake --cores 1 --sdm apptainer -d $(OUTPUT_DIR)
1419

1520
lint:
16-
snakemake --sdm conda --lint --cores 1 --config output_dir=$(OUTPUT_DIR)
21+
snakemake --lint --cores 1 -d $(OUTPUT_DIR)
1722

1823
dryrun:
19-
snakemake --dry-run --cores 1 --config output_dir=$(OUTPUT_DIR)
24+
snakemake --dry-run --cores 1 -d $(OUTPUT_DIR)
25+
26+
dryrun-apptainer:
27+
snakemake --dry-run --cores 1 --sdm apptainer -d $(OUTPUT_DIR)
2028

2129
report:
22-
snakemake --report $(OUTPUT_DIR)/report.html --config output_dir=$(OUTPUT_DIR)
30+
snakemake --report $(OUTPUT_DIR)/report.html -d $(OUTPUT_DIR)
2331

2432
graph:
25-
snakemake --rulegraph --config output_dir=$(OUTPUT_DIR) --cores 2 | dot -Tpng -Gdpi=300 > output/rulegraph.png
33+
snakemake --rulegraph -d $(OUTPUT_DIR) --cores 2 | dot -Tpng -Gdpi=300 > $(OUTPUT_DIR)/rulegraph.png
2634

2735
export-env:
2836
-mkdir envs
2937
conda env export -n bettercode > envs/bettercode.yml
38+
39+
# Container image management
40+
docker-build:
41+
docker build -t $(DOCKER_IMAGE):$(DOCKER_TAG) .
42+
43+
docker-push:
44+
docker push $(DOCKER_IMAGE):$(DOCKER_TAG)

snakemake_workflow/Snakefile

Lines changed: 55 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -9,147 +9,147 @@ This workflow demonstrates Snakemake features with a simple pandas-based analysi
99
1010
Usage:
1111
# Run full workflow
12-
snakemake --cores 1 --config output_dir=/path/to/output
12+
snakemake --cores 1 -d /path/to/output
13+
14+
# Run with container (requires apptainer/singularity)
15+
snakemake --cores 1 --sdm apptainer -d /path/to/output
1316
1417
# Dry run
15-
snakemake -n --config output_dir=/path/to/output
18+
snakemake -n -d /path/to/output
1619
1720
# Generate report
18-
snakemake --report report.html --config output_dir=/path/to/output
21+
snakemake --report report.html -d /path/to/output
1922
"""
2023

21-
from pathlib import Path
22-
2324
from snakemake.utils import min_version
2425

2526
min_version("8.0")
2627

2728

29+
# Base directory (where Snakefile is located)
30+
BASEDIR = workflow.basedir
31+
32+
2833
# Load configuration
29-
configfile: "config/config.yaml"
34+
configfile: f"{BASEDIR}/config/config.yaml"
3035

3136

3237
# Global report
33-
report: "report/workflow.rst"
34-
38+
report: f"{BASEDIR}/report/workflow.rst"
3539

36-
# Validate required config
37-
if config.get("output_dir") is None:
38-
raise ValueError("output_dir must be provided via --config output_dir=/path/to/output")
3940

40-
OUTPUT_DIR = Path(config["output_dir"])
41-
DATA_DIR = OUTPUT_DIR / "data"
42-
RESULTS_DIR = OUTPUT_DIR / "results"
43-
FIGURES_DIR = OUTPUT_DIR / "figures"
44-
LOGS_DIR = OUTPUT_DIR / "logs"
41+
# Container image for all rules (used with --sdm apptainer)
42+
container: config["container"]
4543

4644

47-
# Create output directories at workflow start
48-
onstart:
49-
shell(f"mkdir -p {DATA_DIR} {RESULTS_DIR} {FIGURES_DIR} {LOGS_DIR}")
45+
# Output directories (relative to working directory set via -d)
46+
DATA_DIR = "data"
47+
RESULTS_DIR = "results"
48+
FIGURES_DIR = "figures"
49+
LOGS_DIR = "logs"
5050

5151

5252
# Default target
5353
rule all:
5454
input:
55-
FIGURES_DIR / "correlation_heatmap.png",
55+
f"{FIGURES_DIR}/correlation_heatmap.png",
5656

5757

5858
# Step 1a: Download meaningful variables data
5959
rule download_meaningful_variables:
6060
output:
61-
DATA_DIR / "meaningful_variables.csv",
61+
f"{DATA_DIR}/meaningful_variables.csv",
6262
params:
6363
url=config["meaningful_variables_url"],
6464
log:
65-
OUTPUT_DIR / "logs" / "download_meaningful_variables.log",
65+
f"{LOGS_DIR}/download_meaningful_variables.log",
6666
conda:
67-
"envs/simple.yml"
67+
f"{BASEDIR}/envs/simple.yml"
6868
script:
69-
"scripts/download_data.py"
69+
f"{BASEDIR}/scripts/download_data.py"
7070

7171

7272
# Step 1b: Download demographics data
7373
rule download_demographics:
7474
output:
75-
DATA_DIR / "demographics.csv",
75+
f"{DATA_DIR}/demographics.csv",
7676
params:
7777
url=config["demographics_url"],
7878
log:
79-
OUTPUT_DIR / "logs" / "download_demographics.log",
79+
f"{LOGS_DIR}/download_demographics.log",
8080
conda:
81-
"envs/simple.yml"
81+
f"{BASEDIR}/envs/simple.yml"
8282
script:
83-
"scripts/download_data.py"
83+
f"{BASEDIR}/scripts/download_data.py"
8484

8585

8686
# Step 2a: Filter meaningful variables to numerical columns
8787
rule filter_meaningful_variables:
8888
input:
89-
DATA_DIR / "meaningful_variables.csv",
89+
f"{DATA_DIR}/meaningful_variables.csv",
9090
output:
91-
DATA_DIR / "meaningful_variables_numerical.csv",
91+
f"{DATA_DIR}/meaningful_variables_numerical.csv",
9292
log:
93-
OUTPUT_DIR / "logs" / "filter_meaningful_variables.log",
93+
f"{LOGS_DIR}/filter_meaningful_variables.log",
9494
conda:
95-
"envs/simple.yml"
95+
f"{BASEDIR}/envs/simple.yml"
9696
script:
97-
"scripts/filter_data.py"
97+
f"{BASEDIR}/scripts/filter_data.py"
9898

9999

100100
# Step 2b: Filter demographics to numerical columns
101101
rule filter_demographics:
102102
input:
103-
DATA_DIR / "demographics.csv",
103+
f"{DATA_DIR}/demographics.csv",
104104
output:
105-
DATA_DIR / "demographics_numerical.csv",
105+
f"{DATA_DIR}/demographics_numerical.csv",
106106
log:
107-
OUTPUT_DIR / "logs" / "filter_demographics.log",
107+
f"{LOGS_DIR}/filter_demographics.log",
108108
conda:
109-
"envs/simple.yml"
109+
f"{BASEDIR}/envs/simple.yml"
110110
script:
111-
"scripts/filter_data.py"
111+
f"{BASEDIR}/scripts/filter_data.py"
112112

113113

114114
# Step 3: Join the two datasets
115115
rule join_datasets:
116116
input:
117-
meaningful_vars=DATA_DIR / "meaningful_variables_numerical.csv",
118-
demographics=DATA_DIR / "demographics_numerical.csv",
117+
meaningful_vars=f"{DATA_DIR}/meaningful_variables_numerical.csv",
118+
demographics=f"{DATA_DIR}/demographics_numerical.csv",
119119
output:
120-
DATA_DIR / "joined_data.csv",
120+
f"{DATA_DIR}/joined_data.csv",
121121
log:
122-
OUTPUT_DIR / "logs" / "join_datasets.log",
122+
f"{LOGS_DIR}/join_datasets.log",
123123
conda:
124-
"envs/simple.yml"
124+
f"{BASEDIR}/envs/simple.yml"
125125
script:
126-
"scripts/join_data.py"
126+
f"{BASEDIR}/scripts/join_data.py"
127127

128128

129129
# Step 4: Compute correlation matrix
130130
rule compute_correlation:
131131
input:
132-
DATA_DIR / "joined_data.csv",
132+
f"{DATA_DIR}/joined_data.csv",
133133
output:
134-
RESULTS_DIR / "correlation_matrix.csv",
134+
f"{RESULTS_DIR}/correlation_matrix.csv",
135135
params:
136136
method=config["correlation_method"],
137137
log:
138-
OUTPUT_DIR / "logs" / "compute_correlation.log",
138+
f"{LOGS_DIR}/compute_correlation.log",
139139
conda:
140-
"envs/simple.yml"
140+
f"{BASEDIR}/envs/simple.yml"
141141
script:
142-
"scripts/compute_correlation.py"
142+
f"{BASEDIR}/scripts/compute_correlation.py"
143143

144144

145145
# Step 5: Generate clustered heatmap
146146
rule generate_heatmap:
147147
input:
148-
RESULTS_DIR / "correlation_matrix.csv",
148+
f"{RESULTS_DIR}/correlation_matrix.csv",
149149
output:
150150
report(
151-
FIGURES_DIR / "correlation_heatmap.png",
152-
caption="report/heatmap.rst",
151+
f"{FIGURES_DIR}/correlation_heatmap.png",
152+
caption=f"{BASEDIR}/report/heatmap.rst",
153153
category="Results",
154154
),
155155
params:
@@ -158,8 +158,8 @@ rule generate_heatmap:
158158
vmin=config["heatmap"]["vmin"],
159159
vmax=config["heatmap"]["vmax"],
160160
log:
161-
OUTPUT_DIR / "logs" / "generate_heatmap.log",
161+
f"{LOGS_DIR}/generate_heatmap.log",
162162
conda:
163-
"envs/simple.yml"
163+
f"{BASEDIR}/envs/simple.yml"
164164
script:
165-
"scripts/generate_heatmap.py"
165+
f"{BASEDIR}/scripts/generate_heatmap.py"

snakemake_workflow/config/config.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Configuration for the simple correlation Snakemake workflow
22

3+
# Container image (used with --sdm apptainer)
4+
container: "docker://poldrack/simple-workflow:latest"
5+
36
# Data URLs
47
meaningful_variables_url: "https://raw.githubusercontent.com/IanEisenberg/Self_Regulation_Ontology/refs/heads/master/Data/Complete_02-16-2019/meaningful_variables_clean.csv"
58
demographics_url: "https://raw.githubusercontent.com/IanEisenberg/Self_Regulation_Ontology/refs/heads/master/Data/Complete_02-16-2019/demographics.csv"

snakemake_workflow/envs/simple.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
name: bettercode
1+
name: simple_workflow
22
channels:
33
- conda-forge
44
dependencies:
5-
- numpy=2.4.0
6-
- pandas=2.3.3
7-
- matplotlib==3.10.8
8-
- seaborn==0.13.2
5+
- python>=3.10
6+
- pandas
7+
- matplotlib
8+
- seaborn

snakemake_workflow/scripts/compute_correlation.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""Snakemake script for computing correlation matrix."""
22

3+
import sys
34
from pathlib import Path
45

56
import pandas as pd
67

7-
from bettercode.simple_workflow import compute_correlation_matrix
8+
# Add workflow directory to path for local simple_workflow module
9+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
10+
from simple_workflow import compute_correlation_matrix
811

912
def main():
1013
"""Compute Spearman correlation matrix."""

snakemake_workflow/scripts/filter_data.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""Snakemake script for filtering data to numerical columns."""
22

3+
import sys
34
from pathlib import Path
45

56
import pandas as pd
67

7-
from bettercode.simple_workflow import filter_numerical_columns
8+
# Add workflow directory to path for local simple_workflow module
9+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
10+
from simple_workflow import filter_numerical_columns
811

912
def main():
1013
"""Filter data to numerical columns."""

snakemake_workflow/scripts/generate_heatmap.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
"""Snakemake script for generating clustered heatmap."""
22

3+
import sys
34
from pathlib import Path
45

56
import matplotlib.pyplot as plt
67
import pandas as pd
78
import seaborn as sns
89

9-
from bettercode.simple_workflow import generate_clustered_heatmap
10+
# Add workflow directory to path for local simple_workflow module
11+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
12+
from simple_workflow import generate_clustered_heatmap
1013

1114

1215
def main():

snakemake_workflow/scripts/join_data.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""Snakemake script for joining two dataframes."""
22

3+
import sys
34
from pathlib import Path
45

56
import pandas as pd
67

7-
from bettercode.simple_workflow import join_dataframes
8+
# Add workflow directory to path for local simple_workflow module
9+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
10+
from simple_workflow import join_dataframes
811

912
def main():
1013
"""Join the two datasets."""

0 commit comments

Comments (0)