Skip to content

Commit

Permalink
unify all path, relative to data_config.yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
y9c committed Dec 30, 2022
1 parent 909c11a commit 1a7e094
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 59 deletions.
26 changes: 18 additions & 8 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,38 @@ if sys.version_info < (3, 6):
min_version("7.0")


WORKDIR = config.get("workdir", "workspace")
WORKDIR = os.path.relpath(
config.get("workdir", "workspace"), os.path.dirname(workflow.configfiles[-1])
)
TEMPDIR = os.path.relpath(config.get("tempdir", os.path.join(WORKDIR, ".tmp")), WORKDIR)


workdir: WORKDIR


REF = config["reference"]
for k, v in REF.items():
for k2, v2 in v.items():
REF[k][k2] = os.path.relpath(os.path.expanduser(v2), WORKDIR)
REFTYPE = ["genes", "genome"]
GROUP2SAMPLE = defaultdict(lambda: defaultdict(list))
SAMPLE_IDS = []
SAMPLE2RUN = defaultdict(list)
RUN2DATA = {}
SAMPLE2RUN = defaultdict(dict)
SAMPLE2BAM = defaultdict(dict)
for s, v2 in config[f"samples"].items():
for s, v2 in config["samples"].items():
SAMPLE_IDS.append(s)
if v2.get("treated", True):
GROUP2SAMPLE[v2["group"]]["treated"].append(s)
else:
GROUP2SAMPLE[v2["group"]]["input"].append(s)
for i, v3 in enumerate(v2.get("data", []), 1):
r = f"{s}_run{i}"
SAMPLE2RUN[s].append(r)
RUN2DATA[r] = {k4: os.path.expanduser(v4) for k4, v4 in v3.items()}
SAMPLE2RUN[s][r] = {
k4: os.path.relpath(os.path.expanduser(v4), WORKDIR)
for k4, v4 in v3.items()
}
for k, v3 in v2.get("bam", {}).items():
SAMPLE2BAM[s][k] = os.path.expanduser(v3)
SAMPLE2BAM[s][k] = os.path.relpath(os.path.expanduser(v3), WORKDIR)


rule all:
Expand All @@ -45,7 +51,11 @@ rule all:

rule join_pairend_reads:
input:
lambda wildcards: RUN2DATA[wildcards.rn].values(),
lambda wildcards: [
r
for rn_dict in SAMPLE2RUN.values()
for r in rn_dict.get(wildcards.rn, {}).values()
],
output:
temp(os.path.join(TEMPDIR, "merged_reads/{rn}.fq.gz")),
params:
Expand Down
24 changes: 12 additions & 12 deletions test/data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,18 @@ keep_internal: false
# # only analyze putative sites that pass the prefilter in >= x groups
# min_group_num: 1

# NOTE: relative path of reference and sample data should relative to the workdir
# NOTE: relative paths of reference and sample data should be relative to the current config file, rather than the workdir
reference:
# Optional
# skip contamination removal if this section is not provided
contamination:
fa: ../ref/contamination.fa
bt2: ../ref/contamination
fa: ./ref/contamination.fa
bt2: ./ref/contamination
# Required
genes:
fa: ../ref/genes.fa
fai: ../ref/genes.fa.fai
bt2: ../ref/genes
fa: ./ref/genes.fa
fai: ./ref/genes.fa.fai
bt2: ./ref/genes
# Required
genome:
fa: /data/reference/genome/Mus_musculus/GRCm39.fa
Expand All @@ -46,31 +46,31 @@ samples:
# but it is good practice to use only letters, digits and hyphens: no spaces, underscores or other special symbols.
mESCWT-rep1-input:
data:
- R1: ../data/IP16_run1.fastq.gz
- R1: ./data/IP16_run1.fastq.gz
group: mESCWT
treated: false
mESCWT-rep2-input:
data:
- R1: ../data/IP17_run1.fastq.gz
- R1: ./data/IP17_run1.fastq.gz
group: mESCWT
treated: false
mESCWT-rep3-input:
data:
- R1: ../data/IP18_run1.fastq.gz
- R1: ./data/IP18_run1.fastq.gz
group: mESCWT
treated: false
mESCWT-rep1-treated:
data:
- R1: ../data/IP4_run1.fastq.gz
- R1: ./data/IP4_run1.fastq.gz
group: mESCWT
treated: true
mESCWT-rep2-treated:
data:
- R1: ../data/IP5_run1.fastq.gz
- R1: ./data/IP5_run1.fastq.gz
group: mESCWT
treated: true
mESCWT-rep3-treated:
data:
- R1: ../data/IP6_run1.fastq.gz
- R1: ./data/IP6_run1.fastq.gz
group: mESCWT
treated: true
60 changes: 60 additions & 0 deletions test/data_from_bam.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Directory for all output results and analysis reports
workdir: ./workspace_from_bam

# NOTE: relative paths of reference and sample data should be relative to the current config file, rather than the workdir
reference:
# Optional
# skip contamination removal if this section is not provided
contamination:
fa: ./ref/contamination.fa
bt2: ./ref/contamination
# Required
genes:
fa: ./ref/genes.fa
fai: ./ref/genes.fa.fai
bt2: ./ref/genes
# Required
genome:
fa: /data/reference/genome/Mus_musculus/GRCm39.fa
fai: /data/reference/genome/Mus_musculus/GRCm39.fa.fai
star: /data/reference/genome/Mus_musculus/star/GRCm39.release108

samples:
# sample name can be any string,
# but it is good practice to use only letters, digits and hyphens: no spaces, underscores or other special symbols.
mESCWT-rep1-input:
bam:
genes: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep1-input_genes.bam
genome: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep1-input_genome.bam
group: mESCWT
treated: false
mESCWT-rep2-input:
bam:
genes: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep2-input_genes.bam
genome: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep2-input_genome.bam
group: mESCWT
treated: false
mESCWT-rep3-input:
bam:
genes: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep3-input_genes.bam
genome: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep3-input_genome.bam
group: mESCWT
treated: false
mESCWT-rep1-treated:
bam:
genes: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep1-treated_genes.bam
genome: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep1-treated_genome.bam
group: mESCWT
treated: true
mESCWT-rep2-treated:
bam:
genes: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep2-treated_genes.bam
genome: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep2-treated_genome.bam
group: mESCWT
treated: true
mESCWT-rep3-treated:
bam:
genes: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep3-treated_genes.bam
genome: ./workspace_mouse_demo/drop_duplicates/mESCWT-rep3-treated_genome.bam
group: mESCWT
treated: true
24 changes: 12 additions & 12 deletions test/data_multi_groups.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Directory for all output results and analysis reports
workdir: ./workspace_multi_groups

# NOTE: relative path of reference and sample data should relative to the workdir
# NOTE: relative paths of reference and sample data should be relative to the current config file, rather than the workdir
reference:
contamination:
fa: ../ref/contamination.fa
bt2: ../ref/contamination
fa: ./ref/contamination.fa
bt2: ./ref/contamination
genes:
fa: ../ref/genes.fa
fai: ../ref/genes.fa.fai
bt2: ../ref/genes
fa: ./ref/genes.fa
fai: ./ref/genes.fa.fai
bt2: ./ref/genes
genome:
fa: /data/reference/genome/Mus_musculus/GRCm39.fa
fai: /data/reference/genome/Mus_musculus/GRCm39.fa.fai
Expand All @@ -18,31 +18,31 @@ reference:
samples:
mESCWT-rep1-input:
data:
- R1: ../data/IP16_run1.fastq.gz
- R1: ./data/IP16_run1.fastq.gz
group: mESCWT1
treated: false
mESCWT-rep2-input:
data:
- R1: ../data/IP17_run1.fastq.gz
- R1: ./data/IP17_run1.fastq.gz
group: mESCWT2
treated: false
mESCWT-rep3-input:
data:
- R1: ../data/IP18_run1.fastq.gz
- R1: ./data/IP18_run1.fastq.gz
group: mESCWT3
treated: false
mESCWT-rep1-treated:
data:
- R1: ../data/IP4_run1.fastq.gz
- R1: ./data/IP4_run1.fastq.gz
group: mESCWT1
treated: true
mESCWT-rep2-treated:
data:
- R1: ../data/IP5_run1.fastq.gz
- R1: ./data/IP5_run1.fastq.gz
group: mESCWT2
treated: true
mESCWT-rep3-treated:
data:
- R1: ../data/IP6_run1.fastq.gz
- R1: ./data/IP6_run1.fastq.gz
group: mESCWT3
treated: true
34 changes: 17 additions & 17 deletions test/data_multi_runs.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Directory for all output results and analysis reports
workdir: ./workspace_multi_runs

# NOTE: relative path of reference and sample data should relative to the workdir
# NOTE: relative paths of reference and sample data should be relative to the current config file, rather than the workdir
reference:
contamination:
fa: ../ref/contamination.fa
bt2: ../ref/contamination
fa: ./ref/contamination.fa
bt2: ./ref/contamination
genes:
fa: ../ref/genes.fa
fai: ../ref/genes.fa.fai
bt2: ../ref/genes
fa: ./ref/genes.fa
fai: ./ref/genes.fa.fai
bt2: ./ref/genes
genome:
fa: /data/reference/genome/Mus_musculus/GRCm39.fa
fai: /data/reference/genome/Mus_musculus/GRCm39.fa.fai
Expand All @@ -18,36 +18,36 @@ reference:
samples:
mESCWT-rep1-input:
data:
- R1: ../data/IP16_run1.fastq.gz
- R1: ./data/IP16_run1.fastq.gz
group: mESCWT
treated: false
mESCWT-rep2-input:
data:
- R1: ../data/IP17_run1.fastq.gz
- R1: ../data/IP17_run2.fastq.gz
- R1: ./data/IP17_run1.fastq.gz
- R1: ./data/IP17_run2.fastq.gz
group: mESCWT
treated: false
mESCWT-rep3-input:
data:
- R1: ../data/IP18_run1.fastq.gz
- R1: ../data/IP18_run2.fastq.gz
- R1: ./data/IP18_run1.fastq.gz
- R1: ./data/IP18_run2.fastq.gz
group: mESCWT
treated: false
mESCWT-rep1-treated:
data:
- R1: ../data/IP4_run1.fastq.gz
- R1: ../data/IP4_run2.fastq.gz
- R1: ./data/IP4_run1.fastq.gz
- R1: ./data/IP4_run2.fastq.gz
group: mESCWT
treated: true
mESCWT-rep2-treated:
data:
- R1: ../data/IP5_run1.fastq.gz
- R1: ../data/IP5_run2.fastq.gz
- R1: ./data/IP5_run1.fastq.gz
- R1: ./data/IP5_run2.fastq.gz
group: mESCWT
treated: true
mESCWT-rep3-treated:
data:
- R1: ../data/IP6_run1.fastq.gz
- R1: ../data/IP6_run2.fastq.gz
- R1: ./data/IP6_run1.fastq.gz
- R1: ./data/IP6_run2.fastq.gz
group: mESCWT
treated: true
20 changes: 10 additions & 10 deletions test/data_without_contamination.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Directory for all output results and analysis reports
workdir: ./workspace_without_contamination

# NOTE: relative path of reference and sample data should relative to the workdir
# NOTE: relative paths of reference and sample data should be relative to the current config file, rather than the workdir
reference:
# skip contamination removal if this section is not provided
# REQUIRED
genes:
fa: ../ref/genes.fa
fai: ../ref/genes.fa.fai
bt2: ../ref/genes
fa: ./ref/genes.fa
fai: ./ref/genes.fa.fai
bt2: ./ref/genes
# REQUIRED
genome:
fa: /data/reference/genome/Mus_musculus/GRCm39.fa
Expand All @@ -18,31 +18,31 @@ reference:
samples:
mESCWT-rep1-input:
data:
- R1: ../data/IP16_run1.fastq.gz
- R1: ./data/IP16_run1.fastq.gz
group: mESCWT
treated: false
mESCWT-rep2-input:
data:
- R1: ../data/IP17_run1.fastq.gz
- R1: ./data/IP17_run1.fastq.gz
group: mESCWT
treated: false
mESCWT-rep3-input:
data:
- R1: ../data/IP18_run1.fastq.gz
- R1: ./data/IP18_run1.fastq.gz
group: mESCWT
treated: false
mESCWT-rep1-treated:
data:
- R1: ../data/IP4_run1.fastq.gz
- R1: ./data/IP4_run1.fastq.gz
group: mESCWT
treated: true
mESCWT-rep2-treated:
data:
- R1: ../data/IP5_run1.fastq.gz
- R1: ./data/IP5_run1.fastq.gz
group: mESCWT
treated: true
mESCWT-rep3-treated:
data:
- R1: ../data/IP6_run1.fastq.gz
- R1: ./data/IP6_run1.fastq.gz
group: mESCWT
treated: true

0 comments on commit 1a7e094

Please sign in to comment.