-
Notifications
You must be signed in to change notification settings - Fork 44
/
Snakefile
130 lines (102 loc) · 3.49 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# Copyright 2018 Johannes Köster.
# Licensed under the MIT license (http://opensource.org/licenses/MIT)
# This file may not be copied, modified, or distributed
# except according to those terms.
from snakemake.utils import validate
import pandas as pd
########## load config and cell sheet ############
configfile: "config.yaml"


# Validate the configuration against its schema before any table is read.
validate(config, schema="schemas/config.schema.yaml")

# Cell sheet: tab-separated, indexed by its "id" column; drop=False keeps "id"
# available as a regular column as well.
cells = pd.read_csv(config["cells"], sep="\t").set_index("id", drop=False)
validate(cells, schema="schemas/cells.schema.yaml")

# The marker table is optional: `markers` stays None unless the config
# provides a "markers" entry in its "celltype" section.
markers = None
if "markers" in config.get("celltype", {}):
    markers = pd.read_csv(config["celltype"]["markers"], sep="\t").set_index(
        "name", drop=False
    )
    # Markers without a parent get the parent "root". The filled column is
    # assigned back rather than calling fillna(inplace=True) on the result of
    # .loc: that Series may be a temporary copy (always so under pandas
    # copy-on-write), in which case the fill would be silently lost.
    markers["parent"] = markers["parent"].fillna("root")
# Quality-control plots, one PDF per metric; this list is requested by both
# target rules (`all` and `all_qc`).
targets_qc = [
    "plots/{}.pdf".format(metric)
    for metric in (
        "library-size",
        "expressed-genes",
        "mito-proportion",
        "spike-proportion",
        "explained-variance",
    )
]

# Seeds that are expanded into the "seed={seed}" part of the t-SNE plot paths.
seeds = [23213, 789789, 897354]
######## target rules ##############
# Wildcard values shared by several targets of rule `all`.
# Every column of the cell sheet except the first is used as a covariate.
covariates = cells.columns[1:]
# `markers` is None when no marker table is configured, and the whole
# "celltype" config section may be absent. In that case no cell-type targets
# are requested, instead of failing while the Snakefile is parsed.
celltype_parents = markers["parent"].unique() if markers is not None else []
expression_plot_genes = config.get("celltype", {}).get(
    "expression-plot-genes", []
)


# Default target: requests every plot and table of the workflow.
rule all:
    input:
        # quality control
        targets_qc,
        # highly variable genes
        "plots/hvg-expr-dists.pdf",
        "plots/mean-vs-variance.pdf",
        "tables/hvg.tsv",
        "tables/hvg-correlations.tsv",
        "plots/hvg-clusters.pdf",
        "plots/hvg-corr-heatmap.pdf",
        # one plot per covariate (and per seed for t-SNE)
        expand("plots/cycle-scores.{covariate}.pdf", covariate=covariates),
        expand("plots/hvg-pca.{covariate}.pdf", covariate=covariates),
        expand(
            "plots/hvg-tsne.{covariate}.seed={seed}.pdf",
            covariate=covariates,
            seed=seeds,
        ),
        # cell-type assignment; empty when no markers are configured
        expand("plots/cellassign.{parent}.pdf", parent=celltype_parents),
        expand(
            "plots/celltype-tsne.{parent}.seed={seed}.pdf",
            seed=seeds,
            parent=celltype_parents,
        ),
        expand(
            "plots/gene-tsne/{gene}.tsne.seed={seed}.pdf",
            seed=seeds,
            gene=expression_plot_genes,
        ),
        # differential expression: one set of outputs per configured test
        expand(
            [
                "tables/diffexp.{test}.tsv",
                "plots/diffexp.{test}.bcv.pdf",
                "plots/diffexp.{test}.md.pdf",
                "plots/diffexp.{test}.disp.pdf",
            ],
            test=config["diffexp"],
        ),
        # expression plots for the genes of interest of each test
        [
            expand(
                "plots/expression/{gene}.{test}.expression.pdf",
                test=name,
                gene=test["genes_of_interest"],
            )
            for name, test in config["diffexp"].items()
        ],
        expand("plots/celltype-expressions.{parent}.pdf", parent=celltype_parents),
        # gene-vs-gene plots: the configured x value(s) against each y gene,
        # per settings entry
        [
            expand(
                "plots/gene-vs-gene/{x}-vs-{y}.{settings}.expressions.pdf",
                x=entry["pairs"]["x"],
                y=y,
                settings=settings,
            )
            for settings, entry in config["gene-vs-gene-plots"].items()
            for y in entry["pairs"]["y"]
        ],
# Target rule restricted to the quality-control plots listed in `targets_qc`.
rule all_qc:
    input:
        targets_qc,
##### setup containers #####
# This container image defines the underlying OS for each job when the workflow
# is run with --use-conda --use-singularity.
# NOTE(review): the image reference carries no tag, so the image that gets
# resolved may change over time — consider pinning a version.
container: "docker://continuumio/miniconda3"
##### setup report #####
# reStructuredText file registered as the workflow-level report description.
report: "report/workflow.rst"
##### load rules #####
# Rule modules are included in the order listed, starting with rules/common.smk.
include: "rules/common.smk"
include: "rules/counts.smk"
include: "rules/qc.smk"
include: "rules/filtration.smk"
include: "rules/cell-cycle.smk"
include: "rules/normalization.smk"
include: "rules/variance.smk"
include: "rules/cell-type.smk"
include: "rules/diffexp.smk"