-
Notifications
You must be signed in to change notification settings - Fork 3
/
run_benchmark.py
241 lines (212 loc) · 8.59 KB
/
run_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
File: run_benchmark.py
Created Date: May 16th 2019
Author: ZL Deng <dawnmsg(at)gmail.com>
---------------------------------------
Last Modified: 29th July 2019 12:03:45 pm
'''
import os
import sys
import click
import functools
import snakemake
# Directory that contains this script. The Snakefiles are looked up here and
# it is passed to Snakemake as the working directory.
wd = os.path.dirname(os.path.realpath(__file__))
# Version string printed by the --version flag.
VERSION = '0.4.2'
# Directory the program was started from; paths given on the command line
# are joined onto it before being handed to Snakemake.
cd = os.getcwd()
class SpecialHelpOrder(click.Group):
    """A ``click.Group`` that lists its subcommands by priority in the help.

    Commands registered through :meth:`command` may pass a ``help_priority``
    keyword (default ``1``). When the help text is rendered, commands are
    ordered by ascending priority, with ties broken by command name.
    """

    def __init__(self, *args, **kwargs):
        # Maps command name -> priority; only consulted when rendering help.
        self.help_priorities = {}
        super(SpecialHelpOrder, self).__init__(*args, **kwargs)

    def get_help(self, ctx):
        """Return the help text with the commands in priority order."""
        # The help formatter asks ``self.list_commands`` for the command
        # names, so shadow it on the instance while the help is rendered ...
        self.list_commands = self.list_commands_for_help
        try:
            return super(SpecialHelpOrder, self).get_help(ctx)
        finally:
            # ... and drop the shadow afterwards so the regular ordering is
            # not left permanently replaced on this group.
            self.__dict__.pop('list_commands', None)

    def list_commands_for_help(self, ctx):
        """Return the command names sorted by ``(priority, name)``."""
        commands = super(SpecialHelpOrder, self).list_commands(ctx)
        priorities = self.help_priorities
        # Commands without a recorded priority fall back to priority 1.
        return sorted(commands,
                      key=lambda name: (priorities.get(name, 1), name))

    def command(self, *args, **kwargs):
        """Behave like ``click.Group.command()`` but also record a priority.

        The extra ``help_priority`` keyword (default ``1``) is removed from
        ``kwargs`` before they are forwarded to ``click.Group.command()``.
        """
        help_priority = kwargs.pop('help_priority', 1)
        help_priorities = self.help_priorities

        def decorator(f):
            # Explicit two-argument super(): this is a nested function, so
            # the zero-argument form is not usable here.
            cmd = super(SpecialHelpOrder, self).command(*args, **kwargs)(f)
            help_priorities[cmd.name] = help_priority
            return cmd

        return decorator
def print_version(ctx, param, value):
    """Callback of the ``--version`` flag: print the version and exit.

    Nothing happens when the flag value is falsy or when the context is in
    resilient-parsing mode. Otherwise ``Version <VERSION>`` is echoed and
    the context is asked to exit.
    """
    flag_given = bool(value)
    # ``ctx`` is only inspected once the flag is known to be set.
    if flag_given and not ctx.resilient_parsing:
        click.echo("Version {}".format(VERSION))
        ctx.exit()
# Root command group. Subcommands are attached with ``@cli.command`` and the
# eager ``--version`` flag is handled by ``print_version``.
@click.group(cls=SpecialHelpOrder)
@click.option(
    "--version",
    is_flag=True,
    is_eager=True,
    expose_value=False,
    callback=print_version,
    help="Print the version.",
)
def cli():
    # Deliberately documented with comments only, so that the help text of
    # the group stays exactly as it was.
    return None
def common_options(f):
    """Decorate ``f`` with the options shared by all subcommands.

    The option decorators are applied one after another in the order they
    are listed (``--dryrun`` first, ``--outpath`` last) and the fully
    decorated callable is returned.
    """
    shared_options = (
        click.option(
            "-d", "--dryrun",
            is_flag=True,
            default=False,
            show_default=True,
            help="Print the details without run the pipeline.",
        ),
        click.option(
            "-t", "--threads",
            type=int,
            default=2,
            show_default=True,
            help="The number of threads to use.",
        ),
        click.option(
            "-c", "--conda_prefix",
            type=click.Path(exists=True),
            default=None,
            help="The prefix of conda ENV. [default: in the working directory].",
        ),
        click.option(
            "-o", "--outpath",
            type=click.Path(),
            default=None,
            help=("The directory where to put the results and figures. "
                  "The path can be specified either in the CLI as argument or in the config file. "
                  "[default: outpath defined in the config file]"),
        ),
    )

    decorated = f
    for add_option in shared_options:
        decorated = add_option(decorated)
    return decorated
@cli.command(help_priority=1, help="Benchmarking for HCMV dataset")
@common_options
@click.option("-e",
              "--evaluation",
              required=True,
              type=click.Choice(["all", "variantcall", "assembly"]),
              help="The evaluation to run.")
@click.option("-s",
              "--slow",
              is_flag=True,
              default=False,
              show_default=True,
              help=("Run the evaluation based on reads, which is very slow. "
                    "By default, the evaluation will be based on the VCF and contig "
                    "files provided within this software. If this parameter is on, "
                    "this software will run all the analyses to generate outputs "
                    "based on reads for benchmarking which is very time consuming."))
def hcmv(evaluation, dryrun=False, conda_prefix=None, slow=False, **kwargs):
    """Run the bundled HCMV benchmark workflow(s).

    Args:
        evaluation: ``"variantcall"``, ``"assembly"`` or ``"all"``; selects
            which of the two Snakefiles are executed.
        dryrun: Forwarded to ``run_snake``.
        conda_prefix: Forwarded to ``run_snake``.
        slow: Passed to the workflow configuration as ``runOnReads``.
        **kwargs: Remaining CLI options. Every option that is not ``None``
            is added to the workflow configuration.
    """
    variantcall_smk = os.path.join(wd, "eval_variantcall.smk")
    assembly_smk = os.path.join(wd, "eval_assembly.smk")

    snake_kwargs = dict(runOnReads=slow)
    for arg, val in kwargs.items():
        if val is None:
            # Unset options are left out of the workflow configuration.
            continue
        if arg == 'outpath':
            # Snakemake runs with ``wd`` as working directory, so the output
            # path is anchored at the directory the user started from.
            val = os.path.join(cd, val)
        snake_kwargs[arg] = val

    if evaluation == "variantcall":
        snakes = [variantcall_smk]
    elif evaluation == "assembly":
        snakes = [assembly_smk]
    else:
        snakes = [variantcall_smk, assembly_smk]

    for snake in snakes:
        run_snake(snake, dryrun, conda_prefix, **snake_kwargs)
@cli.command(help_priority=2, help="Variants benchmark for customized dataset")
@common_options
@click.option("-v",
              "--vcfs",
              type=str,
              help=("Comma-separated list of VCF files. Please quote the whole "
                    "parameter if there is any white space the file names. The files can be "
                    "specified either in the CLI as argument or in the config file."))
@click.option("-l", "--labels", help="Comma-separated list of labels of VCF.", default=None)
@click.option("-r",
              "--refs",
              type=str,
              help=("Comma-separated list of reference genome files. Please "
                    "quote the whole parameter if there is any white space the file names. "
                    "(The files can be specified either in the CLI as argument or in the config file.)"))
@click.option("--novenn", is_flag=True, help="Do not visualize the SNPs in Venn diagram")
def vareval(dryrun=False, conda_prefix=None, **kwargs):
    """Run the variant-calling benchmark workflow on user-supplied files.

    Args:
        dryrun: Forwarded to ``run_snake``.
        conda_prefix: Forwarded to ``run_snake``.
        **kwargs: Remaining CLI options. Every option that is not ``None``
            is added to the workflow configuration.
    """
    variantcall_smk = os.path.join(wd, "eval_variant_custom.smk")

    snake_kwargs = {}
    for arg, val in kwargs.items():
        if val is None:
            # Unset options are left out of the workflow configuration.
            continue
        if arg in ('refs', 'vcfs'):
            # Comma-separated file lists: anchor every entry at the
            # directory the user started from, since Snakemake runs with
            # ``wd`` as its working directory.
            val = ','.join(os.path.join(cd, item.strip())
                           for item in val.split(','))
        elif arg == 'outpath':
            val = os.path.join(cd, val)
        snake_kwargs[arg] = val

    run_snake(variantcall_smk, dryrun, conda_prefix, **snake_kwargs)
@cli.command(help_priority=3, help="Assembly benchmark for customized dataset")
@common_options
@click.option("-s",
              "--scaffolds",
              type=str,
              help=("Comma-separated list of scaffold files. Please quote the "
                    "whole parameter if there is any white space the file names. "
                    "The files can be specified either in the CLI as argument or in the config file."))
@click.option("-r",
              "--refs",
              type=str,
              help=("Comma-separated list of reference genome files. Please "
                    "quote the whole parameter if there is any white space in the file names. "
                    "(The files can be specified either in the CLI as argument or in the config file.)"))
def asmeval(dryrun=False, threads=2, conda_prefix=None, **kwargs):
    """Run the assembly benchmark workflow on user-supplied files.

    Args:
        dryrun: Forwarded to ``run_snake``.
        threads: Number of threads; added to the workflow configuration,
            where ``run_snake`` reads it to set the core count.
        conda_prefix: Forwarded to ``run_snake``.
        **kwargs: Remaining CLI options. Every option that is not ``None``
            is added to the workflow configuration.
    """
    assembly_smk = os.path.join(wd, "eval_assembly_custom.smk")

    snake_kwargs = {}
    # ``threads`` is a named parameter here, so it is not part of ``kwargs``
    # and has to be forwarded explicitly; otherwise ``run_snake`` falls back
    # to its default of 2 and the -t/--threads option has no effect.
    if threads is not None:
        snake_kwargs['threads'] = threads

    for arg, val in kwargs.items():
        if val is None:
            # Unset options are left out of the workflow configuration.
            continue
        if arg == 'outpath':
            # Snakemake runs with ``wd`` as working directory, so the output
            # path is anchored at the directory the user started from.
            val = os.path.join(cd, val)
        # NOTE(review): unlike ``vareval``, the ``refs``/``scaffolds`` lists
        # are passed through without being joined onto ``cd`` - confirm the
        # Snakefile resolves relative paths itself.
        snake_kwargs[arg] = val

    run_snake(assembly_smk, dryrun, conda_prefix, **snake_kwargs)
def run_snake(snake, dryrun=False, conda_prefix=None, **kwargs):
    """Execute one Snakefile through the ``snakemake`` Python API.

    Args:
        snake: Path of the Snakefile to run.
        dryrun: Passed to Snakemake as ``dryrun``.
        conda_prefix: Passed to Snakemake as ``conda_prefix``.
        **kwargs: Passed to Snakemake as the workflow ``config``. The
            ``threads`` entry (default 2) is also used as the core count.

    Raises:
        RuntimeError: If Snakemake reports failure or raises an exception.
            The original exception is attached as the cause.
    """
    from datetime import datetime

    try:
        success = snakemake.snakemake(
            snakefile=snake,
            restart_times=0,
            cores=kwargs.get("threads", 2),
            workdir=wd,
            use_conda=True,
            conda_prefix=conda_prefix,
            dryrun=dryrun,
            printshellcmds=True,
            force_incomplete=True,
            config=kwargs,
        )
        if not success:
            # Raised inside the ``try`` so that a failed run is reported by
            # the same handler as an exception coming out of Snakemake.
            raise Exception('Snakemake pipeline failed!')
    except Exception as e:
        # Only ``Exception`` is handled: KeyboardInterrupt and SystemExit
        # propagate unchanged instead of being turned into a RuntimeError.
        print('ERROR ({})'.format(snake))
        print('{}\t{}\n'.format(
            datetime.now().isoformat(' ', timespec='minutes'),
            e))
        raise RuntimeError(e) from e
# Run the click command group only when this file is executed as a script.
if __name__ == "__main__":
    cli()