Skip to content

Commit

Permalink
v.0.1.14: changed the runtime of all rules to seconds so that the workflow can run on SLURM
Browse files Browse the repository at this point in the history
  • Loading branch information
trinezac committed Apr 5, 2023
1 parent c1140e1 commit 75919a5
Show file tree
Hide file tree
Showing 16 changed files with 487 additions and 42 deletions.
4 changes: 2 additions & 2 deletions maginator/workflow/alignment.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ rule align:
resources:
cores=1,
memory=4,
runtime='10:00:00'
runtime='36000' #10h in s
shell:
"""
mkdir -p {output}
Expand All @@ -42,7 +42,7 @@ rule concat:
resources:
cores=10,
memory=40,
runtime='10:00:00'
runtime='36000' #10h in s
params:
ali_dir=os.path.join(WD, 'phylo', 'alignments')
script:
Expand Down
2 changes: 1 addition & 1 deletion maginator/workflow/filter.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ mem = math.ceil(n_contigs/1000000)*30
if mem > int(param_dict['max_mem']):
mem = int(param_dict['max_mem'])
## time is 1 hour per million contigs
tim = str(math.ceil(n_contigs/1000000))*60*60 # runtime in seconds
tim = str(math.ceil(n_contigs/1000000)*60*60) # runtime in seconds

rule all:
input:
Expand Down
6 changes: 3 additions & 3 deletions maginator/workflow/filter_geneclusters.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ rule nonredundant_catalogue:
resources:
cores = 1,
memory = 50,
runtime = '12:00:00'
runtime = '43200' #12h in s
shell:
"perl -ne 'if(/^>(\S+)/){{$c=$i{{$1}}}}$c?print:chomp;$i{{$_}}=1 if @ARGV' <(cut -f1 {input.clusters} | uniq) {input.genecat} | awk '{{print $1}}' > {output}"

Expand All @@ -52,7 +52,7 @@ rule bwa_index:
resources:
cores = 40,
memory = 188,
runtime = '1:00:00:00'
runtime = '86400' #1d in s
shell:
"bwa-mem2 index -p {output.index} {input.fasta}; samtools faidx {input.fasta}; touch {output.index}"

Expand All @@ -72,6 +72,6 @@ rule bwa_readmap:
resources:
cores = 40,
memory = 188,
runtime = '1:00:00:00'
runtime = '86400' #1d in s
shell:
"bwa-mem2 mem -t {resources.cores} {input.index} {input.fastq1} {input.fastq2} | samtools view -T {input.fasta} -F 3584 -b --threads {resources.cores} | samtools sort --threads {resources.cores} > {output.bam}; samtools index {output.bam}"
8 changes: 4 additions & 4 deletions maginator/workflow/gene_count_mat.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ rule count_genes:
resources:
cores = 1,
memory = 20,
runtime = '2:00:00'
runtime = '7200' #2h in s
shell:
"samtools idxstats {input} | cut -f3 > {output.readcounts}"

Expand All @@ -44,7 +44,7 @@ rule gene_names:
resources:
cores = 1,
memory = 20,
runtime = '1:00:00'
runtime = '3600' #1h in s
shell:
"samtools idxstats {input} | cut -f1 > {output}"

Expand All @@ -58,7 +58,7 @@ rule create_header:
resources:
cores = 1,
memory = 188,
runtime = '1:00:00'
runtime = '3600' #1h in s
run:
header = "Gene"
for f in input:
Expand All @@ -82,6 +82,6 @@ rule gene_count_matrix:
resources:
cores = 1,
memory = 188,
runtime = '1:00:00'
runtime = '3600' #1h in s
shell:
"paste {input.gene_names} {input.readcounts} | cat {input.header} - > {output}; sed -i '$d' {output}"
6 changes: 3 additions & 3 deletions maginator/workflow/gene_tax.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ rule gene_tax:
resources:
cores = 1,
memory = 80,
runtime = '02:00:00'
runtime = '7200' #2h in s
script:
"scripts/gene_cluster2tax.py"

Expand All @@ -47,7 +47,7 @@ rule synteny_graph:
resources:
cores = 1,
memory = 40,
runtime = '10:00:00'
runtime = '36000' #10h in s
script:
"scripts/synteny.py"

Expand All @@ -67,7 +67,7 @@ rule synteny_mcl:
resources:
cores = 40,
memory = 180,
runtime = '24:00:00'
runtime = '86400' #1d in s
shell:
"""
join -1 2 -2 1 <(join -j1 <(sed 's/.*(//;s/)//;s/,//;s/\.0$//' {input.graph} | sort -k1,1) <(sort -k1,1 {input.index}) | sort -k2,2) <(sort -k1,1 {input.index}) \
Expand Down
2 changes: 1 addition & 1 deletion maginator/workflow/gtdbtk.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ CLUSTERS = {x for x in CLUSTERS if x.isdigit()}
out = subprocess.Popen(['wc', '-l', READS], stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()[0]
n_samples = int(out.partition(b' ')[0])
## time is 1 hour per 30 samples
tim = str(math.ceil(n_samples/30))+':00:00'
tim = str(math.ceil(n_samples/30)*60*60) # time in seconds

wildcard_constraints:
cluster="\d+"
Expand Down
15 changes: 7 additions & 8 deletions maginator/workflow/outgroup.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ rule markers:
resources:
cores=1,
memory=10,
runtime='10:00:00'
runtime='36000' #10h in s
script:
"scripts/marker_genes.py"

Expand All @@ -47,7 +47,7 @@ rule fasta1:
resources:
cores=1,
memory=10,
runtime='10:00:00'
runtime='36000' #10h in s
shell:
"perl -ne 'if(/^>(\S+)/){{$c=$i{{$1}}}}$c?print:chomp;$i{{$_}}=1 if @ARGV' <(cut -f4 {input.tab} | sort | uniq) {input.fasta} > {output}"

Expand All @@ -62,7 +62,7 @@ rule fasta2:
resources:
cores=1,
memory=10,
runtime='10:00:00'
runtime='36000' #10h in s
shell:
"perl -ne 'if(/^>(\S+)/){{$c=$i{{$1}}}}$c?print:chomp;$i{{$_}}=1 if @ARGV' <(cut -f5 {input.tab} | sort | uniq) {input.fasta} > {output}"

Expand All @@ -77,7 +77,7 @@ rule fasta3:
resources:
cores=1,
memory=10,
runtime='10:00:00'
runtime='36000' #10h in s
shell:
"perl -ne 'if(/^>(\S+)/){{$c=$i{{$1}}}}$c?print:chomp;$i{{$_}}=1 if @ARGV' <(cut -f1 {input.tab}) {input.fasta} > {output}"

Expand All @@ -93,7 +93,7 @@ rule bed:
resources:
cores=1,
memory=10,
runtime='10:00:00'
runtime='36000' #10h in s
shell:
"""
awk '{{print $1"\t0\t1000000"}}' {input.sig} > {output.beds}
Expand All @@ -114,7 +114,7 @@ rule uniq:
resources:
cores=1,
memory=10,
runtime='10:00:00'
runtime='36000' #10h in s
shell:
"""
cat {input.beds} {input.bedm} | sort | uniq > {output.bed}
Expand All @@ -131,9 +131,8 @@ rule index:
resources:
cores=1,
memory=10,
runtime='10:00:00'
runtime='36000' #10h in s
shell:
"""
samtools faidx {input}
"""

8 changes: 4 additions & 4 deletions maginator/workflow/parse_gtdbtk.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ rule parse_gtdbtk:
resources:
cores=1,
memory=32,
runtime='10:00:00'
runtime='36000' #10h in s
script:
"scripts/parse_gtdbtk.py"

Expand All @@ -55,7 +55,7 @@ rule repres_genes:
resources:
cores = 14,
memory = 50,
runtime = '2:00:00:00'
runtime = '172800' #2d in s
params:
tmp_dir = os.path.join(WD, 'tmp'),
out_prefix = os.path.join(WD, 'genes', 'all_genes'),
Expand All @@ -77,7 +77,7 @@ rule join:
resources:
cores = 1,
memory = 20,
runtime = '24:00:00'
runtime = '86400' #1d in s
shell:
"join -1 1 -2 2 <(sort -k1,1 {input.gtdb}) <(sort -k2,2 {input.cluster}) > {output}"

Expand All @@ -90,6 +90,6 @@ rule collect:
resources:
cores = 1,
memory = 20,
runtime = '24:00:00'
runtime = '86400' #1d in s
shell:
"for i in {input}/*/*summary.tsv; do tail -n+2 $i; done | cut -f1,2 > {output}"
4 changes: 2 additions & 2 deletions maginator/workflow/phylo.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if param_dict['phylo'] == 'fasttree':
resources:
cores=1,
memory=32,
runtime='12:00:00'
runtime='43200' #12h in s
shell:
"""
fasttree -nt {input} > {output}
Expand All @@ -46,7 +46,7 @@ if param_dict['phylo'] == 'iqtree':
resources:
cores=40,
memory=180,
runtime='02:00:00:00'
runtime='172800' #2d in s
shell:
"""
iqtree -T {resources.cores} -s {input.fna} -p {input.part} -o Outgroup --prefix {params.prefix} || true && if [ -f {params.prefix}.treefile ]; then mv {params.prefix}.treefile {output}; else touch {output}; fi
Expand Down
4 changes: 2 additions & 2 deletions maginator/workflow/pileup.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ rule subset:
resources:
cores = 1,
memory = 20,
runtime = '12:00:00'
runtime = '43200' #12h in s
shell:
"samtools view -b -L {input.bed} {input.bam} | samtools sort -o {output}"

Expand All @@ -40,7 +40,7 @@ rule pileup:
resources:
cores = 1,
memory = 20,
runtime = '24:00:00'
runtime = '86400' #1d in s
shell:
"samtools mpileup -A -x -f {input.fna} {input.bam} -o {output}"

2 changes: 1 addition & 1 deletion maginator/workflow/pileup_parse.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ rule parse:
resources:
cores = 40,
memory = 180,
runtime = '02:00:00:00'
runtime = '172800' #2d in s
script:
"scripts/mpileup.py"

8 changes: 4 additions & 4 deletions maginator/workflow/prescreening_genes.Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ rule geneID_collectionID:
resources:
cores = 1,
memory = 188,
runtime = '12:00:00'
runtime = '43200' #12h in s
script:
"scripts/species_collections.py"

Expand All @@ -51,7 +51,7 @@ rule sort_genes_across_MGS:
resources:
cores = 1,
memory = 188,
runtime = '12:00:00'
runtime = '43200' #12h in s
script:
"scripts/sort_gene_mat.py"

Expand All @@ -71,7 +71,7 @@ rule format_conversion:
resources:
cores = 1,
memory = 188,
runtime = '24:00:00'
runtime = '86400' #1d in s
script:
"scripts/matrix2SG_formatconversion.R"

Expand All @@ -88,6 +88,6 @@ rule prescreening_genes:
resources:
cores = 1,
memory = 188,
runtime = '24:00:00'
runtime = '86400' #1d in s
script:
"scripts/prescreening_genes.R"
Loading

0 comments on commit 75919a5

Please sign in to comment.