Skip to content

Commit 30f7cb4

Browse files
authored
Merge pull request #6 from ToolsVanBox/develop
Develop
2 parents 22f0546 + fe688cd commit 30f7cb4

File tree

31 files changed

+215
-53
lines changed

31 files changed

+215
-53
lines changed

configs/base.config

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,18 @@ def check_max(obj, type) {
3232
println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
3333
return obj
3434
}
35+
} else if (type == 'disk') {
36+
try {
37+
if (obj.compareTo(params.max_disk as nextflow.util.MemoryUnit) == 1)
38+
return params.max_disk as nextflow.util.MemoryUnit
39+
else
40+
return obj
41+
} catch (all) {
42+
println " ### ERROR ### Max disk '${params.max_disk}' is not valid! Using default value: $obj"
43+
return obj
44+
}
3545
}
46+
3647
}
3748

3849
process {
@@ -89,18 +100,32 @@ process {
89100
time = { check_max( 2.h * task.attempt, 'time' ) }
90101
memory = { check_max( 90.GB * task.attempt, 'memory' ) }
91102
}
103+
withName: 'BWAMEM2_MEM' {
104+
disk = 100.GB
105+
}
106+
withName: 'SAMTOOLS_SORT' {
107+
disk = 150.GB
108+
}
109+
withName: 'SAMTOOLS_MERGE' {
110+
disk = 60.GB
111+
}
112+
withName: 'GATK4_MARKDUPLICATES_SPARK' {
113+
disk = 200.GB
114+
}
92115
withName: 'SAMTOOLS_MPILEUP' {
93-
time = { check_max( 12.h * task.attempt, 'time' ) }
116+
time = { check_max( 12.h * task.attempt, 'time' ) }
117+
disk = 250.GB
94118
}
95119
withName: 'FREEC_GERMLINE' {
96120
time = { check_max( 2.h * task.attempt, 'time' ) }
97121
memory = { check_max( 80.GB * task.attempt, 'memory' ) }
98122
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
99123
}
100-
withName: 'GRIDSS' {
101-
time = { check_max( 2.h * task.attempt, 'time' ) }
102-
memory = { check_max( 80.GB * task.attempt, 'memory' ) }
103-
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
124+
withName: 'GRIDSS_GERMLINE' {
125+
time = { check_max( 24.h * task.attempt, 'time' ) }
126+
memory = { check_max( 200.GB * task.attempt, 'memory' ) }
127+
cpus = { check_max( 10 * task.attempt, 'cpus' ) }
128+
drive = 100.GB
104129
}
105130
withName: 'GATK4_MUTECT2' {
106131
cpus = { check_max( 6 * task.attempt, 'cpus' ) }

configs/nextflow.config

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,26 @@ profiles {
6464
process.executor = 'google-batch'
6565
executor.queueSize = 1000
6666
execute.pollInterval = '60sec'
67+
executor.jobName = {
68+
def safeName = task.name
69+
.replaceAll(/[^a-zA-Z0-9-]/, "-")
70+
.take(30)
71+
.toLowerCase()
72+
def safeTag = task.tag
73+
.replaceAll(/[^a-zA-Z0-9-]/, "-")
74+
.take(12)
75+
.toLowerCase()
76+
def safeHash = task.hash.take(8)
77+
return "${safeTag}-${safeName}-${safeHash}"
78+
}
6779

68-
google.batch.bootDiskSize = '333GB'
69-
7080
google.region = 'europe-west4'
7181
google.project = 'pmc-gcp-box-d-pip-development'
7282
google.location = 'europe-west4'
73-
83+
7484
google.batch.spot = true
85+
google.batch.autoRetryExitCodes = [104, 9, 50001, 50002, 50003, 50004, 50005, 50006]
86+
google.batch.maxSpotAttempts = 10
7587
google.batch.usePrivateAddress = true
7688
google.batch.network = 'projects/pmc-vpc-res-private-20gx/global/networks/shared-vpc-res-priv-dev'
7789
google.batch.subnetwork = 'projects/pmc-vpc-res-private-20gx/regions/europe-west4/subnetworks/subnet-res-priv-dev'
@@ -99,4 +111,4 @@ timeline {
99111

100112
}
101113

102-
cleanup = true
114+
cleanup = true

configs/run-template.config

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,23 +32,23 @@ params {
3232
vcf_germline_short_variant_somatic_filtration = true
3333

3434
// Somatic short variant discovery
35-
bam_somatic_short_variant_discovery = true
36-
vcf_somatic_short_variant_filtration = true
35+
bam_somatic_short_variant_discovery = false // FALSE
36+
vcf_somatic_short_variant_filtration = false // FALSE
3737

3838
// Short variant annotation
3939
vcf_short_variant_annotation = true
4040

4141
// Copy number discovery
42-
bam_germline_copy_number_discovery = false
43-
bam_somatic_copy_number_discovery = false
44-
bam_tumoronly_copy_number_discovery = false
42+
bam_germline_copy_number_discovery = false // TRUE
43+
bam_somatic_copy_number_discovery = false // FALSE
44+
bam_tumoronly_copy_number_discovery = false // FALSE
4545

4646
// Structural variant discovery
47-
bam_germline_structural_variant_discovery = false
48-
bam_somatic_structural_variant_discovery = false
49-
bam_tumoronly_structural_variant_discovery = false
47+
bam_germline_structural_variant_discovery = false // TRUE
48+
bam_somatic_structural_variant_discovery = false // FALSE
49+
bam_tumoronly_structural_variant_discovery = false //FALSE
5050

51-
vcf_structural_variant_filtration = false
51+
vcf_somatic_structural_variant_filtration = true // TRUE
5252

5353
// HLA typing
5454
bam_hla_type_calling = false

configs/settings.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ process {
412412
}
413413

414414
withName: 'SNPSIFT_SPLIT' {
415-
ext.args = { "-l 1000" }
415+
ext.args = { "-l 5000" }
416416
ext.prefix = { }
417417
}
418418
withName: 'SNPSIFT_JOIN_.*' {

create_samplesheet_gcloud.sh

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/bin/bash
2+
3+
MYFOLDER=$1
4+
SAMPLE_TYPE='tumor'
5+
6+
R1_FASTQS=($(gcloud storage ls ${MYFOLDER}/** | grep -P ".*_1.f.*q.gz"))
7+
R1_FASTQS+=($(gcloud storage ls ${MYFOLDER}/** | grep -P ".*_R1_.*.f.*q.gz"))
8+
R1_FASTQS+=($(gcloud storage ls ${MYFOLDER}/** | grep -P ".*_R1.f.*q.gz"))
9+
10+
#R1_FASTQS=($(find ${MYFOLDER} -iname "*_1.f*q.gz"))
11+
#R1_FASTQS+=($(find ${MYFOLDER} -iname "*_R1_*.f*q.gz"))
12+
13+
echo "sample,fastq_1,fastq_2,bam,bai,sample_type"
14+
15+
for FASTQ_1 in ${R1_FASTQS[@]}; do
16+
BAM=""
17+
BAI=""
18+
SAMPLE=$( echo $(basename ${FASTQ_1}) | cut -f1 -d'_' | cut -f1 -d'.')
19+
if [[ "${FASTQ_1}" == *"_R1_"* ]]; then
20+
FASTQ_2=${FASTQ_1/_R1_/_R2_}
21+
fi
22+
if [[ "${FASTQ_1}" == *"_1.f"* ]]; then
23+
FASTQ_2=${FASTQ_1/_1.f/_2.f}
24+
fi
25+
if [[ "${FASTQ_1}" == *"_R1.f"* ]]; then
26+
FASTQ_2=${FASTQ_1/_R1.f/_R2.f}
27+
fi
28+
if gsutil -q stat ${FASTQ_2}; then
29+
echo ${SAMPLE},${FASTQ_1},${FASTQ_2},${BAM},${BAI},${SAMPLE_TYPE}
30+
else
31+
echo "NO PAIRED FASTQ FILES FOUND FOR ${SAMPLE}"
32+
fi
33+
done
34+
35+
# Collect every object whose name ends in the literal suffix ".bam".
# The dot is escaped so names such as "foo_bam" or "x.cbam" are not picked up,
# and the wildcard path is quoted so the local shell never tries to expand "**".
BAMS=($(gcloud storage ls "${MYFOLDER}/**" | grep -P '\.bam$'))
36+
#BAMS=($(find ${MYFOLDER} -iname "*bam"))
37+
38+
for BAM in ${BAMS[@]}; do
39+
FASTQ_1=""
40+
FASTQ_2=""
41+
SAMPLE=$( echo $(basename ${BAM}) | cut -f1 -d'_' | cut -f1 -d'.')
42+
BAI=${BAM}.bai
43+
44+
if ! gsutil -q stat ${BAI} ; then
45+
BAI=${BAM/.bam/.bai}
46+
fi
47+
if ! gsutil -q stat ${BAI}; then
48+
echo "NO BAI FILE FOUND FOR ${SAMPLE}"
49+
else
50+
echo ${SAMPLE},${FASTQ_1},${FASTQ_2},${BAM},${BAI},${SAMPLE_TYPE}
51+
fi
52+
done
53+

create_samplesheet_gcloud2.sh

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/bin/bash
2+
3+
MYFOLDER=$1
4+
SAMPLE=$2
5+
SAMPLE_TYPE='tumor'
6+
7+
R1_FASTQS=($(gcloud storage ls ${MYFOLDER}/** | grep -P "${SAMPLE}.*_1.f.*q.gz"))
8+
R1_FASTQS+=($(gcloud storage ls ${MYFOLDER}/** | grep -P "${SAMPLE}.*_R1_.*.f.*q.gz"))
9+
R1_FASTQS+=($(gcloud storage ls ${MYFOLDER}/** | grep -P "${SAMPLE}.*_R1.f.*q.gz"))
10+
11+
#R1_FASTQS=($(find ${MYFOLDER} -iname "*_1.f*q.gz"))
12+
#R1_FASTQS+=($(find ${MYFOLDER} -iname "*_R1_*.f*q.gz"))
13+
14+
echo "sample,fastq_1,fastq_2,bam,bai,sample_type"
15+
16+
for FASTQ_1 in ${R1_FASTQS[@]}; do
17+
BAM=""
18+
BAI=""
19+
SAMPLE=$( echo $(basename ${FASTQ_1}) | cut -f1 -d'_' | cut -f1 -d'.')
20+
if [[ "${FASTQ_1}" == *"_R1_"* ]]; then
21+
FASTQ_2=${FASTQ_1/_R1_/_R2_}
22+
fi
23+
if [[ "${FASTQ_1}" == *"_1.f"* ]]; then
24+
FASTQ_2=${FASTQ_1/_1.f/_2.f}
25+
fi
26+
if [[ "${FASTQ_1}" == *"_R1.f"* ]]; then
27+
FASTQ_2=${FASTQ_1/_R1.f/_R2.f}
28+
fi
29+
if gsutil -q stat ${FASTQ_2}; then
30+
echo ${SAMPLE},${FASTQ_1},${FASTQ_2},${BAM},${BAI},${SAMPLE_TYPE}
31+
else
32+
echo "NO PAIRED FASTQ FILES FOUND FOR ${SAMPLE}"
33+
fi
34+
done
35+
36+
# Collect every object whose name ends in the literal suffix ".bam".
# The dot is escaped so names such as "foo_bam" or "x.cbam" are not picked up,
# and the wildcard path is quoted so the local shell never tries to expand "**".
BAMS=($(gcloud storage ls "${MYFOLDER}/**" | grep -P '\.bam$'))
37+
#BAMS=($(find ${MYFOLDER} -iname "*bam"))
38+
39+
for BAM in ${BAMS[@]}; do
40+
FASTQ_1=""
41+
FASTQ_2=""
42+
SAMPLE=$( echo $(basename ${BAM}) | cut -f1 -d'_' | cut -f1 -d'.')
43+
BAI=${BAM}.bai
44+
45+
if ! gsutil -q stat ${BAI} ; then
46+
BAI=${BAM/.bam/.bai}
47+
fi
48+
if ! gsutil -q stat ${BAI}; then
49+
echo "NO BAI FILE FOUND FOR ${SAMPLE}"
50+
else
51+
echo ${SAMPLE},${FASTQ_1},${FASTQ_2},${BAM},${BAI},${SAMPLE_TYPE}
52+
fi
53+
done
54+

main.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@ workflow {
1717
}
1818

1919
ASAP()
20-
}
20+
}

modules/local/controlfreec/makebafplot/main.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ process CONTROLFREEC_MAKEBAFPLOT {
2424
2525
cat <<-END_VERSIONS > versions.yml
2626
"${task.process}":
27-
R: \$(echo \$(R --version 2>&1) ) | grep -oP "R version .+ --" | cut -f 3 -d' '
27+
R: \$(echo \$(R --version 2>&1) | grep -oP "R version .+ --" | cut -f 3 -d' ' )
2828
END_VERSIONS
2929
"""
3030

@@ -35,7 +35,7 @@ process CONTROLFREEC_MAKEBAFPLOT {
3535
3636
cat <<-END_VERSIONS > versions.yml
3737
"${task.process}":
38-
R: \$(echo \$(R --version 2>&1) ) | grep -oP "R version .+ --" | cut -f 3 -d' '
38+
R: \$(echo \$(R --version 2>&1) | grep -oP "R version .+ --" | cut -f 3 -d' ' )
3939
END_VERSIONS
4040
"""
4141
}

modules/local/controlfreec/makekaryotype/main.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ process CONTROLFREEC_MAKEKARYOTYPE {
2727
2828
cat <<-END_VERSIONS > versions.yml
2929
"${task.process}":
30-
R: \$(echo \$(R --version 2>&1) ) | grep -oP "R version .+ --" | cut -f 3 -d' '
30+
R: \$(echo \$(R --version 2>&1) | grep -oP "R version .+ --" | cut -f 3 -d' ' )
3131
END_VERSIONS
3232
"""
3333

@@ -38,7 +38,7 @@ process CONTROLFREEC_MAKEKARYOTYPE {
3838
3939
cat <<-END_VERSIONS > versions.yml
4040
"${task.process}":
41-
R: \$(echo \$(R --version 2>&1) ) | grep -oP "R version .+ --" | cut -f 3 -d' '
41+
R: \$(echo \$(R --version 2>&1) | grep -oP "R version .+ --" | cut -f 3 -d' ' )
4242
END_VERSIONS
4343
"""
4444
}

modules/local/fingerprint/heatmap/main.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ process FINGERPRINT_HEATMAP {
2727
2828
cat <<-END_VERSIONS > versions.yml
2929
"${task.process}":
30-
R: \$(echo \$(R --version 2>&1) ) | grep -oP "R version .+ --" | cut -f 3 -d' '
30+
R: \$(echo \$(R --version 2>&1) | grep -oP "R version .+ --" | cut -f 3 -d' ' )
3131
END_VERSIONS
3232
"""
3333

@@ -38,7 +38,7 @@ process FINGERPRINT_HEATMAP {
3838
3939
cat <<-END_VERSIONS > versions.yml
4040
"${task.process}":
41-
R: \$(echo \$(R --version 2>&1) ) | grep -oP "R version .+ --" | cut -f 3 -d' '
41+
R: \$(echo \$(R --version 2>&1) | grep -oP "R version .+ --" | cut -f 3 -d' ' )
4242
END_VERSIONS
4343
"""
4444

0 commit comments

Comments
 (0)