diff --git a/modules.json b/modules.json index ad4fd57616..db52a03a64 100644 --- a/modules.json +++ b/modules.json @@ -8,507 +8,727 @@ "ascat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/annotate": { "branch": "master", "git_sha": "cb08035150685b11d890d90c9534d4f16869eaec", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" }, "bcftools/concat": { "branch": "master", "git_sha": "d1e0ec7670fa77905a378627232566ce54c3c26d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/mpileup": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["bam_ngscheckmate"] + "installed_by": [ + "bam_ngscheckmate" + ] }, "bcftools/sort": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/stats": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/index": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/mem": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/index": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/mem": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, 
"cat/fastq": { "branch": "master", "git_sha": "a1abf90966a2a4016d3c3e41e228bfcbd4811ccc", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cnvkit/antitarget": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cnvkit/batch": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cnvkit/call": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cnvkit/export": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cnvkit/genemetrics": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cnvkit/reference": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "controlfreec/assesssignificance": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff" }, "controlfreec/freec": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "controlfreec/freec2bed": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "controlfreec/freec2circos": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "controlfreec/makegraph2": { "branch": "master", "git_sha": 
"666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deepvariant/rundeepvariant": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "dragmap/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/dragmap/align/dragmap-align.diff" }, "dragmap/hashtable": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/dragmap/hashtable/dragmap-hashtable.diff" }, "ensemblvep/download": { "branch": "master", "git_sha": "6e3585d9ad20b41adc7d271009f8cb5e191ecab4", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ensemblvep/vep": { "branch": "master", "git_sha": "6e3585d9ad20b41adc7d271009f8cb5e191ecab4", - "installed_by": ["modules", "vcf_annotate_ensemblvep"] + "installed_by": [ + "modules", + "vcf_annotate_ensemblvep" + ] }, "fastp": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fgbio/callmolecularconsensusreads": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fgbio/fastqtobam": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fgbio/groupreadsbyumi": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "freebayes": { "branch": 
"master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/applybqsr": { "branch": "master", "git_sha": "6b3bf38285d94cc1ea3cd9fa93310d54b04c3819", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/applyvqsr": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/baserecalibrator": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/calculatecontamination": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/cnnscorevariants": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/createsequencedictionary": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/estimatelibrarycomplexity": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/filtermutectcalls": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/filtervarianttranches": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/gatherbqsrreports": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/gatherpileupsummaries": { "branch": "master", "git_sha": "679f45cae4f603f12d7c38c042afee11150574a0", - "installed_by": 
["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/genomicsdbimport": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/genotypegvcfs": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/getpileupsummaries": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/haplotypecaller": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/intervallisttobed": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/gatk4/intervallisttobed/gatk4-intervallisttobed.diff" }, "gatk4/learnreadorientationmodel": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/markduplicates": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/mergemutectstats": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/mergevcfs": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/mutect2": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/variantrecalibrator": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ 
+ "modules" + ] }, "gatk4spark/applybqsr": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4spark/baserecalibrator": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4spark/markduplicates": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gawk": { "branch": "master", "git_sha": "97321eded31a12598837a476d3615300af413bb7", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "goleft/indexcov": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "lofreq/callparallel": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "manta/germline": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "manta/somatic": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "manta/tumoronly": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mosdepth": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "msisensorpro/msisomatic": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "msisensorpro/scan": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ 
+ "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ngscheckmate/ncm": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["bam_ngscheckmate"] + "installed_by": [ + "bam_ngscheckmate" + ] + }, + "parabricks/applybqsr": { + "branch": "master", + "git_sha": "5a32b2de5a5368b02e7ba65f7ebed46f37f17eae", + "installed_by": [ + "fastq_align_parabricks", + "modules" + ] + }, + "parabricks/fq2bam": { + "branch": "master", + "git_sha": "0e9cb409c32d3ec4f0d3804588e4778971c09b7e", + "installed_by": [ + "fastq_align_parabricks", + "modules" + ] }, "samblaster": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/bam2fq": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/collatefastq": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/convert": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/mpileup": { "branch": "master", "git_sha": "13e7d1046922381df90cd8fe9bee8c3e57ae8457", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/applyvarcal": { "branch": "master", "git_sha": "eb7b70119bfb1877334c996d13e520c61b21067d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/bwamem": { "branch": "master", "git_sha": "eb7b70119bfb1877334c996d13e520c61b21067d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/dedup": { "branch": "master", "git_sha": "eb7b70119bfb1877334c996d13e520c61b21067d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/dnamodelapply": { "branch": "master", "git_sha": "eb7b70119bfb1877334c996d13e520c61b21067d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/dnascope": { "branch": "master", "git_sha": "eb7b70119bfb1877334c996d13e520c61b21067d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/gvcftyper": { "branch": "master", "git_sha": "eb7b70119bfb1877334c996d13e520c61b21067d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/haplotyper": { "branch": "master", "git_sha": "eb7b70119bfb1877334c996d13e520c61b21067d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "sentieon/varcal": { "branch": "master", "git_sha": "eb7b70119bfb1877334c996d13e520c61b21067d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "snpeff/download": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "snpeff/snpeff": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules", 
"vcf_annotate_snpeff"] + "installed_by": [ + "modules", + "vcf_annotate_snpeff" + ] }, "spring/decompress": { "branch": "master", "git_sha": "d7462e71f9129083ce10c3fe953ed401781e0ebd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "strelka/germline": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "strelka/somatic": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "svdb/merge": { "branch": "master", "git_sha": "eb2c3f7ee2c938ab1a49764bdb1319adaa35492c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "f448e846bdadd80fc8be31fbbc78d9f5b5131a45", - "installed_by": ["modules", "vcf_annotate_snpeff"] + "installed_by": [ + "modules", + "vcf_annotate_snpeff" + ] }, "tabix/tabix": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules", "vcf_annotate_ensemblvep"] + "installed_by": [ + "modules", + "vcf_annotate_ensemblvep" + ] }, "tiddit/sv": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "unzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "vcftools": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -517,35 +737,54 @@ "bam_ngscheckmate": { "branch": "master", "git_sha": "c60c14b285b89bdd0607e371417dadb80385ad6e", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + 
] + }, + "fastq_align_parabricks": { + "branch": "master", + "git_sha": "205d830c47e3d7523b0c8635dbb685a1821d6c5d", + "installed_by": [ + "subworkflows" + ] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "vcf_annotate_ensemblvep": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "vcf_annotate_snpeff": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/parabricks/applybqsr/main.nf b/modules/nf-core/parabricks/applybqsr/main.nf new file mode 100644 index 0000000000..fd1128e44e --- /dev/null +++ b/modules/nf-core/parabricks/applybqsr/main.nf @@ -0,0 +1,62 @@ +process PARABRICKS_APPLYBQSR { + tag "${meta.id}" + label 'process_high' + label 'process_gpu' + stageInMode 'copy' + + container "nvcr.io/nvidia/clara/clara-parabricks:4.4.0-1" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(bam_index) + tuple val(meta3), path(bqsr_table) + tuple val(meta4), path(intervals) + tuple val(meta5), path(fasta) + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.bai"), emit: bai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if 
(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Parabricks module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? intervals.collect{"--interval-file $it"}.join(' ') : "" + def num_gpus = task.accelerator ? "--num-gpus $task.accelerator.request" : '' + """ + pbrun \\ + applybqsr \\ + --ref ${fasta} \\ + --in-bam ${bam} \\ + --in-recal-file ${bqsr_table} \\ + ${interval_command} \\ + --out-bam ${prefix}.bam \\ + --num-threads ${task.cpus} \\ + ${num_gpus} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pbrun: \$(echo \$(pbrun version 2>&1) | sed 's/^Please.* //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pbrun: \$(echo \$(pbrun version 2>&1) | sed 's/^Please.* //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/parabricks/applybqsr/meta.yml b/modules/nf-core/parabricks/applybqsr/meta.yml new file mode 100644 index 0000000000..4bb2afc1aa --- /dev/null +++ b/modules/nf-core/parabricks/applybqsr/meta.yml @@ -0,0 +1,91 @@ +name: "parabricks_applybqsr" +description: NVIDIA Clara Parabricks GPU-accelerated apply Base Quality Score Recalibration + (BQSR). +keywords: + - bqsr + - bam + - GPU-accelerated + - base quality score recalibration +tools: + - "parabricks": + description: "NVIDIA Clara Parabricks GPU-accelerated genomics tools" + homepage: "https://www.nvidia.com/en-us/clara/genomics/" + documentation: "https://docs.nvidia.com/clara/parabricks/" + licence: ["https://docs.nvidia.com/clara/parabricks/3.8.0/gettingstarted.html#licensing"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. 
[ id:'test' ] + - bam: + type: file + description: BAM file + pattern: "*.bam" + - - meta2: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test' ] + - bam_index: + type: file + description: BAM index file + pattern: "*.bai" + - - meta3: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test' ] + - bqsr_table: + type: file + description: Table from calculating BQSR. Output from parabricks/fq2bam or gatk4/baserecalibrator. + pattern: "*.table" + - - meta4: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test' ] + - intervals: + type: file + description: intervals + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - fasta: + type: file + description: Reference fasta - must be unzipped. + pattern: "*.fasta" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test' ] + - "*.bam": + type: file + description: BAM file after applying BQSR. + pattern: "*.bam" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test' ] + - "*.bai": + type: file + description: bai index corresponding to output bam file. + pattern: "*.bai" + - versions: + - versions.yml: + type: file + description: File containing software versions. 
+ pattern: "versions.yml" +authors: + - "@bsiranosian" +maintainers: + - "@bsiranosian" + - "@famosab" diff --git a/modules/nf-core/parabricks/applybqsr/tests/main.nf.test b/modules/nf-core/parabricks/applybqsr/tests/main.nf.test new file mode 100644 index 0000000000..4942ba9685 --- /dev/null +++ b/modules/nf-core/parabricks/applybqsr/tests/main.nf.test @@ -0,0 +1,214 @@ +nextflow_process { + + name "Test Process PARABRICKS_APPLYBQSR" + script "../main.nf" + process "PARABRICKS_APPLYBQSR" + + tag "modules" + tag "modules_nfcore" + tag "parabricks" + tag "parabricks/applybqsr" + tag "gpu" + + test("sarscov2 | paired-end | [bam]") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [], [] // index not needed unless using intervals + ] + input[2] = [ + [ id:'bqsr_table'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/gatk/test.baserecalibrator.table', checkIfExists: true) + ] + input[3] = [ + [], [] // no intervals + ] + input[4] = [ + [ id:'reference'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.bai[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 | paired-end | [bam] | intervals") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test-index'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ] + input[2] = [ + [ id:'bqsr_table'], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/gatk/test.baserecalibrator.table', checkIfExists: true) + ] + input[3] = [ + [ id:'intervals'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[4] = [ + [ id:'reference'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.bai[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens | paired-end | [cram]") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ] + input[1] = [ + [], [] // index not needed unless using intervals + ] + input[2] = [ + [ id:'bqsr_table'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table', checkIfExists: true) + ] + input[3] = [ + [], [] // no intervals + ] + input[4] = [ + [ id:'reference'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.bai[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens | paired-end | [cram] | intervals - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ] + input[1] = [ + [ id:'test-index'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ] + input[2] = [ + [ id:'bqsr_table'], + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table', checkIfExists: true) + ] + input[3] = [ + [ id:'intervals'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[4] = [ + [ id:'reference'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 | paired-end | [bam] | intervals - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test-index'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ] + input[2] = [ + [ id:'bqsr_table'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/gatk/test.baserecalibrator.table', checkIfExists: true) + ] + input[3] = [ + [ id:'intervals'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[4] = [ + [ id:'reference'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/parabricks/applybqsr/tests/main.nf.test.snap b/modules/nf-core/parabricks/applybqsr/tests/main.nf.test.snap new file mode 100644 index 0000000000..7908af55f9 --- /dev/null +++ b/modules/nf-core/parabricks/applybqsr/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ + "homo_sapiens | paired-end | [cram]": { + "content": [ + "2f11e4fe3390b8ad0a1852616fd1da04", + 
"test.bam.bai", + [ + "versions.yml:md5,f396830824c06b6dc30056e423a91634" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T08:44:34.058228768" + }, + "sarscov2 | paired-end | [bam]": { + "content": [ + "894549ee3ced6b5ca2eed2563a985217", + "test.bam.bai", + [ + "versions.yml:md5,f396830824c06b6dc30056e423a91634" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T08:44:17.204736211" + }, + "sarscov2 | paired-end | [bam] | intervals": { + "content": [ + "b194021b699cc5cf4b2b1f715e0b7b4c", + "test.bam.bai", + [ + "versions.yml:md5,f396830824c06b6dc30056e423a91634" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T08:44:25.661590103" + }, + "sarscov2 | paired-end | [bam] | intervals - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,f396830824c06b6dc30056e423a91634" + ], + "bai": [ + [ + { + "id": "test" + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f396830824c06b6dc30056e423a91634" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T08:44:47.700907318" + }, + "homo_sapiens | paired-end | [cram] | intervals - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,f396830824c06b6dc30056e423a91634" + ], + "bai": [ + [ + { + "id": "test" + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": 
"test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f396830824c06b6dc30056e423a91634" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T08:44:41.08456539" + } +} diff --git a/modules/nf-core/parabricks/fq2bam/main.nf b/modules/nf-core/parabricks/fq2bam/main.nf new file mode 100644 index 0000000000..0cd6904804 --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/main.nf @@ -0,0 +1,75 @@ +process PARABRICKS_FQ2BAM { + tag "$meta.id" + label 'process_high' + label 'process_gpu' + stageInMode 'copy' + + container "nvcr.io/nvidia/clara/clara-parabricks:4.4.0-1" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(fasta) + tuple val(meta3), path(index) + tuple val(meta4), path(interval_file) + path(known_sites) + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.bai") , emit: bai + tuple val(meta), path("*.table"), emit: bqsr_table , optional:true + path("versions.yml") , emit: versions + path("qc_metrics") , emit: qc_metrics , optional:true + path("duplicate-metrics.txt") , emit: duplicate_metrics , optional:true + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Parabricks module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def in_fq_command = meta.single_end ? "--in-se-fq $reads" : "--in-fq $reads" + def known_sites_command = known_sites ? known_sites.collect{"--knownSites $it"}.join(' ') : "" + def known_sites_output = known_sites ? "--out-recal-file ${prefix}.table" : "" + def interval_file_command = interval_file ? interval_file.collect{"--interval-file $it"}.join(' ') : "" + def num_gpus = task.accelerator ? 
"--num-gpus $task.accelerator.request" : '' + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + cp $fasta \$INDEX + + pbrun \\ + fq2bam \\ + --ref \$INDEX \\ + $in_fq_command \\ + --out-bam ${prefix}.bam \\ + $known_sites_command \\ + $known_sites_output \\ + $interval_file_command \\ + $num_gpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pbrun: \$(echo \$(pbrun version 2>&1) | sed 's/^Please.* //' ) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Parabricks module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pbrun: \$(echo \$(pbrun version 2>&1) | sed 's/^Please.* //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/parabricks/fq2bam/meta.yml b/modules/nf-core/parabricks/fq2bam/meta.yml new file mode 100644 index 0000000000..8b45cb1628 --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/meta.yml @@ -0,0 +1,112 @@ +name: "parabricks_fq2bam" +description: NVIDIA Clara Parabricks GPU-accelerated alignment, sorting, BQSR calculation, + and duplicate marking. Note this nf-core module requires files to be copied into + the working directory and not symlinked. +keywords: + - align + - sort + - bqsr + - duplicates +tools: + - "parabricks": + description: "NVIDIA Clara Parabricks GPU-accelerated genomics tools" + homepage: "https://www.nvidia.com/en-us/clara/genomics/" + documentation: "https://docs.nvidia.com/clara/parabricks/4.0.1/Documentation/" + licence: ["custom"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: fastq.gz files + pattern: "*.fastq.gz" + - - meta2: + type: map + description: | + Groovy Map containing fasta information + - fasta: + type: file + description: reference fasta file - must be unzipped + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing index information + - index: + type: file + description: reference BWA index + pattern: "*.{amb,ann,bwt,pac,sa}" + - - meta4: + type: map + description: | + Groovy Map containing index information + - interval_file: + type: file + description: (optional) file(s) containing genomic intervals for use in base + quality score recalibration (BQSR) + pattern: "*.{bed,interval_list,picard,list,intervals}" + - - known_sites: + type: file + description: (optional) known sites file(s) for calculating BQSR. markdups must + be true to perform BQSR. + pattern: "*.vcf.gz" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Sorted BAM file + pattern: "*.bam" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: index corresponding to sorted BAM file + pattern: "*.bai" + - bqsr_table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test'] + - "*.table": + type: file + description: (optional) table from base quality score recalibration calculation, + to be used with parabricks/applybqsr + pattern: "*.table" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + - qc_metrics: + - qc_metrics: + type: directory + description: (optional) optional directory of qc metrics + pattern: "qc_metrics" + - duplicate_metrics: + - duplicate-metrics.txt: + type: file + description: (optional) metrics calculated from marking duplicates in the bam + file + pattern: "*-duplicate-metrics.txt" +authors: + - "@bsiranosian" + - "@adamrtalbot" +maintainers: + - "@bsiranosian" + - "@adamrtalbot" + - "@gallvp" + - "@famosab" diff --git a/modules/nf-core/parabricks/fq2bam/tests/main.nf.test b/modules/nf-core/parabricks/fq2bam/tests/main.nf.test new file mode 100644 index 0000000000..0dec1c5f8c --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/tests/main.nf.test @@ -0,0 +1,199 @@ +nextflow_process { + + name "Test Process PARABRICKS_FQ2BAM" + script "../main.nf" + process "PARABRICKS_FQ2BAM" + + tag "bwa/index" + tag "modules" + tag "parabricks/fq2bam" + tag "modules_nfcore" + tag "parabricks" + tag "gpu" + + setup { + run("BWA_INDEX") { + script "../../../bwa/index/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + """ + } + } + + run("BWA_INDEX", alias: 'BWA_INDEX_PE') { + script "../../../bwa/index/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + } + + test("SRR389222 - fastq - se") { + + config './nextflow.config' + + when { + params { + module_args = '--low-memory' + // Ref: https://forums.developer.nvidia.com/t/problem-with-gpu/256825/6 + // 
Parabricks’s fq2bam requires 24GB of memory. + // Using --low-memory for testing + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = BWA_INDEX.out.index + input[3] = [ [], [] ] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.bai[0][1]).name, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("SRR389222 - fastq - se - stub") { + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'test' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = BWA_INDEX.out.index + input[3] = [ [], [] ] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("sarscov2 - fastq - pe") { + + config './nextflow.config' + + when { + params { + module_args = '--low-memory' + // Ref: https://forums.developer.nvidia.com/t/problem-with-gpu/256825/6 + // Parabricks’s fq2bam requires 24GB of memory. 
+ // Using --low-memory for testing + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = BWA_INDEX_PE.out.index + input[3] = [ [], [] ] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.bai[0][1]).name, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + + test("sarscov2 - fastq - pe - stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = BWA_INDEX_PE.out.index + input[3] = [ [], [] ] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/parabricks/fq2bam/tests/main.nf.test.snap b/modules/nf-core/parabricks/fq2bam/tests/main.nf.test.snap new file mode 100644 index 0000000000..73d5ced7c5 --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/tests/main.nf.test.snap @@ -0,0 +1,192 @@ +{ + "SRR389222 
- fastq - se - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,55d1e67ef8fa9d0ea3065363a653ffef" + ], + "4": [ + + ], + "5": [ + + ], + "bai": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bqsr_table": [ + + ], + "duplicate_metrics": [ + + ], + "qc_metrics": [ + + ], + "versions": [ + "versions.yml:md5,55d1e67ef8fa9d0ea3065363a653ffef" + ] + }, + { + "PARABRICKS_FQ2BAM": { + "pbrun": "4.4.0-1" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-16T12:16:33.055785098" + }, + "sarscov2 - fastq - pe - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,55d1e67ef8fa9d0ea3065363a653ffef" + ], + "4": [ + + ], + "5": [ + + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bqsr_table": [ + + ], + "duplicate_metrics": [ + + ], + "qc_metrics": [ + + ], + "versions": [ + "versions.yml:md5,55d1e67ef8fa9d0ea3065363a653ffef" + ] + }, + { + "PARABRICKS_FQ2BAM": { + "pbrun": "4.4.0-1" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-16T12:16:48.158061416" + }, + "sarscov2 - fastq 
- pe": { + "content": [ + "2d64e4363d9f3c0e2167fce49d5087cf", + "test.bam.bai", + [ + "versions.yml:md5,55d1e67ef8fa9d0ea3065363a653ffef" + ], + { + "PARABRICKS_FQ2BAM": { + "pbrun": "4.4.0-1" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-16T12:25:23.63061876" + }, + "SRR389222 - fastq - se": { + "content": [ + "3d5b94990c7fdf90a682edb5ee0f59de", + "test.bam.bai", + [ + "versions.yml:md5,55d1e67ef8fa9d0ea3065363a653ffef" + ], + { + "PARABRICKS_FQ2BAM": { + "pbrun": "4.4.0-1" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-16T12:24:32.45197929" + } +} \ No newline at end of file diff --git a/modules/nf-core/parabricks/fq2bam/tests/nextflow.config b/modules/nf-core/parabricks/fq2bam/tests/nextflow.config new file mode 100644 index 0000000000..6f58b220de --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: 'PARABRICKS_FQ2BAM' { + ext.args = params.module_args + } + +} diff --git a/subworkflows/nf-core/fastq_align_parabricks/main.nf b/subworkflows/nf-core/fastq_align_parabricks/main.nf new file mode 100644 index 0000000000..e3ad9c598a --- /dev/null +++ b/subworkflows/nf-core/fastq_align_parabricks/main.nf @@ -0,0 +1,57 @@ +// +// Alignment and BQSR with Nvidia CLARA Parabricks +// +include { PARABRICKS_FQ2BAM } from '../../../modules/nf-core/parabricks/fq2bam/main' +include { PARABRICKS_APPLYBQSR } from '../../../modules/nf-core/parabricks/applybqsr/main' + +workflow FASTQ_ALIGN_PARABRICKS { + + take: + ch_reads // channel: [mandatory] meta, reads + ch_fasta // channel: [mandatory] meta, fasta + ch_index // channel: [mandatory] meta, index + ch_interval_file // channel: [optional] meta, intervals_bed_combined + ch_known_sites // channel [optional] known_sites_indels + + main: + ch_versions = Channel.empty() + ch_bam = Channel.empty() + ch_bai = Channel.empty() + ch_bqsr_table = Channel.empty() + 
ch_qc_metrics = Channel.empty() + ch_duplicate_metrics = Channel.empty() + + PARABRICKS_FQ2BAM( + ch_reads, + ch_fasta, + ch_index, + ch_interval_file, + ch_known_sites + ) + + // Collecting FQ2BAM outputs + ch_bam = ch_bam.mix(PARABRICKS_FQ2BAM.out.bam) + ch_bai = ch_bai.mix(PARABRICKS_FQ2BAM.out.bai) + ch_qc_metrics = ch_qc_metrics.mix(PARABRICKS_FQ2BAM.out.qc_metrics) + ch_bqsr_table = ch_bqsr_table.mix(PARABRICKS_FQ2BAM.out.bqsr_table) + ch_duplicate_metrics = ch_duplicate_metrics.mix(PARABRICKS_FQ2BAM.out.duplicate_metrics) + ch_versions = ch_versions.mix(PARABRICKS_FQ2BAM.out.versions) + + // Apply BQSR + PARABRICKS_APPLYBQSR( + ch_bam, + ch_bai, + ch_bqsr_table.ifEmpty([]), + ch_interval_file, + ch_fasta + ) + ch_versions = ch_versions.mix(PARABRICKS_APPLYBQSR.out.versions) + + emit: + bam = PARABRICKS_APPLYBQSR.out.bam // channel: [ [meta], bam ] + bai = PARABRICKS_APPLYBQSR.out.bai // channel: [ [meta], bai ] + qc_metrics = ch_qc_metrics // channel: [ [meta], qc_metrics ] + duplicate_metrics = ch_duplicate_metrics // channel: [ [meta], duplicate_metrics ] + bqsr_table = ch_bqsr_table // channel: [ [meta], bqsr_table ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_align_parabricks/meta.yml b/subworkflows/nf-core/fastq_align_parabricks/meta.yml new file mode 100644 index 0000000000..27fe1ab34d --- /dev/null +++ b/subworkflows/nf-core/fastq_align_parabricks/meta.yml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_parabricks" +description: Align a fastq file using GPU-based acceleration +keywords: + - fastq + - align + - parabricks + - gpu + - preprocessing +components: + - parabricks/fq2bam + - parabricks/applybqsr +input: + - ch_reads: + type: file + description: | + Channel containing reads (either one file for se or two files for pe) + Structure: [ val(meta), [ path(fastq1), path(fastq2) ] ] + - 
ch_fasta: + type: file + description: | + Channel containing reference fasta file + Structure: [ val(meta), path(fasta) ] + - ch_index: + type: file + description: | + Channel containing reference BWA index + Structure: [ val(meta), path(.{amb,ann,bwt,pac,sa}) ] + - ch_interval_file: + type: file + description: | + (optional) file(s) containing genomic intervals for use in base + quality score recalibration (BQSR) + Structure: [ val(meta), path(.{bed,interval_list,picard,list,intervals}) ] + - ch_known_sites: + type: file + description: | + (optional) known sites file(s) for calculating BQSR. markdups must + be true to perform BQSR. + Structure [ path(vcf) ] +output: + - bam: + type: file + description: | + Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test b/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test new file mode 100644 index 0000000000..7f102f528a --- /dev/null +++ b/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test @@ -0,0 +1,106 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_PARABRICKS" + script "../main.nf" + workflow "FASTQ_ALIGN_PARABRICKS" + config "./nextflow.config" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_parabricks" + tag "parabricks" + tag "parabricks/fq2bam" + tag "parabricks/applybqsr" + tag "bwa" + tag "bwa/index" + tag "gpu" + + setup { + run("BWA_INDEX") { + script "../../../../modules/nf-core/bwa/index/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + } + + test("sarscov2 single-end [fastq_gz]") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = Channel.value([ + [id: 'reference'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = BWA_INDEX.out.index + input[3] = Channel.value([ + [id: 'intervals'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/picard/baits.interval_list', checkIfExists: true) + ]) + input[4] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.versions + ).match() + } + ) + } + } + + test("sarscov2 paired-end [fastq_gz]") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = Channel.value([ + [id: 'reference'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = BWA_INDEX.out.index + input[3] = Channel.value([ + [id: 'intervals'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/picard/baits.interval_list', checkIfExists: true) + ]) + input[4] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', 
checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.versions + ).match() + } + ) + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test.snap new file mode 100644 index 0000000000..1535c2619e --- /dev/null +++ b/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test.snap @@ -0,0 +1,40 @@ +{ + "sarscov2 single-end [fastq_gz]": { + "content": [ + [ + "7e2bd786d964e42ddbc2ab0c9f340b09" + ], + [ + "test.recal.bam.bai" + ], + [ + "versions.yml:md5,0d8766379e89038cb5fdcd074f3289f6", + "versions.yml:md5,df165e28f025dad39d826caead132115" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-17T16:25:03.460025311" + }, + "sarscov2 paired-end [fastq_gz]": { + "content": [ + [ + "73e8e89cda8fce1cf07bdebff0f793ec" + ], + [ + "test.recal.bam.bai" + ], + [ + "versions.yml:md5,0d8766379e89038cb5fdcd074f3289f6", + "versions.yml:md5,df165e28f025dad39d826caead132115" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-17T16:26:01.468588642" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_align_parabricks/tests/nextflow.config b/subworkflows/nf-core/fastq_align_parabricks/tests/nextflow.config new file mode 100644 index 0000000000..335587c2bf --- /dev/null +++ b/subworkflows/nf-core/fastq_align_parabricks/tests/nextflow.config @@ -0,0 +1,15 @@ +process { + + withName: 'PARABRICKS_FQ2BAM' { + ext.args = '--low-memory' + } + + // Ref: https://forums.developer.nvidia.com/t/problem-with-gpu/256825/6 + // Parabricks’s fq2bam requires 24GB of memory. 
+ // Using --low-memory for testing + + withName: 'PARABRICKS_APPLYBQSR' { + ext.prefix = { "${meta.id}.recal" } + } + +} diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index f554cd9ddd..c940853d2e 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -37,6 +37,8 @@ include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../../subwor // Map input reads to reference genome include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON } from '../../subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main' +// Map input reads to reference genome using GPU +include { FASTQ_ALIGN_PARABRICKS } from '../../subworkflows/nf-core/fastq_align_parabricks/main' // Merge and index BAM files (optional) include { BAM_MERGE_INDEX_SAMTOOLS } from '../../subworkflows/local/bam_merge_index_samtools/main' @@ -289,328 +291,334 @@ workflow SAREK { else [ meta, reads ] } - // reads will be sorted - sort_bam = true - FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON(reads_for_alignment, index_alignment, sort_bam, fasta, fasta_fai) - - // Grouping the bams from the same samples not to stall the workflow - // Use groupKey to make sure that the correct group can advance as soon as it is complete - // and not stall the workflow until all reads from all channels are mapped - bam_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.bam - .combine(reads_grouping_key) // Creates a tuple of [ meta, bam, reads_grouping_key ] - .filter { meta1, bam, meta2 -> meta1.sample == meta2.sample } - // Add n_fastq and other variables to meta - .map { meta1, bam, meta2 -> - [ meta1 + meta2, bam ] - } - // Manipulate meta map to remove old fields and add new ones - .map { meta, bam -> - [ meta - meta.subMap('id', 'read_group', 'data_type', 'num_lanes', 'read_group', 'size') + [ data_type: 'bam', id: meta.sample ], bam ] - } - // Create groupKey from meta map - .map { meta, bam -> - [ groupKey( meta, meta.n_fastq), bam ] - } - // Group - .groupTuple() + if (params.gpu_aligner) { + // TODO: 
implement cram output for parbricks modules + // TODO: remove applybqsr from align subworkflow as its included in the fq2bam module + FASTQ_ALIGN_PARABRICKS(reads_for_alignment, fasta, index_alignment, intervals_for_preprocessing, []) + cram_variant_calling = FASTQ_ALIGN_PARABRICKS.out.bam + } else { + // reads will be sorted + sort_bam = true + FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON(reads_for_alignment, index_alignment, sort_bam, fasta, fasta_fai) + + // Grouping the bams from the same samples not to stall the workflow + // Use groupKey to make sure that the correct group can advance as soon as it is complete + // and not stall the workflow until all reads from all channels are mapped + bam_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.bam + .combine(reads_grouping_key) // Creates a tuple of [ meta, bam, reads_grouping_key ] + .filter { meta1, bam, meta2 -> meta1.sample == meta2.sample } + // Add n_fastq and other variables to meta + .map { meta1, bam, meta2 -> + [ meta1 + meta2, bam ] + } + // Manipulate meta map to remove old fields and add new ones + .map { meta, bam -> + [ meta - meta.subMap('id', 'read_group', 'data_type', 'num_lanes', 'read_group', 'size') + [ data_type: 'bam', id: meta.sample ], bam ] + } + // Create groupKey from meta map + .map { meta, bam -> + [ groupKey( meta, meta.n_fastq), bam ] + } + // Group + .groupTuple() + + bai_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.bai + .combine(reads_grouping_key) // Creates a tuple of [ meta, bai, reads_grouping_key ] + .filter { meta1, bai, meta2 -> meta1.sample == meta2.sample } + // Add n_fastq and other variables to meta + .map { meta1, bai, meta2 -> + [ meta1 + meta2, bai ] + } + // Manipulate meta map to remove old fields and add new ones + .map { meta, bai -> + [ meta - meta.subMap('id', 'read_group', 'data_type', 'num_lanes', 'read_group', 'size') + [ data_type: 'bai', id: meta.sample ], bai ] + } + // Create groupKey from meta map + .map { meta, bai -> + [ groupKey( meta, 
meta.n_fastq), bai ] + } + // Group + .groupTuple() + + + // gatk4 markduplicates can handle multiple bams as input, so no need to merge/index here + // Except if and only if save_mapped or (skipping markduplicates and sentieon-dedup) + if ( + params.save_mapped || + ( + (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) && + !(params.tools && params.tools.split(',').contains('sentieon_dedup')) + ) + ) { + // bams are merged (when multiple lanes from the same sample), indexed and then converted to cram + BAM_MERGE_INDEX_SAMTOOLS(bam_mapped) + + BAM_TO_CRAM_MAPPING(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, fasta, fasta_fai) + // Create CSV to restart from this step + if (params.save_output_as_bam) CHANNEL_ALIGN_CREATE_CSV(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, params.outdir, params.save_output_as_bam) + else CHANNEL_ALIGN_CREATE_CSV(BAM_TO_CRAM_MAPPING.out.cram.join(BAM_TO_CRAM_MAPPING.out.crai, failOnDuplicate: true, failOnMismatch: true), params.outdir, params.save_output_as_bam) - bai_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.bai - .combine(reads_grouping_key) // Creates a tuple of [ meta, bai, reads_grouping_key ] - .filter { meta1, bai, meta2 -> meta1.sample == meta2.sample } - // Add n_fastq and other variables to meta - .map { meta1, bai, meta2 -> - [ meta1 + meta2, bai ] - } - // Manipulate meta map to remove old fields and add new ones - .map { meta, bai -> - [ meta - meta.subMap('id', 'read_group', 'data_type', 'num_lanes', 'read_group', 'size') + [ data_type: 'bai', id: meta.sample ], bai ] - } - // Create groupKey from meta map - .map { meta, bai -> - [ groupKey( meta, meta.n_fastq), bai ] + // Gather used softwares versions + versions = versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions) + versions = versions.mix(BAM_TO_CRAM_MAPPING.out.versions) } - // Group - .groupTuple() + // Gather used softwares versions + versions = versions.mix(CONVERT_FASTQ_INPUT.out.versions) + versions = 
versions.mix(FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.versions) + } - // gatk4 markduplicates can handle multiple bams as input, so no need to merge/index here - // Except if and only if save_mapped or (skipping markduplicates and sentieon-dedup) - if ( - params.save_mapped || - ( - (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) && - !(params.tools && params.tools.split(',').contains('sentieon_dedup')) - ) - ) { - // bams are merged (when multiple lanes from the same sample), indexed and then converted to cram - BAM_MERGE_INDEX_SAMTOOLS(bam_mapped) + if (params.step in ['mapping', 'markduplicates']) { - BAM_TO_CRAM_MAPPING(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, fasta, fasta_fai) - // Create CSV to restart from this step - if (params.save_output_as_bam) CHANNEL_ALIGN_CREATE_CSV(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, params.outdir, params.save_output_as_bam) - else CHANNEL_ALIGN_CREATE_CSV(BAM_TO_CRAM_MAPPING.out.cram.join(BAM_TO_CRAM_MAPPING.out.crai, failOnDuplicate: true, failOnMismatch: true), params.outdir, params.save_output_as_bam) + // ch_cram_no_markduplicates_restart = Channel.empty() + cram_markduplicates_no_spark = Channel.empty() + cram_sentieon_dedup = Channel.empty() + cram_markduplicates_spark = Channel.empty() - // Gather used softwares versions - versions = versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions) - versions = versions.mix(BAM_TO_CRAM_MAPPING.out.versions) - } + // STEP 2: markduplicates (+QC) + convert to CRAM - // Gather used softwares versions - versions = versions.mix(CONVERT_FASTQ_INPUT.out.versions) - versions = versions.mix(FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.versions) - } + // ch_bam_for_markduplicates will contain bam mapped with FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON when step is mapping + // Or bams that are specified in the samplesheet.csv when step is prepare_recalibration + cram_for_markduplicates = params.step == 'mapping' ? 
bam_mapped : input_sample.map{ meta, input, index -> [ meta, input ] } + // if no MD is done, then run QC on mapped & converted CRAM files + // or the input BAM (+converted) or CRAM files + cram_skip_markduplicates = Channel.empty() - if (params.step in ['mapping', 'markduplicates']) { + // Should it be possible to restart from converted crams? + // For now, conversion from bam to cram is only done when skipping markduplicates - // ch_cram_no_markduplicates_restart = Channel.empty() - cram_markduplicates_no_spark = Channel.empty() - cram_sentieon_dedup = Channel.empty() - cram_markduplicates_spark = Channel.empty() + if ( + params.skip_tools && + params.skip_tools.split(',').contains('markduplicates') && + !(params.tools && params.tools.split(',').contains('sentieon_dedup')) + ) { + if (params.step == 'mapping') { + cram_skip_markduplicates = BAM_TO_CRAM_MAPPING.out.cram.join(BAM_TO_CRAM_MAPPING.out.crai, failOnDuplicate: true, failOnMismatch: true) + } else { + cram_skip_markduplicates = Channel.empty().mix(input_sample) + } - // STEP 2: markduplicates (+QC) + convert to CRAM + CRAM_QC_NO_MD(cram_skip_markduplicates, fasta, intervals_for_preprocessing) - // ch_bam_for_markduplicates will contain bam mapped with FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON when step is mapping - // Or bams that are specified in the samplesheet.csv when step is prepare_recalibration - cram_for_markduplicates = params.step == 'mapping' ? bam_mapped : input_sample.map{ meta, input, index -> [ meta, input ] } - // if no MD is done, then run QC on mapped & converted CRAM files - // or the input BAM (+converted) or CRAM files - cram_skip_markduplicates = Channel.empty() + // Gather QC reports + reports = reports.mix(CRAM_QC_NO_MD.out.reports.collect{ meta, report -> [ report ] }) - // Should it be possible to restart from converted crams? 
- // For now, conversion from bam to cram is only done when skipping markduplicates + // Gather used softwares versions + versions = versions.mix(CRAM_QC_NO_MD.out.versions) + } else if (params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates')) { + BAM_MARKDUPLICATES_SPARK( + cram_for_markduplicates, + dict, + fasta, + fasta_fai, + intervals_for_preprocessing) + cram_markduplicates_spark = BAM_MARKDUPLICATES_SPARK.out.cram - if ( - params.skip_tools && - params.skip_tools.split(',').contains('markduplicates') && - !(params.tools && params.tools.split(',').contains('sentieon_dedup')) - ) { - if (params.step == 'mapping') { - cram_skip_markduplicates = BAM_TO_CRAM_MAPPING.out.cram.join(BAM_TO_CRAM_MAPPING.out.crai, failOnDuplicate: true, failOnMismatch: true) - } else { - cram_skip_markduplicates = Channel.empty().mix(input_sample) - } + // Gather QC reports + reports = reports.mix(BAM_MARKDUPLICATES_SPARK.out.reports.collect{ meta, report -> [ report ] }) - CRAM_QC_NO_MD(cram_skip_markduplicates, fasta, intervals_for_preprocessing) + // Gather used softwares versions + versions = versions.mix(BAM_MARKDUPLICATES_SPARK.out.versions) + } else if (params.tools && params.tools.split(',').contains('sentieon_dedup')) { + crai_for_markduplicates = params.step == 'mapping' ? 
bai_mapped : input_sample.map{ meta, input, index -> [ meta, index ] } + BAM_SENTIEON_DEDUP( + cram_for_markduplicates, + crai_for_markduplicates, + fasta, + fasta_fai, + intervals_for_preprocessing) - // Gather QC reports - reports = reports.mix(CRAM_QC_NO_MD.out.reports.collect{ meta, report -> [ report ] }) + cram_sentieon_dedup = BAM_SENTIEON_DEDUP.out.cram - // Gather used softwares versions - versions = versions.mix(CRAM_QC_NO_MD.out.versions) - } else if (params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates')) { - BAM_MARKDUPLICATES_SPARK( - cram_for_markduplicates, - dict, - fasta, - fasta_fai, - intervals_for_preprocessing) - cram_markduplicates_spark = BAM_MARKDUPLICATES_SPARK.out.cram + // Gather QC reports + reports = reports.mix(BAM_SENTIEON_DEDUP.out.reports.collect{ meta, report -> [ report ] }) - // Gather QC reports - reports = reports.mix(BAM_MARKDUPLICATES_SPARK.out.reports.collect{ meta, report -> [ report ] }) + // Gather used softwares versions + versions = versions.mix(BAM_SENTIEON_DEDUP.out.versions) + } else { + BAM_MARKDUPLICATES( + cram_for_markduplicates, + fasta, + fasta_fai, + intervals_for_preprocessing) - // Gather used softwares versions - versions = versions.mix(BAM_MARKDUPLICATES_SPARK.out.versions) - } else if (params.tools && params.tools.split(',').contains('sentieon_dedup')) { - crai_for_markduplicates = params.step == 'mapping' ? 
bai_mapped : input_sample.map{ meta, input, index -> [ meta, index ] } - BAM_SENTIEON_DEDUP( - cram_for_markduplicates, - crai_for_markduplicates, - fasta, - fasta_fai, - intervals_for_preprocessing) + cram_markduplicates_no_spark = BAM_MARKDUPLICATES.out.cram - cram_sentieon_dedup = BAM_SENTIEON_DEDUP.out.cram + // Gather QC reports + reports = reports.mix(BAM_MARKDUPLICATES.out.reports.collect{ meta, report -> [ report ] }) - // Gather QC reports - reports = reports.mix(BAM_SENTIEON_DEDUP.out.reports.collect{ meta, report -> [ report ] }) + // Gather used softwares versions + versions = versions.mix(BAM_MARKDUPLICATES.out.versions) + } - // Gather used softwares versions - versions = versions.mix(BAM_SENTIEON_DEDUP.out.versions) - } else { - BAM_MARKDUPLICATES( - cram_for_markduplicates, - fasta, - fasta_fai, - intervals_for_preprocessing) + // ch_md_cram_for_restart contains either: + // - crams from markduplicates + // - crams from sentieon_dedup + // - crams from markduplicates_spark + // - crams from input step markduplicates --> from the converted ones only? + ch_md_cram_for_restart = Channel.empty().mix(cram_markduplicates_no_spark, cram_markduplicates_spark, cram_sentieon_dedup) + // Make sure correct data types are carried through + .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } - cram_markduplicates_no_spark = BAM_MARKDUPLICATES.out.cram + // If params.save_output_as_bam, then convert CRAM files to BAM + CRAM_TO_BAM(ch_md_cram_for_restart, fasta, fasta_fai) + versions = versions.mix(CRAM_TO_BAM.out.versions) - // Gather QC reports - reports = reports.mix(BAM_MARKDUPLICATES.out.reports.collect{ meta, report -> [ report ] }) + // CSV should be written for the file actually out, either CRAM or BAM + // Create CSV to restart from this step + csv_subfolder = (params.tools && params.tools.split(',').contains('sentieon_dedup')) ? 
'sentieon_dedup' : 'markduplicates' - // Gather used softwares versions - versions = versions.mix(BAM_MARKDUPLICATES.out.versions) + if (params.save_output_as_bam) CHANNEL_MARKDUPLICATES_CREATE_CSV(CRAM_TO_BAM.out.bam.join(CRAM_TO_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true), csv_subfolder, params.outdir, params.save_output_as_bam) + else CHANNEL_MARKDUPLICATES_CREATE_CSV(ch_md_cram_for_restart, csv_subfolder, params.outdir, params.save_output_as_bam) } - // ch_md_cram_for_restart contains either: - // - crams from markduplicates - // - crams from sentieon_dedup - // - crams from markduplicates_spark - // - crams from input step markduplicates --> from the converted ones only? - ch_md_cram_for_restart = Channel.empty().mix(cram_markduplicates_no_spark, cram_markduplicates_spark, cram_sentieon_dedup) - // Make sure correct data types are carried through - .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } - - // If params.save_output_as_bam, then convert CRAM files to BAM - CRAM_TO_BAM(ch_md_cram_for_restart, fasta, fasta_fai) - versions = versions.mix(CRAM_TO_BAM.out.versions) - - // CSV should be written for the file actually out, either CRAM or BAM - // Create CSV to restart from this step - csv_subfolder = (params.tools && params.tools.split(',').contains('sentieon_dedup')) ? 
'sentieon_dedup' : 'markduplicates' - - if (params.save_output_as_bam) CHANNEL_MARKDUPLICATES_CREATE_CSV(CRAM_TO_BAM.out.bam.join(CRAM_TO_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true), csv_subfolder, params.outdir, params.save_output_as_bam) - else CHANNEL_MARKDUPLICATES_CREATE_CSV(ch_md_cram_for_restart, csv_subfolder, params.outdir, params.save_output_as_bam) - } + if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration']) { - if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration']) { + // Run if starting from step "prepare_recalibration" + if (params.step == 'prepare_recalibration') { - // Run if starting from step "prepare_recalibration" - if (params.step == 'prepare_recalibration') { + ch_cram_for_bam_baserecalibrator = Channel.empty().mix(input_sample) - ch_cram_for_bam_baserecalibrator = Channel.empty().mix(input_sample) + // Set the input samples for restart so we generate a samplesheet that contains the input files together with the recalibration table + ch_md_cram_for_restart = ch_cram_for_bam_baserecalibrator - // Set the input samples for restart so we generate a samplesheet that contains the input files together with the recalibration table - ch_md_cram_for_restart = ch_cram_for_bam_baserecalibrator + } else { - } else { + // ch_cram_for_bam_baserecalibrator contains either: + // - crams from markduplicates + // - crams from markduplicates_spark + // - crams converted from bam mapped when skipping markduplicates + // - input cram files, when start from step markduplicates + ch_cram_for_bam_baserecalibrator = Channel.empty().mix(ch_md_cram_for_restart, cram_skip_markduplicates ) + // Make sure correct data types are carried through + .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } - // ch_cram_for_bam_baserecalibrator contains either: - // - crams from markduplicates - // - crams from markduplicates_spark - // - crams converted from bam mapped when skipping markduplicates - // - input 
cram files, when start from step markduplicates - ch_cram_for_bam_baserecalibrator = Channel.empty().mix(ch_md_cram_for_restart, cram_skip_markduplicates ) - // Make sure correct data types are carried through - .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } + } - } + // STEP 3: Create recalibration tables + if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) { - // STEP 3: Create recalibration tables - if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) { + ch_table_bqsr_no_spark = Channel.empty() + ch_table_bqsr_spark = Channel.empty() - ch_table_bqsr_no_spark = Channel.empty() - ch_table_bqsr_spark = Channel.empty() + if (params.use_gatk_spark && params.use_gatk_spark.contains('baserecalibrator')) { + BAM_BASERECALIBRATOR_SPARK( + ch_cram_for_bam_baserecalibrator, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals, + known_sites_indels, + known_sites_indels_tbi) - if (params.use_gatk_spark && params.use_gatk_spark.contains('baserecalibrator')) { - BAM_BASERECALIBRATOR_SPARK( - ch_cram_for_bam_baserecalibrator, - dict, - fasta, - fasta_fai, - intervals_and_num_intervals, - known_sites_indels, - known_sites_indels_tbi) + ch_table_bqsr_spark = BAM_BASERECALIBRATOR_SPARK.out.table_bqsr - ch_table_bqsr_spark = BAM_BASERECALIBRATOR_SPARK.out.table_bqsr + // Gather used softwares versions + versions = versions.mix(BAM_BASERECALIBRATOR_SPARK.out.versions) + } else { - // Gather used softwares versions - versions = versions.mix(BAM_BASERECALIBRATOR_SPARK.out.versions) - } else { + BAM_BASERECALIBRATOR( + ch_cram_for_bam_baserecalibrator, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals, + known_sites_indels, + known_sites_indels_tbi) - BAM_BASERECALIBRATOR( - ch_cram_for_bam_baserecalibrator, - dict, - fasta, - fasta_fai, - intervals_and_num_intervals, - known_sites_indels, - known_sites_indels_tbi) + ch_table_bqsr_no_spark = 
BAM_BASERECALIBRATOR.out.table_bqsr - ch_table_bqsr_no_spark = BAM_BASERECALIBRATOR.out.table_bqsr + // Gather used softwares versions + versions = versions.mix(BAM_BASERECALIBRATOR.out.versions) + } - // Gather used softwares versions - versions = versions.mix(BAM_BASERECALIBRATOR.out.versions) - } - - // ch_table_bqsr contains either: - // - bqsr table from baserecalibrator - // - bqsr table from baserecalibrator_spark - ch_table_bqsr = Channel.empty().mix( - ch_table_bqsr_no_spark, - ch_table_bqsr_spark) + // ch_table_bqsr contains either: + // - bqsr table from baserecalibrator + // - bqsr table from baserecalibrator_spark + ch_table_bqsr = Channel.empty().mix( + ch_table_bqsr_no_spark, + ch_table_bqsr_spark) - reports = reports.mix(ch_table_bqsr.collect{ meta, table -> [ table ] }) + reports = reports.mix(ch_table_bqsr.collect{ meta, table -> [ table ] }) - cram_applybqsr = ch_cram_for_bam_baserecalibrator.join(ch_table_bqsr, failOnDuplicate: true, failOnMismatch: true) + cram_applybqsr = ch_cram_for_bam_baserecalibrator.join(ch_table_bqsr, failOnDuplicate: true, failOnMismatch: true) - // Create CSV to restart from this step - CHANNEL_BASERECALIBRATOR_CREATE_CSV(ch_md_cram_for_restart.join(ch_table_bqsr, failOnDuplicate: true), params.tools, params.skip_tools, params.outdir, params.save_output_as_bam) + // Create CSV to restart from this step + CHANNEL_BASERECALIBRATOR_CREATE_CSV(ch_md_cram_for_restart.join(ch_table_bqsr, failOnDuplicate: true), params.tools, params.skip_tools, params.outdir, params.save_output_as_bam) + } } - } - // STEP 4: RECALIBRATING - if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate']) { + // STEP 4: RECALIBRATING + if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate']) { - // Run if starting from step "prepare_recalibration" - if (params.step == 'recalibrate') { + // Run if starting from step "prepare_recalibration" + if (params.step == 'recalibrate') { - 
cram_applybqsr = Channel.empty().mix(input_sample) + cram_applybqsr = Channel.empty().mix(input_sample) - } + } - if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) { - cram_variant_calling_no_spark = Channel.empty() - cram_variant_calling_spark = Channel.empty() + if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) { + cram_variant_calling_no_spark = Channel.empty() + cram_variant_calling_spark = Channel.empty() - if (params.use_gatk_spark && params.use_gatk_spark.contains('baserecalibrator')) { + if (params.use_gatk_spark && params.use_gatk_spark.contains('baserecalibrator')) { - BAM_APPLYBQSR_SPARK( - cram_applybqsr, - dict, - fasta, - fasta_fai, - intervals_and_num_intervals) + BAM_APPLYBQSR_SPARK( + cram_applybqsr, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals) - cram_variant_calling_spark = BAM_APPLYBQSR_SPARK.out.cram + cram_variant_calling_spark = BAM_APPLYBQSR_SPARK.out.cram - // Gather used softwares versions - versions = versions.mix(BAM_APPLYBQSR_SPARK.out.versions) + // Gather used softwares versions + versions = versions.mix(BAM_APPLYBQSR_SPARK.out.versions) - } else { + } else { - BAM_APPLYBQSR( - cram_applybqsr, - dict, - fasta, - fasta_fai, - intervals_and_num_intervals) + BAM_APPLYBQSR( + cram_applybqsr, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals) - cram_variant_calling_no_spark = BAM_APPLYBQSR.out.cram + cram_variant_calling_no_spark = BAM_APPLYBQSR.out.cram - // Gather used softwares versions - versions = versions.mix(BAM_APPLYBQSR.out.versions) - } + // Gather used softwares versions + versions = versions.mix(BAM_APPLYBQSR.out.versions) + } - cram_variant_calling = Channel.empty().mix( - cram_variant_calling_no_spark, - cram_variant_calling_spark) + cram_variant_calling = Channel.empty().mix( + cram_variant_calling_no_spark, + cram_variant_calling_spark) - // If params.save_output_as_bam, then convert CRAM files to BAM - 
CRAM_TO_BAM_RECAL(cram_variant_calling, fasta, fasta_fai) - versions = versions.mix(CRAM_TO_BAM_RECAL.out.versions) + // If params.save_output_as_bam, then convert CRAM files to BAM + CRAM_TO_BAM_RECAL(cram_variant_calling, fasta, fasta_fai) + versions = versions.mix(CRAM_TO_BAM_RECAL.out.versions) - // CSV should be written for the file actually out out, either CRAM or BAM - csv_recalibration = Channel.empty() - csv_recalibration = params.save_output_as_bam ? CRAM_TO_BAM_RECAL.out.bam.join(CRAM_TO_BAM_RECAL.out.bai, failOnDuplicate: true, failOnMismatch: true) : cram_variant_calling + // CSV should be written for the file actually out, either CRAM or BAM + csv_recalibration = Channel.empty() + csv_recalibration = params.save_output_as_bam ? CRAM_TO_BAM_RECAL.out.bam.join(CRAM_TO_BAM_RECAL.out.bai, failOnDuplicate: true, failOnMismatch: true) : cram_variant_calling - // Create CSV to restart from this step - CHANNEL_APPLYBQSR_CREATE_CSV(csv_recalibration, params.outdir, params.save_output_as_bam) + // Create CSV to restart from this step + CHANNEL_APPLYBQSR_CREATE_CSV(csv_recalibration, params.outdir, params.save_output_as_bam) - } else if (params.step == 'recalibrate') { - // cram_variant_calling contains either: - // - input bams converted to crams, if started from step recal + skip BQSR - // - input crams if started from step recal + skip BQSR - cram_variant_calling = Channel.empty().mix(input_sample.map{ meta, cram, crai, table -> [ meta, cram, crai ] }) - } else { - // cram_variant_calling contains either: - // - crams from markduplicates = ch_cram_for_bam_baserecalibrator if skip BQSR but not started from step recalibration - cram_variant_calling = Channel.empty().mix(ch_cram_for_bam_baserecalibrator) + } else if (params.step == 'recalibrate') { + // cram_variant_calling contains either: + // - input bams converted to crams, if started from step recal + skip BQSR + // - input crams if started from step recal + skip BQSR + cram_variant_calling = 
Channel.empty().mix(input_sample.map{ meta, cram, crai, table -> [ meta, cram, crai ] }) + } else { + // cram_variant_calling contains either: + // - crams from markduplicates = ch_cram_for_bam_baserecalibrator if skip BQSR but not started from step recalibration + cram_variant_calling = Channel.empty().mix(ch_cram_for_bam_baserecalibrator) + } } } - if (params.step == 'variant_calling') { cram_variant_calling = Channel.empty().mix( input_sample )