From 18cf83640b0f2ec890bcacabbb8d9ccefa54dad0 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Wed, 26 Jun 2024 11:08:07 +0100 Subject: [PATCH 01/27] add --header optional argument --- nextflow.config | 1 + nextflow_schema.json | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index d09f872..b10fe94 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,6 +14,7 @@ params { vector_db = "${projectDir}/assets/vectorDB.tar.gz" bwamem2_index = null fasta = null + header = null // Execution options use_work_dir_as_temp = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 737b3c2..20533b4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -26,7 +26,8 @@ "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "./results" }, "vector_db": { "type": "string", @@ -64,6 +65,12 @@ "description": "Path to directory or tar.gz archive for pre-built BWAMEM2 index.", "format": "path", "fa_icon": "fas fa-bezier-curve" + }, + "header": { + "type": "string", + "format": "path", + "description": "Optional template header file for BAM/CRAM outputs", + "fa_icon": "far fa-file-code" } } }, From 7a11e6be0b1ac76d9eaa05abb4ff68f5939ebada Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Wed, 26 Jun 2024 11:08:50 +0100 Subject: [PATCH 02/27] install nf-core/samtools/reheader module --- .../nf-core/samtools/reheader/environment.yml | 8 +++ modules/nf-core/samtools/reheader/main.nf | 49 ++++++++++++++++++ modules/nf-core/samtools/reheader/meta.yml | 50 +++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 modules/nf-core/samtools/reheader/environment.yml create mode 100644 modules/nf-core/samtools/reheader/main.nf create mode 100644 modules/nf-core/samtools/reheader/meta.yml 
diff --git a/modules/nf-core/samtools/reheader/environment.yml b/modules/nf-core/samtools/reheader/environment.yml new file mode 100644 index 0000000..aa9c798 --- /dev/null +++ b/modules/nf-core/samtools/reheader/environment.yml @@ -0,0 +1,8 @@ +name: samtools_reheader +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/reheader/main.nf b/modules/nf-core/samtools/reheader/main.nf new file mode 100644 index 0000000..24192ea --- /dev/null +++ b/modules/nf-core/samtools/reheader/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_REHEADER { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0': + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if ("$bam" == "${meta.id}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + samtools \\ + reheader \\ + $args \\ + $bam \\ + > ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + ccat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/reheader/meta.yml b/modules/nf-core/samtools/reheader/meta.yml new file mode 100644 index 0000000..a5e1591 --- /dev/null +++ b/modules/nf-core/samtools/reheader/meta.yml @@ -0,0 +1,50 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json + +name: "samtools_reheader" +description: | + Replace the header in the bam file with the header generated by the command. + This command is much faster than replacing the header with a BAM→SAM→BAM conversion. +keywords: + - reheader + - cram + - bam + - genomics +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bam: + type: file + description: BAM/CRAM file to be reheaded + pattern: "*.{bam,cram}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Reheaded BAM/CRAM file + pattern: "*.{bam,cram}" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" From 6fcc80a093beed655ef8b0dd76706f2c0dde627c Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Wed, 26 Jun 2024 11:09:21 +0100 Subject: [PATCH 03/27] install nf-core/samtools/reheader module --- modules.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules.json b/modules.json index 81077ac..3331b0d 100644 --- a/modules.json +++ b/modules.json @@ -66,6 +66,11 @@ "installed_by": ["modules"], "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" }, + "samtools/reheader": { + "branch": "master", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", + "installed_by": ["modules"] + }, "samtools/stats": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", From 55172c730bc9ddf4fdf6e482d9f084f18d8ae7ec Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 28 Jun 2024 13:50:28 +0100 Subject: [PATCH 04/27] default config for samtools_reheader --- conf/modules.config | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index c9e3628..3533e40 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -28,6 +28,13 @@ process { ext.prefix = { "${meta.id}.merge" } } + // If custom header provided, this is inserted in place of existing + // @HD and @SQ lines, while preserving any other header entries + withName: SAMTOOLS_REHEADER { + ext.prefix = { "${meta.id}.reheader" } + ext.args = { '-c \'grep -v ^@HD\\|^@SQ\'' } + } + withName: SAMTOOLS_COLLATETOFASTA { ext.args = { (params.use_work_dir_as_temp ? "-T." 
: "") } } From 7a460b04b0e59a6c6e74b4aa00e4f6d7bf7ad2a8 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 28 Jun 2024 13:52:04 +0100 Subject: [PATCH 05/27] switch to custom samtools reheader wrapper in local module --- .../samtools_replaceheader.nf} | 34 +++++++------ .../nf-core/samtools/reheader/environment.yml | 8 --- modules/nf-core/samtools/reheader/meta.yml | 50 ------------------- 3 files changed, 18 insertions(+), 74 deletions(-) rename modules/{nf-core/samtools/reheader/main.nf => local/samtools_replaceheader.nf} (50%) delete mode 100644 modules/nf-core/samtools/reheader/environment.yml delete mode 100644 modules/nf-core/samtools/reheader/meta.yml diff --git a/modules/nf-core/samtools/reheader/main.nf b/modules/local/samtools_replaceheader.nf similarity index 50% rename from modules/nf-core/samtools/reheader/main.nf rename to modules/local/samtools_replaceheader.nf index 24192ea..6e23e46 100644 --- a/modules/nf-core/samtools/reheader/main.nf +++ b/modules/local/samtools_replaceheader.nf @@ -2,32 +2,34 @@ process SAMTOOLS_REHEADER { tag "$meta.id" label 'process_single' - conda "${moduleDir}/environment.yml" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0': - 'biocontainers/samtools:1.20--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(file), path(header) output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.${meta.suffix}"), emit: bam + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = "${meta.suffix}" + def args = task.ext.args ?: '' - if ("$bam" == "${meta.id}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if ("$file" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - samtools \\ - reheader \\ - $args \\ - $bam \\ - > ${prefix}.bam + if [ ! 
-z "${args}" ]; then + samtools reheader ${args} ${file} | \\ + samtools reheader ${header} - > ${prefix}.${suffix} + else + samtools reheader ${header} ${file} > ${prefix}.${suffix} + fi cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -36,12 +38,12 @@ process SAMTOOLS_REHEADER { """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = file.name.split('\\.')[-1] """ - touch ${prefix}.bam + touch ${prefix}.${suffix} - ccat <<-END_VERSIONS > versions.yml + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS diff --git a/modules/nf-core/samtools/reheader/environment.yml b/modules/nf-core/samtools/reheader/environment.yml deleted file mode 100644 index aa9c798..0000000 --- a/modules/nf-core/samtools/reheader/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: samtools_reheader -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::samtools=1.20 - - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/reheader/meta.yml b/modules/nf-core/samtools/reheader/meta.yml deleted file mode 100644 index a5e1591..0000000 --- a/modules/nf-core/samtools/reheader/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json - -name: "samtools_reheader" -description: | - Replace the header in the bam file with the header generated by the command. - This command is much faster than replacing the header with a BAM→SAM→BAM conversion. -keywords: - - reheader - - cram - - bam - - genomics -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. 
- homepage: http://www.htslib.org/ - documentation: http://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - bam: - type: file - description: BAM/CRAM file to be reheaded - pattern: "*.{bam,cram}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: Reheaded BAM/CRAM file - pattern: "*.{bam,cram}" -authors: - - "@louislenezet" -maintainers: - - "@louislenezet" From eeb3b2980eb4ccd987f95aea4d30628de3d398be Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 28 Jun 2024 13:52:19 +0100 Subject: [PATCH 06/27] switch to custom samtools reheader wrapper in local module --- modules.json | 67 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/modules.json b/modules.json index 3331b0d..bc96f67 100644 --- a/modules.json +++ b/modules.json @@ -8,86 +8,111 @@ "blast/blastn": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/mem": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "crumble": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] 
+ "installed_by": [ + "modules" + ] }, "gunzip": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "minimap2/align": { "branch": "master", "git_sha": "efbf86bb487f288ac30660282709d9620dd6048e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/fastq": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" }, - "samtools/reheader": { - "branch": "master", - "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", - "installed_by": ["modules"] - }, "samtools/stats": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } } } } -} +} \ No newline at end of file From 0e58adef543f2066637a2c9caced65c2f6d98fb8 Mon Sep 17 00:00:00 2001 From: Tyler 
Chafin Date: Fri, 28 Jun 2024 13:53:02 +0100 Subject: [PATCH 07/27] added subworkflow to insert custom sam header before cram generation --- workflows/readmapping.nf | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/workflows/readmapping.nf b/workflows/readmapping.nf index 18ebd36..854d3be 100644 --- a/workflows/readmapping.nf +++ b/workflows/readmapping.nf @@ -21,6 +21,13 @@ if (params.fasta) { ch_fasta = Channel.fromPath(params.fasta) } else { exit 1, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +// +// MODULE: Local modules +// + +include { SAMTOOLS_REHEADER } from '../modules/local/samtools_replaceheader' + + // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // @@ -48,7 +55,6 @@ include { CONVERT_STATS } from '../subworkflows/local/convert_st include { UNTAR } from '../modules/nf-core/untar/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -125,14 +131,34 @@ workflow READMAPPING { ALIGN_ONT ( PREPARE_GENOME.out.fasta, ch_reads.ont ) ch_versions = ch_versions.mix ( ALIGN_ONT.out.versions ) - + // gather alignments ch_aligned_bams = Channel.empty() | mix( ALIGN_HIC.out.bam ) | mix( ALIGN_ILLUMINA.out.bam ) | mix( ALIGN_HIFI.out.bam ) | mix( ALIGN_CLR.out.bam ) | mix( ALIGN_ONT.out.bam ) - CONVERT_STATS ( ch_aligned_bams, PREPARE_GENOME.out.fasta ) + + // Optionally insert params.header information to bams + ch_reheadered_bams = Channel.empty() + if ( params.header ) { + ch_combined = ch_aligned_bams.map { meta, bam, _ -> + def suffix = bam instanceof List ? 
bam[0].getExtension() : bam.getExtension() + meta.suffix = suffix // add suffix to meta so output matches input type + [meta, bam, file( params.header )] + } + SAMTOOLS_REHEADER( ch_combined ) + ch_reheadered_bams = SAMTOOLS_REHEADER.out.bam.map { bam -> bam + [[]] } + ch_versions = ch_versions.mix ( SAMTOOLS_REHEADER.out.versions ) + } else { + // If no reheadering is done, use the original aligned bams + ch_reheadered_bams = ch_aligned_bams + } + ch_aligned_bams.view() + ch_reheadered_bams.view() + + // convert to cram and gather stats + CONVERT_STATS ( ch_reheadered_bams, PREPARE_GENOME.out.fasta ) ch_versions = ch_versions.mix ( CONVERT_STATS.out.versions ) From 6e065b9688258432a2789ef8d1115936e3c96355 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 28 Jun 2024 14:01:23 +0100 Subject: [PATCH 08/27] example header template for profile/test --- assets/GCA_922984935.2.subset.header.sam | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 assets/GCA_922984935.2.subset.header.sam diff --git a/assets/GCA_922984935.2.subset.header.sam b/assets/GCA_922984935.2.subset.header.sam new file mode 100644 index 0000000..ded409f --- /dev/null +++ b/assets/GCA_922984935.2.subset.header.sam @@ -0,0 +1,2 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:OV277441.1 LN:7999920 M5:0457acf8690429f1c98ee545cb8573b8 UR:https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz AS:GCA_922984935.2 AN:SUPER_1 SP:Meles meles From 23bcb91a16e0bb177bde5dd5fe5e5c72f9276380 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 28 Jun 2024 14:08:32 +0100 Subject: [PATCH 09/27] remove test prints --- workflows/readmapping.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/workflows/readmapping.nf b/workflows/readmapping.nf index 854d3be..6fbc2ee 100644 --- a/workflows/readmapping.nf +++ b/workflows/readmapping.nf @@ -154,8 +154,6 @@ workflow READMAPPING { // If no reheadering is done, use the original aligned bams 
ch_reheadered_bams = ch_aligned_bams } - ch_aligned_bams.view() - ch_reheadered_bams.view() // convert to cram and gather stats CONVERT_STATS ( ch_reheadered_bams, PREPARE_GENOME.out.fasta ) From 79aef7df0c9b9b051b801a457b569f199233eeba Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 28 Jun 2024 14:10:00 +0100 Subject: [PATCH 10/27] remove test prints and minor cleanup --- workflows/readmapping.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/readmapping.nf b/workflows/readmapping.nf index 6fbc2ee..1395230 100644 --- a/workflows/readmapping.nf +++ b/workflows/readmapping.nf @@ -148,7 +148,7 @@ workflow READMAPPING { [meta, bam, file( params.header )] } SAMTOOLS_REHEADER( ch_combined ) - ch_reheadered_bams = SAMTOOLS_REHEADER.out.bam.map { bam -> bam + [[]] } + ch_reheadered_bams = SAMTOOLS_REHEADER.out.bam.map { bam -> bam + [] } ch_versions = ch_versions.mix ( SAMTOOLS_REHEADER.out.versions ) } else { // If no reheadering is done, use the original aligned bams From 36079fe60656c7577dc1bf2df9f2b447524a890f Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 28 Jun 2024 14:17:50 +0100 Subject: [PATCH 11/27] remove test prints and minor cleanup --- workflows/readmapping.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/workflows/readmapping.nf b/workflows/readmapping.nf index 1395230..844d2dc 100644 --- a/workflows/readmapping.nf +++ b/workflows/readmapping.nf @@ -148,10 +148,9 @@ workflow READMAPPING { [meta, bam, file( params.header )] } SAMTOOLS_REHEADER( ch_combined ) - ch_reheadered_bams = SAMTOOLS_REHEADER.out.bam.map { bam -> bam + [] } + ch_reheadered_bams = SAMTOOLS_REHEADER.out.bam.map { bam -> bam + [[]] } ch_versions = ch_versions.mix ( SAMTOOLS_REHEADER.out.versions ) } else { - // If no reheadering is done, use the original aligned bams ch_reheadered_bams = ch_aligned_bams } From c6568fe18cc3d763b6183632861b93b9c2aec6a6 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 28 Jun 2024 
14:43:29 +0100 Subject: [PATCH 12/27] docs for samtools_reheader --- docs/output.md | 4 ++++ docs/usage.md | 3 +++ 2 files changed, 7 insertions(+) diff --git a/docs/output.md b/docs/output.md index 9722d11..836af2f 100644 --- a/docs/output.md +++ b/docs/output.md @@ -79,6 +79,10 @@ The filtered PacBio reads are aligned with `MINIMAP2_ALIGN`. The sorted and merg ## Alignment post-processing +### External metadata + +If provided using the `--header` option, all output alignments (`*.cram`) will include any additional metadata supplied as a SAM header template, replacing the existing *@HD* and *@SD* entries (note that this behaviour can be altered by modifying the `ext.args` for `SAMTOOLS_REHEADER` in `modules.config`. + ### Statistics The output alignments, along with the index, are used to calculate mapping statistics. Output files are generated using `SAMTOOLS_STATS`, `SAMTOOLS_FLAGSTAT` and `SAMTOOLS_IDXSTATS`. diff --git a/docs/usage.md b/docs/usage.md index 8c6923d..d63cf0f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -66,6 +66,9 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` +You can also optionally supply a template SAM header using the `--header` option to add or modify metadata associated with the assembly, which will be incorporated into the output alignments. + + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. 
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: From af7636edfc7df93636f7b741216e7627b7e75d4c Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 28 Jun 2024 16:04:20 +0100 Subject: [PATCH 13/27] prettier linting --- docs/output.md | 2 +- docs/usage.md | 1 - modules.json | 62 +++++++++++++------------------------------------- 3 files changed, 17 insertions(+), 48 deletions(-) diff --git a/docs/output.md b/docs/output.md index 836af2f..e9b8b8c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -81,7 +81,7 @@ The filtered PacBio reads are aligned with `MINIMAP2_ALIGN`. The sorted and merg ### External metadata -If provided using the `--header` option, all output alignments (`*.cram`) will include any additional metadata supplied as a SAM header template, replacing the existing *@HD* and *@SD* entries (note that this behaviour can be altered by modifying the `ext.args` for `SAMTOOLS_REHEADER` in `modules.config`. +If provided using the `--header` option, all output alignments (`*.cram`) will include any additional metadata supplied as a SAM header template, replacing the existing _@HD_ and _@SQ_ entries (note that this behaviour can be altered by modifying the `ext.args` for `SAMTOOLS_REHEADER` in `modules.config`). ### Statistics diff --git a/docs/usage.md b/docs/usage.md index d63cf0f..f539695 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -68,7 +68,6 @@ work # Directory containing the nextflow working files You can also optionally supply a template SAM header using the `--header` option to add or modify metadata associated with the assembly, which will be incorporated into the output alignments. - ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. 
When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: diff --git a/modules.json b/modules.json index bc96f67..81077ac 100644 --- a/modules.json +++ b/modules.json @@ -8,111 +8,81 @@ "blast/blastn": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "crumble": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunzip": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", "git_sha": "efbf86bb487f288ac30660282709d9620dd6048e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/fastq": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - 
"modules" - ] + "installed_by": ["modules"] }, "samtools/idxstats": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" }, "samtools/stats": { "branch": "master", "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/view": { "branch": "master", "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } } } } -} \ No newline at end of file +} From 755731e7abc89c07413528722d818baaf59a2d8c Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Mon, 1 Jul 2024 10:03:24 +0100 Subject: [PATCH 14/27] moved assets/{example}.header.sam to hosted test_data --- assets/GCA_922984935.2.subset.header.sam | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 assets/GCA_922984935.2.subset.header.sam diff --git a/assets/GCA_922984935.2.subset.header.sam b/assets/GCA_922984935.2.subset.header.sam deleted file mode 100644 index ded409f..0000000 --- a/assets/GCA_922984935.2.subset.header.sam +++ /dev/null @@ -1,2 +0,0 @@ -@HD VN:1.0 SO:unsorted -@SQ SN:OV277441.1 LN:7999920 M5:0457acf8690429f1c98ee545cb8573b8 UR:https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz AS:GCA_922984935.2 AN:SUPER_1 SP:Meles meles From 1593a8dd750768cae7fb4f404b8751726576925a Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Mon, 1 Jul 2024 10:11:16 +0100 Subject: [PATCH 15/27] add reheader steps to -profile test runs --- 
conf/test.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test.config b/conf/test.config index 015885c..e269f68 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,4 +24,5 @@ params { // Fasta references fasta = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz" + header = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.header.sam" } From 4ab46b0e659dc4c407399dfb5414abbf9dedd9f9 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Thu, 4 Jul 2024 13:32:54 +0100 Subject: [PATCH 16/27] fix reheader to only replace SQ lines; maintains input order --- conf/modules.config | 1 - modules/local/samtools_replaceheader.nf | 22 +++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 3533e40..1bb619f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -32,7 +32,6 @@ process { // @HD and @SQ lines, while preserving any other header entries withName: SAMTOOLS_REHEADER { ext.prefix = { "${meta.id}.reheader" } - ext.args = { '-c \'grep -v ^@HD\\|^@SQ\'' } } withName: SAMTOOLS_COLLATETOFASTA { diff --git a/modules/local/samtools_replaceheader.nf b/modules/local/samtools_replaceheader.nf index 6e23e46..0abbffe 100644 --- a/modules/local/samtools_replaceheader.nf +++ b/modules/local/samtools_replaceheader.nf @@ -20,16 +20,24 @@ process SAMTOOLS_REHEADER { script: def prefix = task.ext.prefix ?: "${meta.id}" def suffix = "${meta.suffix}" - def args = task.ext.args ?: '' if ("$file" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - if [ ! 
-z "${args}" ]; then - samtools reheader ${args} ${file} | \\ - samtools reheader ${header} - > ${prefix}.${suffix} - else - samtools reheader ${header} ${file} > ${prefix}.${suffix} - fi + # Replace SQ lines with those from external template + ( samtools view --no-PG --header-only ${file} | \\ + grep -v ^@SQ && grep ^@SQ ${header} ) > .temp.header.sam + + # custom sort for readability (retain order of insertion but sort groups by tag) + ( grep ^@HD .temp.header.sam && \ + grep ^@SQ .temp.header.sam && \ + grep ^@RG .temp.header.sam && \ + grep ^@PG .temp.header.sam && \ + if grep -q -E -v '^@HD|^@SQ|^@RG|^@PG' .temp.header.sam; then \ + grep -v -E '^@HD|^@SQ|^@RG|^@PG' .temp.header.sam; \ + fi; ) > .temp.sorted.header.sam + + # Insert new header into file + samtools reheader .temp.sorted.header.sam ${file} > ${prefix}.${suffix} cat <<-END_VERSIONS > versions.yml "${task.process}": From a1725a027b06d3be424c0d15ba7f4ce9e8fb6034 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 5 Jul 2024 14:02:37 +0100 Subject: [PATCH 17/27] Update modules/local/samtools_replaceheader.nf Co-authored-by: Matthieu Muffato --- modules/local/samtools_replaceheader.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/local/samtools_replaceheader.nf b/modules/local/samtools_replaceheader.nf index 0abbffe..2d3496b 100644 --- a/modules/local/samtools_replaceheader.nf +++ b/modules/local/samtools_replaceheader.nf @@ -32,9 +32,8 @@ process SAMTOOLS_REHEADER { grep ^@SQ .temp.header.sam && \ grep ^@RG .temp.header.sam && \ grep ^@PG .temp.header.sam && \ - if grep -q -E -v '^@HD|^@SQ|^@RG|^@PG' .temp.header.sam; then \ - grep -v -E '^@HD|^@SQ|^@RG|^@PG' .temp.header.sam; \ - fi; ) > .temp.sorted.header.sam + grep -v -E '^@HD|^@SQ|^@RG|^@PG' .temp.header.sam || false; \ + ) > .temp.sorted.header.sam # Insert new header into file samtools reheader .temp.sorted.header.sam ${file} > ${prefix}.${suffix} From b6e64771761169efb18fcd1f0d3170483934c689 Mon Sep 17 
00:00:00 2001 From: Tyler Chafin Date: Fri, 5 Jul 2024 17:49:24 +0100 Subject: [PATCH 18/27] dynamic output but this time it works --- modules/local/samtools_replaceheader.nf | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/modules/local/samtools_replaceheader.nf b/modules/local/samtools_replaceheader.nf index 2d3496b..c28cfbc 100644 --- a/modules/local/samtools_replaceheader.nf +++ b/modules/local/samtools_replaceheader.nf @@ -11,15 +11,16 @@ process SAMTOOLS_REHEADER { tuple val(meta), path(file), path(header) output: - tuple val(meta), path("*.${meta.suffix}"), emit: bam + tuple val(meta), path("${prefix}.${suffix}"), optional:true, emit: bam + tuple val(meta), path("${prefix}.${suffix}"), optional:true, emit: cram path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = "${meta.suffix}" + prefix = task.ext.prefix ?: "${meta.id}" + suffix = file.getExtension() if ("$file" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
""" @@ -28,11 +29,11 @@ process SAMTOOLS_REHEADER { grep -v ^@SQ && grep ^@SQ ${header} ) > .temp.header.sam # custom sort for readability (retain order of insertion but sort groups by tag) - ( grep ^@HD .temp.header.sam && \ - grep ^@SQ .temp.header.sam && \ - grep ^@RG .temp.header.sam && \ - grep ^@PG .temp.header.sam && \ - grep -v -E '^@HD|^@SQ|^@RG|^@PG' .temp.header.sam || false; \ + ( grep ^@HD .temp.header.sam || true && \ + grep ^@SQ .temp.header.sam || true && \ + grep ^@RG .temp.header.sam || true && \ + grep ^@PG .temp.header.sam || true && \ + grep -v -E '^@HD|^@SQ|^@RG|^@PG' .temp.header.sam || true; \ ) > .temp.sorted.header.sam # Insert new header into file @@ -45,8 +46,8 @@ process SAMTOOLS_REHEADER { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = file.name.split('\\.')[-1] + prefix = task.ext.prefix ?: "${meta.id}" + suffix = file.getExtension() """ touch ${prefix}.${suffix} From 8ade9cfe8c7e8309b703599437990aa2af1038cd Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 5 Jul 2024 18:47:56 +0100 Subject: [PATCH 19/27] simplify samtools_replaceheader inputs --- modules/local/samtools_replaceheader.nf | 3 ++- workflows/readmapping.nf | 11 +++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/modules/local/samtools_replaceheader.nf b/modules/local/samtools_replaceheader.nf index c28cfbc..06e45f6 100644 --- a/modules/local/samtools_replaceheader.nf +++ b/modules/local/samtools_replaceheader.nf @@ -8,7 +8,8 @@ process SAMTOOLS_REHEADER { 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: - tuple val(meta), path(file), path(header) + tuple val(meta), path(file) + path(header) output: tuple val(meta), path("${prefix}.${suffix}"), optional:true, emit: bam diff --git a/workflows/readmapping.nf b/workflows/readmapping.nf index 844d2dc..f1571ad 100644 --- a/workflows/readmapping.nf +++ b/workflows/readmapping.nf @@ -13,7 +13,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: 
true // Check mandatory parameters if (params.input) { ch_input = Channel.fromPath(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.fasta) { ch_fasta = Channel.fromPath(params.fasta) } else { exit 1, 'Genome fasta file not specified!' } - +if (params.header) { ch_header = Channel.fromPath(params.header) } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -142,13 +142,8 @@ workflow READMAPPING { // Optionally insert params.header information to bams ch_reheadered_bams = Channel.empty() if ( params.header ) { - ch_combined = ch_aligned_bams.map { meta, bam, _ -> - def suffix = bam instanceof List ? bam[0].getExtension() : bam.getExtension() - meta.suffix = suffix // add suffix to meta so output matches input type - [meta, bam, file( params.header )] - } - SAMTOOLS_REHEADER( ch_combined ) - ch_reheadered_bams = SAMTOOLS_REHEADER.out.bam.map { bam -> bam + [[]] } + SAMTOOLS_REHEADER( ch_aligned_bams, ch_header.first() ) + ch_reheadered_bams = SAMTOOLS_REHEADER.out.bam ch_versions = ch_versions.mix ( SAMTOOLS_REHEADER.out.versions ) } else { ch_reheadered_bams = ch_aligned_bams From a8abc897f926d74ee98f1b24907688c6ffc96fd2 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 5 Jul 2024 18:48:24 +0100 Subject: [PATCH 20/27] remove some of the back-and-forth with bai placeholders --- subworkflows/local/align_ont.nf | 1 - subworkflows/local/align_pacbio.nf | 1 - subworkflows/local/align_short.nf | 3 ++- subworkflows/local/convert_stats.nf | 6 ++++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/align_ont.nf b/subworkflows/local/align_ont.nf index f1013d4..33d42b5 100644 --- a/subworkflows/local/align_ont.nf +++ b/subworkflows/local/align_ont.nf @@ -42,7 +42,6 @@ workflow ALIGN_ONT { // Convert merged BAM to CRAM and calculate indices and statistics SAMTOOLS_MERGE.out.bam | mix ( ch_bams.single_bam ) - | map { meta, bam -> [ meta, bam, [] ] } | set { ch_sort } 
diff --git a/subworkflows/local/align_pacbio.nf b/subworkflows/local/align_pacbio.nf index 07855a7..73a8321 100644 --- a/subworkflows/local/align_pacbio.nf +++ b/subworkflows/local/align_pacbio.nf @@ -49,7 +49,6 @@ workflow ALIGN_PACBIO { // Convert merged BAM to CRAM and calculate indices and statistics SAMTOOLS_MERGE.out.bam | mix ( ch_bams.single_bam ) - | map { meta, bam -> [ meta, bam, [] ] } | set { ch_sort } diff --git a/subworkflows/local/align_short.nf b/subworkflows/local/align_short.nf index 33c27b6..5b7017a 100644 --- a/subworkflows/local/align_short.nf +++ b/subworkflows/local/align_short.nf @@ -58,8 +58,9 @@ workflow ALIGN_SHORT { // Convert merged BAM to CRAM and calculate indices and statistics + SAMTOOLS_SORMADUP.out.bam.view() SAMTOOLS_SORMADUP.out.bam - | map { meta, bam -> [ meta, bam, [] ] } + | map { meta, bam -> [ meta, bam] } | set { ch_stat } diff --git a/subworkflows/local/convert_stats.nf b/subworkflows/local/convert_stats.nf index b89928a..3061bf7 100644 --- a/subworkflows/local/convert_stats.nf +++ b/subworkflows/local/convert_stats.nf @@ -21,7 +21,7 @@ workflow CONVERT_STATS { // Compress the quality scores of Illumina and PacBio CCS alignments bam | branch { - meta, bam, bai -> + meta, bam -> run_crumble : meta.datatype == "hic" || meta.datatype == "illumina" || meta.datatype == "pacbio" [meta, bam] no_crumble: true @@ -35,7 +35,9 @@ workflow CONVERT_STATS { // Convert BAM to CRAM CRUMBLE.out.bam | map { meta, bam -> [meta, bam, []] } - | mix ( ch_bams.no_crumble ) + | mix ( ch_bams.no_crumble.map{ + meta, bam -> [meta, bam, []] } + ) | set { ch_bams_for_conversion } SAMTOOLS_VIEW ( ch_bams_for_conversion, fasta, [] ) From db5450110e152950506596310e5b88b7325de96f Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 5 Jul 2024 18:55:14 +0100 Subject: [PATCH 21/27] cleanup un-needed line --- subworkflows/local/align_short.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/align_short.nf 
b/subworkflows/local/align_short.nf index 5b7017a..bb49e12 100644 --- a/subworkflows/local/align_short.nf +++ b/subworkflows/local/align_short.nf @@ -60,7 +60,6 @@ workflow ALIGN_SHORT { // Convert merged BAM to CRAM and calculate indices and statistics SAMTOOLS_SORMADUP.out.bam.view() SAMTOOLS_SORMADUP.out.bam - | map { meta, bam -> [ meta, bam] } | set { ch_stat } From f5b3edc11ad9fdbbe17001f9d4778dfbfcc18970 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Thu, 11 Jul 2024 16:49:16 +0100 Subject: [PATCH 22/27] Update subworkflows/local/align_short.nf Co-authored-by: Matthieu Muffato --- subworkflows/local/align_short.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/align_short.nf b/subworkflows/local/align_short.nf index bb49e12..09befb8 100644 --- a/subworkflows/local/align_short.nf +++ b/subworkflows/local/align_short.nf @@ -58,7 +58,6 @@ workflow ALIGN_SHORT { // Convert merged BAM to CRAM and calculate indices and statistics - SAMTOOLS_SORMADUP.out.bam.view() SAMTOOLS_SORMADUP.out.bam | set { ch_stat } From a7c125b61a5e6a3d49bb519e5665c0cf99f69e05 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Thu, 11 Jul 2024 16:51:49 +0100 Subject: [PATCH 23/27] Update modules/local/samtools_replaceheader.nf Co-authored-by: Matthieu Muffato --- modules/local/samtools_replaceheader.nf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/local/samtools_replaceheader.nf b/modules/local/samtools_replaceheader.nf index 06e45f6..0a027c8 100644 --- a/modules/local/samtools_replaceheader.nf +++ b/modules/local/samtools_replaceheader.nf @@ -27,18 +27,18 @@ process SAMTOOLS_REHEADER { """ # Replace SQ lines with those from external template ( samtools view --no-PG --header-only ${file} | \\ - grep -v ^@SQ && grep ^@SQ ${header} ) > .temp.header.sam + grep -v ^@SQ && grep ^@SQ ${header} ) > temp.header.sam # custom sort for readability (retain order of insertion but sort groups by tag) - ( grep ^@HD .temp.header.sam || 
true && \ - grep ^@SQ .temp.header.sam || true && \ - grep ^@RG .temp.header.sam || true && \ - grep ^@PG .temp.header.sam || true && \ - grep -v -E '^@HD|^@SQ|^@RG|^@PG' .temp.header.sam || true; \ - ) > .temp.sorted.header.sam + ( grep ^@HD temp.header.sam || true && \ + grep ^@SQ temp.header.sam || true && \ + grep ^@RG temp.header.sam || true && \ + grep ^@PG temp.header.sam || true && \ + grep -v -E '^@HD|^@SQ|^@RG|^@PG' temp.header.sam || true; \ + ) > temp.sorted.header.sam # Insert new header into file - samtools reheader .temp.sorted.header.sam ${file} > ${prefix}.${suffix} + samtools reheader temp.sorted.header.sam ${file} > ${prefix}.${suffix} cat <<-END_VERSIONS > versions.yml "${task.process}": From 7a714fcc5e9b329c8b7acfb61a010b7e93d1512e Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Thu, 11 Jul 2024 16:52:46 +0100 Subject: [PATCH 24/27] Update subworkflows/local/convert_stats.nf Co-authored-by: Matthieu Muffato --- subworkflows/local/convert_stats.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/convert_stats.nf b/subworkflows/local/convert_stats.nf index 3061bf7..dce58c7 100644 --- a/subworkflows/local/convert_stats.nf +++ b/subworkflows/local/convert_stats.nf @@ -23,7 +23,6 @@ workflow CONVERT_STATS { | branch { meta, bam -> run_crumble : meta.datatype == "hic" || meta.datatype == "illumina" || meta.datatype == "pacbio" - [meta, bam] no_crumble: true } | set { ch_bams } From ca86a50171b9f3ec9be885e25f6fa20182728843 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Thu, 11 Jul 2024 16:53:41 +0100 Subject: [PATCH 25/27] Update modules/local/samtools_replaceheader.nf Co-authored-by: Matthieu Muffato --- modules/local/samtools_replaceheader.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/samtools_replaceheader.nf b/modules/local/samtools_replaceheader.nf index 0a027c8..09ac4fc 100644 --- a/modules/local/samtools_replaceheader.nf +++ b/modules/local/samtools_replaceheader.nf @@ -12,8 +12,8 
@@ process SAMTOOLS_REHEADER { path(header) output: - tuple val(meta), path("${prefix}.${suffix}"), optional:true, emit: bam - tuple val(meta), path("${prefix}.${suffix}"), optional:true, emit: cram + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram path "versions.yml", emit: versions when: From 15e8d7b2a6a7aa9cd78ea1e907d8d6ae69f9cfb9 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 12 Jul 2024 09:08:41 +0100 Subject: [PATCH 26/27] streamline prep for samtools_view call --- subworkflows/local/convert_stats.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/subworkflows/local/convert_stats.nf b/subworkflows/local/convert_stats.nf index dce58c7..66308e0 100644 --- a/subworkflows/local/convert_stats.nf +++ b/subworkflows/local/convert_stats.nf @@ -33,10 +33,8 @@ workflow CONVERT_STATS { // Convert BAM to CRAM CRUMBLE.out.bam + | mix ( ch_bams.no_crumble ) | map { meta, bam -> [meta, bam, []] } - | mix ( ch_bams.no_crumble.map{ - meta, bam -> [meta, bam, []] } - ) | set { ch_bams_for_conversion } SAMTOOLS_VIEW ( ch_bams_for_conversion, fasta, [] ) From 3aca264d6c0b714d5ce19968201f2f3bb3295ea5 Mon Sep 17 00:00:00 2001 From: Tyler Chafin Date: Fri, 12 Jul 2024 09:09:46 +0100 Subject: [PATCH 27/27] remove intermediate steps before emit --- subworkflows/local/align_short.nf | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/subworkflows/local/align_short.nf b/subworkflows/local/align_short.nf index 09befb8..8f061ad 100644 --- a/subworkflows/local/align_short.nf +++ b/subworkflows/local/align_short.nf @@ -56,13 +56,7 @@ workflow ALIGN_SHORT { SAMTOOLS_SORMADUP ( ch_bam, fasta ) ch_versions = ch_versions.mix ( SAMTOOLS_SORMADUP.out.versions ) - - // Convert merged BAM to CRAM and calculate indices and statistics - SAMTOOLS_SORMADUP.out.bam - | set { ch_stat } - - emit: - bam = ch_stat // channel: [ val(meta), /path/to/bam ] + bam = 
SAMTOOLS_SORMADUP.out.bam // channel: [ val(meta), /path/to/bam ] versions = ch_versions // channel: [ versions.yml ] }