Skip to content

Commit

Permalink
Merge pull request #51 from sanger-tol/hifiasm_hic_scaff
Browse files Browse the repository at this point in the history
hap1/hap2 scaffolding
  • Loading branch information
ksenia-krasheninnikova authored Sep 6, 2024
2 parents 76d3fc9 + cbc319e commit 115b833
Show file tree
Hide file tree
Showing 13 changed files with 390 additions and 96 deletions.
207 changes: 188 additions & 19 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ process {
]
}

withName: '.*GENOME_STATISTICS_RAW:BUSCO' {
withName: '.*GENOME_STATISTICS_RAW:BUSCO_PRI' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/${meta.id}.p_ctg.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -132,17 +132,17 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_PRI_HIC' {
ext.prefix = { "${meta.id}.asm.hic.p_ctg" }
withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_HAP1_HIC' {
ext.prefix = { "${meta.id}.asm.hic.hap1" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT_HIC' {
ext.prefix = { "${meta.id}.asm.hic.a_ctg" }
withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_HAP2_HIC' {
ext.prefix = { "${meta.id}.asm.hic.hap2" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" },
mode: params.publish_dir_mode,
Expand All @@ -158,7 +158,7 @@ process {
]
}
withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_PRI' {
ext.prefix = { "${meta.id}.asm.hic.p_ctg" }
ext.prefix = { "${meta.id}.asm.hic.hap1" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" },
mode: params.publish_dir_mode,
Expand All @@ -167,16 +167,30 @@ process {
}

withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_HAP' {
ext.prefix = { "${meta.id}.asm.hic.a_ctg" }
ext.prefix = { "${meta.id}.asm.hic.hap2" }
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" },
mode: params.publish_dir_mode,
pattern: '*assembly_summary'
]
}
withName: '.*GENOME_STATISTICS_RAW_HIC:BUSCO' {

withName: '.*GENOME_STATISTICS_RAW_HIC:BUSCO_PRI' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.p_ctg.${meta.lineage}.busco" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.hap1.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.endsWith('busco.log') ? filename :
filename.endsWith('full_table.tsv') ? filename :
filename.endsWith('missing_busco_list.tsv') ? filename :
filename.startsWith('short_summary') ? filename :
filename.endsWith('busco.batch_summary.txt') ? filename :
null }
]
}

withName: '.*GENOME_STATISTICS_RAW_HIC:BUSCO_HAP' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.hap2.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.endsWith('busco.log') ? filename :
filename.endsWith('full_table.tsv') ? filename :
Expand All @@ -189,7 +203,7 @@ process {

withName: '.*GENOME_STATISTICS_RAW_HIC:MERQURYFK_MERQURYFK' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.p_ctg.ccs.merquryk" },
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.hap1.ccs.merquryk" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand Down Expand Up @@ -332,7 +346,7 @@ process {
]
}

withName: '.*GENOME_STATISTICS_PURGED:BUSCO' {
withName: '.*GENOME_STATISTICS_PURGED:BUSCO_PRI' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/${meta.id}.purged.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -499,7 +513,7 @@ process {
]
}

withName: '.*HIC_MAPPING:SAMTOOLS_MERGE_HIC_MAPPING' {
withName: '.*HIC_MAPPING.*:SAMTOOLS_MERGE_HIC_MAPPING' {
ext.prefix = { "${meta.id}_merged" }
}

Expand Down Expand Up @@ -530,7 +544,7 @@ process {
}


withName: '.*HIC_MAPPING:CONVERT_STATS:SAMTOOLS_VIEW' {
withName: '.*HIC_MAPPING.*:CONVERT_STATS:SAMTOOLS_VIEW' {
ext.args = "--output-fmt cram"
}

Expand Down Expand Up @@ -559,7 +573,7 @@ process {
}

// Set up of the scaffolding pipeline
withName: 'YAHS' {
withName: '.*SCAFFOLDING:YAHS' {
ext.prefix = 'out'
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" },
Expand All @@ -568,7 +582,7 @@ process {
]
}

withName: 'COOLER_CLOAD' {
withName: '.*SCAFFOLDING:COOLER_CLOAD' {
// Positions in the input file are zero-based;
// chrom1 field number (one-based) is 2;
// pos1 field number (one-based) is 3;
Expand All @@ -582,7 +596,7 @@ process {
]
}

withName: 'PRETEXTSNAPSHOT' {
withName: '.*SCAFFOLDING:PRETEXTSNAPSHOT' {
// Make one plot containing all sequences
ext.args = '--sequences \"=full\"'
publishDir = [
Expand All @@ -592,7 +606,7 @@ process {
]
}

withName: 'JUICER_TOOLS_PRE' {
withName: '.*SCAFFOLDING:JUICER_TOOLS_PRE' {
ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar'
ext.juicer_jvm_params = '-Xms1g -Xmx6g'
publishDir = [
Expand All @@ -602,7 +616,7 @@ process {
]
}

withName: 'JUICER_PRE' {
withName: '.*SCAFFOLDING:JUICER_PRE' {
ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'"
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" },
Expand All @@ -620,7 +634,7 @@ process {
]
}

withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' {
withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO_PRI' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs/out_scaffolds_final.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
Expand All @@ -640,6 +654,161 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Scaffolding hap1/hap2
if (params.hifiasm_hic_on) {

// Settings for SAMTOOLS_MARKDUP_HIC_MAPPING in any workflow whose
// fully-qualified name matches HIC_MAPPING_HAP* (the per-haplotype Hi-C mapping runs).
withName: '.*HIC_MAPPING_HAP.*:SAMTOOLS_MARKDUP_HIC_MAPPING' {
// Output file prefix: "<meta.id>_mkdup"
ext.prefix = { "${meta.id}_mkdup" }
// Publish into <outdir>/<meta.id>.<params.hifiasmhic>/scaffolding_<meta.hap_id>;
// saveAs returns null for versions.yml, so that file is not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for BAMTOBED_SORT in workflows matching HIC_MAPPING_HAP*.
withName: '.*HIC_MAPPING_HAP.*:BAMTOBED_SORT' {
// Publish into the per-haplotype scaffolding directory (keyed by meta.hap_id);
// saveAs returns null for versions.yml, so that file is not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}


// Settings for SAMTOOLS_STATS inside the CONVERT_STATS subworkflow of
// workflows matching HIC_MAPPING_HAP*.
withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_STATS' {
// Publish into the per-haplotype scaffolding directory (keyed by meta.hap_id);
// saveAs returns null for versions.yml, so that file is not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for SAMTOOLS_FLAGSTAT inside the CONVERT_STATS subworkflow of
// workflows matching HIC_MAPPING_HAP*.
withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_FLAGSTAT' {
// Publish into the per-haplotype scaffolding directory (keyed by meta.hap_id);
// saveAs returns null for versions.yml, so that file is not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for SAMTOOLS_IDXSTATS inside the CONVERT_STATS subworkflow of
// workflows matching HIC_MAPPING_HAP*.
withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_IDXSTATS' {
// Publish into the per-haplotype scaffolding directory (keyed by meta.hap_id);
// saveAs returns null for versions.yml, so that file is not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for YAHS in workflows matching SCAFFOLDING_HAP* (per-haplotype scaffolding).
withName: '.*SCAFFOLDING_HAP.*:YAHS' {
// Output prefix is the haplotype id taken from the sample meta map
// (the non-haplotype YAHS selector in this file uses the fixed prefix 'out').
ext.prefix = { "${meta.hap_id}" }
// Publish into scaffolding_<meta.hap_id>/yahs/out.break.yahs under the
// <meta.id>.<params.hifiasmhic> results directory; versions.yml is not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

}

// Settings for COOLER_CLOAD in workflows matching SCAFFOLDING_HAP*.
// ext.args is identical to the one documented for the non-haplotype COOLER_CLOAD selector.
withName: '.*SCAFFOLDING_HAP.*:COOLER_CLOAD' {
// Positions in the input file are zero-based;
// chrom1 field number (one-based) is 2;
// pos1 field number (one-based) is 3;
// chrom2 field number (one-based) is 6;
// pos2 field number (one-based) is 7
ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7'
// Publish next to the per-haplotype YAHS output; versions.yml is not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for PRETEXTSNAPSHOT in workflows matching SCAFFOLDING_HAP*.
withName: '.*SCAFFOLDING_HAP.*:PRETEXTSNAPSHOT' {
// Make one plot containing all sequences
// (the inner double quotes are passed through to the command line as part of the argument)
ext.args = '--sequences \"=full\"'
// Publish next to the per-haplotype YAHS output; versions.yml is not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for JUICER_TOOLS_PRE in workflows matching SCAFFOLDING_HAP*.
withName: '.*SCAFFOLDING_HAP.*:JUICER_TOOLS_PRE' {
// File name of the juicer_tools jar the module should run
// NOTE(review): how the module resolves this name to a path is not visible here — confirm in the module script.
ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar'
// JVM heap settings: 1 GB initial (-Xms), 6 GB maximum (-Xmx)
ext.juicer_jvm_params = '-Xms1g -Xmx6g'
// Publish next to the per-haplotype YAHS output; versions.yml is not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for JUICER_PRE in workflows matching SCAFFOLDING_HAP*.
withName: '.*SCAFFOLDING_HAP.*:JUICER_PRE' {
// Shell post-processing pipeline handed to the module as args2:
// sort on fields 2 and 6 (dictionary order, C locale), then keep only
// records whose fields 3 and 7 are non-negative.
// The '$' signs are backslash-escaped so Groovy does not interpolate them
// inside the double-quoted string; awk receives $3 and $7 literally.
// NOTE(review): -S50G asks sort for a 50G buffer — confirm the process memory request covers it.
ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'"
// Publish next to the per-haplotype YAHS output; versions.yml is not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}

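// End of hap1/hap2 scaffolding processes (enabled only when params.hifiasm_hic_on is set);
// statistics for the hap1/hap2 scaffolds follow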

// Settings for GFASTATS_PRI in GENOME_STATISTICS_SCAFFOLDS_HAPS.
// Its output is published under the hap1 scaffolding directory.
withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:GFASTATS_PRI' {
// Output prefix: "<meta.id>_scaffolds_final"
ext.prefix = { "${meta.id}_scaffolds_final" }
// Only files matching the glob '*assembly_summary' are published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
pattern: '*assembly_summary'
]
}

// Settings for GFASTATS_HAP in GENOME_STATISTICS_SCAFFOLDS_HAPS.
// Its output is published under the hap2 scaffolding directory.
withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:GFASTATS_HAP' {
// Same prefix as GFASTATS_PRI; the two are kept apart by their publish directories.
ext.prefix = { "${meta.id}_scaffolds_final" }
// Only files matching the glob '*assembly_summary' are published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
pattern: '*assembly_summary'
]
}

// Settings for BUSCO_PRI in GENOME_STATISTICS_SCAFFOLDS_HAPS.
// Results go to a lineage-specific directory under the hap1 scaffolding output.
withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:BUSCO_PRI' {
// saveAs acts as an allow-list: only the BUSCO log, full table, missing-BUSCO list,
// short summaries and batch summary are published; every other file maps to null
// and is therefore not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs/out_scaffolds_final.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.endsWith('busco.log') ? filename :
filename.endsWith('full_table.tsv') ? filename :
filename.endsWith('missing_busco_list.tsv') ? filename :
filename.startsWith('short_summary') ? filename :
filename.endsWith('busco.batch_summary.txt') ? filename :
null }
]
}

// Settings for BUSCO_HAP in GENOME_STATISTICS_SCAFFOLDS_HAPS.
// Results go to a lineage-specific directory under the hap2 scaffolding output.
withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:BUSCO_HAP' {
// saveAs acts as an allow-list: only the BUSCO log, full table, missing-BUSCO list,
// short summaries and batch summary are published; every other file maps to null
// and is therefore not published.
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs/out_scaffolds_final.${meta.lineage}.busco" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.endsWith('busco.log') ? filename :
filename.endsWith('full_table.tsv') ? filename :
filename.endsWith('missing_busco_list.tsv') ? filename :
filename.startsWith('short_summary') ? filename :
filename.endsWith('busco.batch_summary.txt') ? filename :
null }
]
}

// Settings for MERQURYFK_MERQURYFK in GENOME_STATISTICS_SCAFFOLDS_HAPS.
// NOTE(review): there is a single selector for this process and it publishes under
// scaffolding_hap1 only — presumably one run evaluates both haplotypes; confirm that
// the hap1 directory is the intended location.
withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:MERQURYFK_MERQURYFK' {
// Publish everything except versions.yml (saveAs returns null for it).
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// End of Scaffolding hap1/hap2
// End of Set up of the scaffolding pipeline

//Set up of assembly stats subworkflow
Expand Down
Loading

0 comments on commit 115b833

Please sign in to comment.