Skip to content

Commit

Permalink
Merge pull request #44 from sanger-tol/hic_minimap2
Browse files Browse the repository at this point in the history
add hic minimap2
  • Loading branch information
ksenia-krasheninnikova authored Jul 16, 2024
2 parents a146559 + a6edc93 commit 5869d2a
Show file tree
Hide file tree
Showing 23 changed files with 617 additions and 50 deletions.
1 change: 1 addition & 0 deletions assets/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dataset:
reads:
- reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2#7.sub.cram
hic_motif: GATC,GANTC,CTNAG,TTAA
hic_aligner: bwamem2
busco:
lineage: bacteria_odb10
mito:
Expand Down
1 change: 1 addition & 0 deletions assets/test_gfLaeSulp1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dataset:
reads:
- reads: /lustre/scratch124/tol/projects/darwin/data/fungi/Laetiporus_sulphureus/genomic_data/gfLaeSulp1/hic-arima2/40063_3#5.cram
hic_motif: GATC,GANTC,CTNAG,TTAA
hic_aligner: minimap2
busco:
lineages_path: /lustre/scratch123/tol/resources/busco/v5/
lineage: fungi_odb10
Expand Down
1 change: 1 addition & 0 deletions assets/test_github.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dataset:
reads:
- reads: /home/runner/work/genomeassembly/genomeassembly/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2#7.sub.cram
hic_motif: GATC,GANTC,CTNAG,TTAA
hic_aligner: minimap2
busco:
lineage: bacteria_odb10
mito:
Expand Down
1 change: 1 addition & 0 deletions assets/test_gsMetZobe1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dataset:
reads:
- reads: /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/hic-arima2/35528_4#7.cram
hic_motif: GATC,GANTC,CTNAG,TTAA
hic_aligner: minimap2
busco:
lineage_path: /lustre/scratch123/tol/resources/busco/v5/
lineage: fungi_odb10
Expand Down
1 change: 1 addition & 0 deletions assets/test_iyVesGerm1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dataset:
reads:
- reads: /lustre/scratch123/tol/resources/genomeassembly/testdata/iyVesGerm1/hic-arima2/34957_3#2.cram
hic_motif: GATC,GANTC,CTNAG,TTAA
hic_aligner: minimap2
busco:
lineage_path: /lustre/scratch123/tol/resources/busco/v5/
lineage: insecta_odb10
Expand Down
2 changes: 2 additions & 0 deletions bin/awk_filter_reads.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Remove the read number that was prepended to the read name earlier (--yy5).
# Lines whose first field starts with "@" are printed unchanged. For every
# other line the first character of column 1 is dropped, bit 2048 of the FLAG
# in column 2 is cleared, and the first 11 columns are printed tab-separated.
# Needs an awk that provides and() and compl() (e.g. gawk).
awk '
BEGIN { OFS = "\t" }
$1 ~ /^@/ { print; next }
{
    $2 = and($2, compl(2048))
    print substr($1, 2), $2, $3, $4, $5, $6, $7, $8, $9, $10, $11
}'
109 changes: 109 additions & 0 deletions bin/filter_five_end.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/perl
use strict;
use warnings;

# Reduce every read to a single alignment record.
#
# Input (STDIN) is tab-separated alignment text in which all records of one
# read are adjacent and share the same first column. Lines starting with "@"
# are echoed unchanged. For each group of records with the same id:
#   * if the group holds one or two records and exactly one of them is a
#     5'-end alignment, that record is printed as-is;
#   * otherwise the first record of the group is printed with bit 0x4 set in
#     its FLAG column.
# Input that contains no alignment record produces no alignment output.

my $prev_id = "";
my @five;           # 5'-end alignments collected for the current read
my @all;            # every record of the current read, in input order
my $counter = 0;    # number of records seen for the current read

while (<STDIN>) {
    chomp;
    if (/^@/) {
        print $_."\n";
        next;
    }
    my ($id, $flag, undef, undef, undef, $cigar) = split /\t/;

    # Index i of @binary holds FLAG bit 2**i (least significant bit first).
    my @binary = split(//, scalar reverse(dec2bin($flag)));

    # A new id means the previous read is complete: emit it and start over.
    if ($prev_id ne $id && $prev_id ne "") {
        emit_read_group($counter, \@five, \@all);
        $counter = 0;
        @five    = ();
        @all     = ();
    }

    $counter++;
    $prev_id = $id;
    push @all, $_;

    # A record counts as a 5'-end alignment when bit 0x4 is clear and the
    # match sits at the read start: CIGAR begins with <n>M when bit 0x10 is
    # clear, or ends with M when bit 0x10 is set.
    if ($binary[2] != 1
        && (   ($binary[4] == 0 && $cigar =~ m/^[0-9]*M/)
            || ($binary[4] == 1 && $cigar =~ m/.*M$/))) {
        push @five, $_;
    }
}

# Emit the last read. Skipped when no alignment record was read at all, so
# that header-only input does not yield a fabricated, empty record.
emit_read_group($counter, \@five, \@all) if $counter > 0;

# Print the record chosen for one read.
#   $count    - number of records in the group
#   $five_ref - array ref of the group's 5'-end alignment lines
#   $all_ref  - array ref of all lines of the group, in input order
sub emit_read_group {
    my ($count, $five_ref, $all_ref) = @_;

    if (($count == 1 || $count == 2) && @{$five_ref} == 1) {
        print $five_ref->[0]."\n";
    }
    else {
        my $record = mark_unmapped($all_ref->[0]);
        print $record."\n";
    }
    return;
}

# Return the given tab-separated record with bit 0x4 set in column 2 (FLAG);
# all other columns are passed through untouched.
sub mark_unmapped {
    my ($line) = @_;

    my @fields = split /\t/, $line;
    my @bits   = split(//, scalar reverse(dec2bin($fields[1])));
    $bits[2]   = 1;
    $fields[1] = bin2dec(scalar reverse(join("", @bits)));

    return join("\t", @fields);
}

# Convert an unsigned integer into its 32-character binary string
# (most significant bit first, zero-padded on the left).
sub dec2bin {
    my ($number) = @_;
    my $packed = pack("N", $number);
    return unpack("B32", $packed);
}

# Convert a string of binary digits (most significant bit first) into an
# integer. The string is left-padded with zeros and only its last 32
# characters are used.
sub bin2dec {
    my ($bits) = @_;
    my $padded = ("0" x 32) . $bits;
    my $low32  = substr($padded, -32);
    return unpack("N", pack("B32", $low32));
}
10 changes: 10 additions & 0 deletions bin/grep_pg.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# grep_pg.sh
# -------------------
# Filters alignment text from stdin to stdout: drops the @PG header lines and
# labels reads as read 1 or read 2. Other lines whose first field starts with
# "@" pass through unchanged; every remaining line is prefixed with "1" when
# bit 64 of the FLAG column is set and with "2" otherwise.
# Needs an awk that provides and() (e.g. gawk).
#
# -------------------
# Author = yy5

grep -v "^@PG" | awk '
$1 ~ /^@/ { print($0); next }
{
    label = (and($2, 64) > 0) ? "1" : "2"
    print(label $0)
}'
11 changes: 10 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,15 @@ process {
ext.args5 = { '--write-index -l1' }
}

// Tool options for the Hi-C minimap2 mapping process. In that process's
// script, args1 is passed to `samtools fastq`, args2 to `minimap2`,
// args3 to `samtools fixmate` and args4 to `samtools sort`.
withName: CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT {
// args is read by the process but does not appear in its command line.
ext.args = ""
// No extra options for `samtools fastq`.
ext.args1 = ""
// minimap2: SAM output (-a) with the short-read preset (-x sr).
ext.args2 = { "-ax sr" }
// Options for `samtools fixmate`.
ext.args3 = "-mpu"
// Options for `samtools sort`: write an index, compression level 1.
ext.args4 = { "--write-index -l1" }
}


withName: '.*HIC_MAPPING:BAMTOBED_SORT' {
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding" },
Expand Down Expand Up @@ -579,7 +588,7 @@ process {

withName: 'JUICER_TOOLS_PRE' {
ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar'
ext.juicer_jvm_params = '-Xms6g -Xmx48g'
ext.juicer_jvm_params = '-Xms1g -Xmx6g'
publishDir = [
path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" },
mode: params.publish_dir_mode,
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@
"installed_by": ["modules"],
"patch": "modules/nf-core/minimap2/align/minimap2-align.diff"
},
"minimap2/index": {
"branch": "master",
"git_sha": "72e277acfd9e61a9f1368eafb4a9e83f5bcaa9f5",
"installed_by": ["modules"]
},
"mitohifi/findmitoreference": {
"branch": "master",
"git_sha": "f52220e84bfc16a8616a5bb3d6f5bc67d601bdce",
Expand Down
59 changes: 59 additions & 0 deletions modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Maps one container range of a Hi-C CRAM with minimap2 and emits a sorted BAM.
//
// Pipeline (script block): cram_filter extracts containers ${from}-${to} from
// the CRAM, samtools fastq converts them to FASTQ, minimap2 aligns against
// ${ref}, grep_pg.sh drops @PG lines and labels read 1 / read 2,
// filter_five_end.pl keeps one record per read, awk_filter_reads.sh removes
// the read label again, then samtools fixmate and samtools sort write
// ${prefix}_${base}_${chunkid}_mm.bam.
//
// Inputs : meta map, CRAM + index, container range (from, to), file base
//          name, chunk id, read-group line(s), reference path value and the
//          staged reference file(s).
// Outputs: mappedbam (meta, *.bam) and versions.yml.
// ext.args1..args4 configure samtools fastq, minimap2, samtools fixmate and
// samtools sort respectively.
process CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT {
    tag "$meta.id"
    label "process_high"

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' :
        'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }"

    input:
    tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(ref), path(reference)

    output:
    tuple val(meta), path("*.bam"), emit: mappedbam
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // args is read for parity with the config but is not used in the command below.
    def args = task.ext.args ?: ''
    def args1 = task.ext.args1 ?: ''
    def args2 = task.ext.args2 ?: ''
    def args3 = task.ext.args3 ?: ''
    def args4 = task.ext.args4 ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = "1.15" // Staden_io versions break the pipeline
    """
    cram_filter -n ${from}-${to} ${cramfile} - | \\
        samtools fastq ${args1} - | \\
        minimap2 -t${task.cpus} -R '${rglines}' ${args2} ${ref} - | \\
        ${projectDir}/bin/grep_pg.sh | \\
        perl ${projectDir}/bin/filter_five_end.pl | \\
        ${projectDir}/bin/awk_filter_reads.sh | \\
        samtools fixmate ${args3} - - | \\
        samtools sort ${args4} -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mm.bam -

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
        minimap2: \$(minimap2 --version | sed 's/minimap2 //g')
        staden_io: $VERSION
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def base = "45022_3#2"
    def chunkid = "1"
    // VERSION must be declared here as well: the definition in the script
    // block is not visible inside the stub block.
    def VERSION = "1.15" // Staden_io versions break the pipeline
    """
    touch ${prefix}_${base}_${chunkid}_mm.bam

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
        minimap2: \$(minimap2 --version | sed 's/minimap2 //g')
        staden_io: $VERSION
    END_VERSIONS
    """
}
6 changes: 3 additions & 3 deletions modules/local/get_calcuts_params.nf
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
process GET_CALCUTS_PARAMS {
label 'process_single'

conda "conda-forge::python"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3' :
'docker.io/python:3.9' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(model_fk)
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/minimap2/index/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions modules/nf-core/minimap2/index/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions modules/nf-core/minimap2/index/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5869d2a

Please sign in to comment.