Skip to content

Commit

Permalink
move gap limit handling to scfmap file
Browse files Browse the repository at this point in the history
  • Loading branch information
skoren committed Oct 17, 2024
1 parent 89a8379 commit 212f251
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 12 deletions.
6 changes: 1 addition & 5 deletions src/scripts/bam_rename.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import fasta_util as seq
import re

MAX_GAP_SIZE=100000

# rename a bam based on the input scfmap and tig name mapping files
# in the case of scaffolds, offset the start coordinate by the start of the sequence in the scaffold
#
Expand Down Expand Up @@ -56,9 +54,7 @@
for piece in scfmap[clist]:
numn = re.match(r"\[N(\d+)N]", piece)
if numn:
#1.5 - approximation for hpc->non-hpc transformation.
tuned_numn = min(round(int(numn[1]) * 1.5), MAX_GAP_SIZE)
offset += int(tuned_numn)
offset += int(numn[1])
elif piece in lens:
offsets[names[piece]] = offset
#sys.stderr.write("The offset for %s is %s\n"%(names[piece], offset))
Expand Down
7 changes: 1 addition & 6 deletions src/scripts/fasta_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@

import fasta_util as seq

#TODO: constant to verkko.yml_sample
MAX_GAP_SIZE = 100000

mode = None
output_name = None
namedict = dict()
Expand Down Expand Up @@ -90,12 +87,10 @@
for piece in scfmap[clist]:
numn = re.match(r"\[N(\d+)N]", piece)
if numn:
#1.5 - approximation for hpc->non-hpc transformation.
tuned_numn = min(round(int(numn[1]) * 1.5), MAX_GAP_SIZE)
if not seq:
print(f"ERROR:piece {prev} missing from gapped contig {clist}.", file=sys.stderr)
sys.exit(1)
seq += "N" * int(tuned_numn)
seq += "N" * int(numn[1])
elif piece in pieces:
seq += pieces[piece]
elif seq:
Expand Down
4 changes: 3 additions & 1 deletion src/scripts/get_layout_from_mbg.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
layout_output = sys.argv[6]
layscf_output = sys.argv[7]

MAX_GAP_SIZE = 100000
min_contig_no_trim = 500000
min_read_len_fraction = 0.5
min_read_fromend_fraction = min_read_len_fraction/1.5
Expand Down Expand Up @@ -255,7 +256,8 @@ def get_exact_match_length(clusters):
#pp is either path without gaps or gap. In latest case do nothing
gp = re.match(r"\[(N\d+N)(?:[^\]]+){0,1}\]", pp)
if gp:
contig_pieces[fullname].append("[" + gp.group(1) + "]")
tuned_numn = min(round(int(gp.group(1)[1:-1]) * 1.5), MAX_GAP_SIZE)
contig_pieces[fullname].append("[N" + str(tuned_numn) + "N]")
continue

pieceid = pieceid + 1
Expand Down

0 comments on commit 212f251

Please sign in to comment.