Skip to content
This repository has been archived by the owner on Jan 4, 2020. It is now read-only.

Commit

Permalink
The old logic will tag some non-overlapping pairs as "contain", which
can reduce the contig size significantly
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason Chin committed Aug 11, 2013
1 parent fe000fa commit 698a69e
Showing 1 changed file with 21 additions and 13 deletions.
34 changes: 21 additions & 13 deletions src/py/p_overlapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def get_ovelap_alignment(seq1, seq0):
aln_range = aln_range_ptr[0]
kup.free_kmer_match(kmer_match_ptr)
s1, e1, s0, e0 = aln_range.s1, aln_range.e1, aln_range.s2, aln_range.e2
e1 += K
e0 += K
e1 += K + K/2
e0 += K + K/2
kup.free_aln_range(aln_range)
len_1 = len(seq1)
len_0 = len(seq0)
Expand All @@ -37,33 +37,41 @@ def get_ovelap_alignment(seq1, seq0):
e0 = len_0
do_aln = False
contain_status = "none"
#print s0, e0, s1, e1
if e1 - s1 > 500:
if s1 < 100 and len_1 - e1 < 100:
if s0 < s1 and s0 > 24:
do_aln = False
elif s1 <= s0 and s1 > 24:
do_aln = False
elif s1 < 24 and len_1 - e1 < 24:
do_aln = False
contain_status = "contains"
elif s0 < 100 and len_0 - e0 < 100:
#print "X1"
elif s0 < 24 and len_0 - e0 < 24:
do_aln = False
contain_status = "contained"
#print "X2"
else:
do_aln = True
if s0 < s1:
s1 -= s0 #assert s1 > 0
s0 = 0
e1 = len_1
if len_1 - s1 >= len_0:
do_aln = False
contain_status = "contained"
#if len_1 - s1 >= len_0:
# do_aln = False
# contain_status = "contains"
# print "X3", s0, e0, len_0, s1, e1, len_1


elif s1 <= s0:
s0 -= s1 #assert s1 > 0
s1 = 0
e0 = len_0
if len_0 - s0 >= len_1:
do_aln = False
contain_status = "contained"


print s0, e0, s1, e1
#if len_0 - s0 >= len_1:
# do_aln = False
# contain_status = "contained"
# print "X4"
#if abs( (e1 - s1) - (e0 - s0 ) ) > 200: #avoid overlap alignment for big indels
# do_aln = False

Expand Down Expand Up @@ -215,7 +223,7 @@ def lookup_data_iterator( q_seqs ):
total_index_base = len(seqs) * 1000
K = 14
build_look_up(seqs, K)
pool = mp.Pool(6)
pool = mp.Pool(8)

for r in pool.imap(get_candidate_hits, lookup_data_iterator( q_seqs)):
for h in r:
Expand Down

0 comments on commit 698a69e

Please sign in to comment.