Skip to content

Commit

Permalink
fixed bug identical geneIDs btw search and refspec
Browse files Browse the repository at this point in the history
  • Loading branch information
trvinh committed Dec 14, 2023
1 parent 959e0a0 commit 26bba47
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 60 deletions.
122 changes: 63 additions & 59 deletions fdog/libs/orthosearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,74 +83,78 @@ def hamstr(args):
silentOff, 'WARNING: No HMM hit found!')
else:
for hmm_hit in hmm_hits:
if not hmm_hit == seed_id: # only if search taxon == refspec
hmm_hit_fa = '%s/hmm_%s_%s_%s.fa' % (
outpath, seqName, search_taxon, hmm_hit)
with open(hmm_hit_fa, 'w') as hmm_fa_out:
hmm_fa_out.write('>%s\n%s' % (hmm_hit, search_seqs.fetch(hmm_hit)))
blast_xml = blast_fn.do_blastsearch(
hmm_hit_fa, refspec_db, evalBlast = evalBlast, lowComplexityFilter = lowComplexityFilter)
blast_out = blast_fn.parse_blast_xml(blast_xml)
output_fn.print_debug(debug, 'BLAST hits', blast_out)
if noCleanup == False:
os.remove(hmm_hit_fa)
### (4) check reciprocity
### (4a) if refspec_seq_id == best blast hit
if len(blast_out['hits'].keys()) > 0:
hmm_hit_fa = '%s/hmm_%s_%s_%s.fa' % (
outpath, seqName, search_taxon, hmm_hit)
with open(hmm_hit_fa, 'w') as hmm_fa_out:
hmm_fa_out.write('>%s\n%s' % (hmm_hit, search_seqs.fetch(hmm_hit)))
blast_xml = blast_fn.do_blastsearch(
hmm_hit_fa, refspec_db, evalBlast = evalBlast, lowComplexityFilter = lowComplexityFilter)
blast_out = blast_fn.parse_blast_xml(blast_xml)
output_fn.print_debug(debug, 'BLAST hits', blast_out)
if noCleanup == False:
os.remove(hmm_hit_fa)
### (4) check reciprocity
### (4a) if refspec_seq_id == best blast hit
if len(blast_out['hits'].keys()) > 0:
best_blast_hit = list(blast_out['hits'].keys())[0]
if best_blast_hit == hmm_hit and len(blast_out['hits'].keys()) > 1:
best_blast_hit = list(blast_out['hits'].keys())[0]
if best_blast_hit == hmm_hit and len(blast_out['hits'].keys()) > 1:
best_blast_hit = list(blast_out['hits'].keys())[1]
if seed_id == best_blast_hit:
output_fn.print_stdout(
silentOff,
'%s accepted (best blast hit is ref)' % (blast_out['query']))
ortho_candi[hmm_hit] = search_seqs.fetch(hmm_hit)
continue
else:
### (4b) else, check for co-ortholog ref
if checkCoorthologsRefOff == False:
aln_fa = '%s/blast_%s_%s_%s_%s_%s.fa' % (
outpath, seqName, seed_id, search_taxon,
hmm_hit, best_blast_hit)
with open(aln_fa, 'w') as aln_fa_out:
aln_fa_out.write(
'>%s\n%s\n>%s\n%s\n>%s\n%s' % (
seed_id, refspec_seqs.fetch(seed_id),
hmm_hit, search_seqs.fetch(hmm_hit),
best_blast_hit, refspec_seqs.fetch(best_blast_hit)
)
if seed_id == best_blast_hit:
output_fn.print_stdout(
silentOff,
'%s accepted (best blast hit is ref)' % (blast_out['query']))
ortho_candi[hmm_hit] = search_seqs.fetch(hmm_hit)
continue
else:
### (4b) else, check for co-ortholog ref
if checkCoorthologsRefOff == False:
aln_fa = '%s/blast_%s_%s_%s_%s_%s.fa' % (
outpath, seqName, seed_id, search_taxon,
hmm_hit, best_blast_hit)
with open(aln_fa, 'w') as aln_fa_out:
aln_fa_out.write(
'>%s\n%s\n>%s\n%s\n>%s\n%s' % (
seed_id, refspec_seqs.fetch(seed_id),
hmm_hit, search_seqs.fetch(hmm_hit),
best_blast_hit, refspec_seqs.fetch(best_blast_hit)
)
fasta_fn.remove_dup(aln_fa)
aln_seq = align_fn.do_align(aligner, aln_fa)
output_fn.print_debug(
debug, 'Alignment for checking co-ortholog ref', aln_seq)
br_dist = align_fn.calc_Kimura_dist(aln_seq, best_blast_hit, seed_id, debug)
bh_dist = align_fn.calc_Kimura_dist(aln_seq, best_blast_hit, hmm_hit, debug)
output_fn.print_debug(
debug, 'Check if distance blast_vs_ref < blast_vs_hmm',
'd_br = %s; d_bh = %s' % (br_dist, bh_dist))
if noCleanup == False:
os.remove(aln_fa)
if br_dist == bh_dist == 0 or br_dist < bh_dist:
output_fn.print_stdout(
silentOff,
'%s accepted (best blast hit is co-ortholog to ref)'
% (blast_out['query'])
)
ortho_candi[hmm_hit] = search_seqs.fetch(hmm_hit)
continue
)
fasta_fn.remove_dup(aln_fa)
aln_seq = align_fn.do_align(aligner, aln_fa)
output_fn.print_debug(
debug, 'Alignment for checking co-ortholog ref', aln_seq)
br_dist = align_fn.calc_Kimura_dist(aln_seq, best_blast_hit, seed_id, debug)
bh_dist = align_fn.calc_Kimura_dist(aln_seq, best_blast_hit, hmm_hit, debug)
output_fn.print_debug(
debug, 'Check if distance blast_vs_ref < blast_vs_hmm',
'd_br = %s; d_bh = %s' % (br_dist, bh_dist))
if noCleanup == False:
os.remove(aln_fa)
if br_dist == bh_dist == 0 or br_dist < bh_dist:
output_fn.print_stdout(
silentOff,
'%s accepted (best blast hit is co-ortholog to ref)'
% (blast_out['query'])
)
ortho_candi[hmm_hit] = search_seqs.fetch(hmm_hit)
continue

### (5) check co-ortholog if more than 1 HMM hits are accepted
if len(ortho_candi) == 0:
output_fn.print_stdout(
silentOff, 'WARNING: Reciprocity not fulfulled! No ortholog found!')
else:
best_ortho = list(ortho_candi.keys())[0]
if not best_ortho == seed_id:
ortho_final = fasta_fn.add_seq_to_dict(
ortho_final, '%s|%s|%s|1' % (seqName, search_taxon, best_ortho),
ortho_candi[best_ortho])
ortho_final = fasta_fn.add_seq_to_dict(
ortho_final, '%s|%s|%s|1' % (seqName, search_taxon, best_ortho),
ortho_candi[best_ortho])

if rep == False:
if len(ortho_candi) > 1:
if len(ortho_candi) == 1:
ortho_final = fasta_fn.add_seq_to_dict(
ortho_final, '%s|%s|%s|1' % (seqName, search_taxon, best_ortho),
ortho_candi[best_ortho])
else:
aln_co_fa = '%s/coortho_%s_%s.fa' % (
outpath, seqName, search_taxon)
with open(aln_co_fa, 'w') as aln_co_fa_out:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

setup(
name="fdog",
version="0.1.24",
version="0.1.25",
python_requires='>=3.7.0',
description="Feature-aware Directed OrtholoG search tool",
long_description=long_description,
Expand Down

0 comments on commit 26bba47

Please sign in to comment.