Skip to content

Commit

Permalink
fixed bug identical geneIDs btw search and refspec
Browse files Browse the repository at this point in the history
  • Loading branch information
trvinh committed Dec 20, 2023
1 parent 26bba47 commit 9dc4fe9
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 5 deletions.
2 changes: 2 additions & 0 deletions fdog/libs/corecompile.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ def compile_core(args):
leaves.reverse()
flag_node = 0
for leaf in leaves:
if not leaf in tax_ids:
continue
if flag_node == 1:
break
if not leaf == refspec_id and \
Expand Down
12 changes: 7 additions & 5 deletions fdog/libs/orthosearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,22 +139,24 @@ def hamstr(args):
ortho_candi[hmm_hit] = search_seqs.fetch(hmm_hit)
continue

# remove seed protein from candidata list
if search_taxon == refspec:
if seed_id in ortho_candi:
ortho_candi.pop(seed_id)
### (5) check co-ortholog if more than 1 HMM hits are accepted
if len(ortho_candi) == 0:
output_fn.print_stdout(
silentOff, 'WARNING: Reciprocity not fulfulled! No ortholog found!')
else:
output_fn.print_debug(
debug, 'Candidates for checking co-orthologs', ortho_candi.keys())
best_ortho = list(ortho_candi.keys())[0]
ortho_final = fasta_fn.add_seq_to_dict(
ortho_final, '%s|%s|%s|1' % (seqName, search_taxon, best_ortho),
ortho_candi[best_ortho])

if rep == False:
if len(ortho_candi) == 1:
ortho_final = fasta_fn.add_seq_to_dict(
ortho_final, '%s|%s|%s|1' % (seqName, search_taxon, best_ortho),
ortho_candi[best_ortho])
else:
if len(ortho_candi) > 1:
aln_co_fa = '%s/coortho_%s_%s.fa' % (
outpath, seqName, search_taxon)
with open(aln_co_fa, 'w') as aln_co_fa_out:
Expand Down
4 changes: 4 additions & 0 deletions fdog/libs/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ def identify_seed_id(seqFile, refspec, corepath, debug, silentOff):
output_fn.print_debug(debug, 'Identify seed ID', 'Input seed ID not found!')
# otherwise, perform blast search
blast_xml = blast_fn.do_blastsearch(seqFile, refspec_db, evalBlast = 0.001)
if not blast_xml:
print(f'ERROR: No blast output!')
print(f'You can check it by running:\nblastp -query {seqFile} -db {corepath}/{refspec}/{refspec} -evalue 0.001 -outfmt 7')
sys.exit()
blast_out = blast_fn.parse_blast_xml(blast_xml)
if len(blast_out['hits']) < 1:
print(f'ERROR: Cannot find seed sequence {blast_out["query"]} in genome of reference species!')
Expand Down

0 comments on commit 9dc4fe9

Please sign in to comment.