Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions sierralocal/nucaminohook.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def __init__(self, algorithm, binary=None, program='post'):
# initialize gene map
self.pol_start = 2085
self.pol_nuc_map = {
'CA': (1186, 1878),
'PR': (2253, 2549),
'RT': (2550, 4229), # incorrectly includes RNase H, emulating sierrapy
'IN': (4230, 5096)
Expand Down Expand Up @@ -374,8 +375,6 @@ def get_genes(self, pol_aligned_sites, pol_first_aa, pol_last_aa):
"""
Determines the first POL gene that is present in
the query sequence, by virtue of gene breakpoints
TODO: sierra uses different minimum numbers of sites per gene
(40, 60 and 30 for PR, RT and IN)
@param pol_aligned_sites: list, sublist holds alignment program
output aligned POL sites
@param pol_first_aa: int, location of first amino acid in pol
Expand All @@ -385,7 +384,12 @@ def get_genes(self, pol_aligned_sites, pol_first_aa, pol_last_aa):
first na position in pol, last na position in pol]
"""
# good here
min_overlap = {'PR': 40, 'RT': 60, 'IN': 30}
min_overlap = {
'PR': 40,
'RT': 60,
'IN': 30,
'CA': 30, # arbitrary value: no source could be found for the PR/RT/IN thresholds above
}
genes = []
for gene, bounds in self.gene_map.items():
aa_start, aa_end = bounds
Expand Down