hgvs_c to hgvs_p projection throws IndexError for "NM_001253909.2:c.416_417insGTG" #707
Closed
Description
The variant is a (disruptive) `inframe_insertion` in the stop codon with a `stop_retained` effect. It corresponds to SPDI `GRCh37:10:5139789:A:AGTG`.
Based on the current `main` branch, the following test fails:
diff --git a/tests/test_hgvs_variantmapper.py b/tests/test_hgvs_variantmapper.py
index 63a61ce..dcd5bb4 100644
--- a/tests/test_hgvs_variantmapper.py
+++ b/tests/test_hgvs_variantmapper.py
@@ -39,6 +39,12 @@ class Test_VariantMapper_Exceptions(unittest.TestCase):
cls.vm = hgvs.variantmapper.VariantMapper(cls.hdp)
cls.hp = hgvs.parser.Parser()
+ def test_map_stop_retained(self):
+ hgvs_c = "NM_001253909.2:c.416_417insGTG"
+ var_c = self.hp.parse_hgvs_variant(hgvs_c)
+ with self.assertRaises(HGVSInvalidVariantError):
+ self.vm.c_to_p(var_c)
+
def test_gcrp_invalid_input_type(self):
hgvs_g = "NC_000007.13:g.36561662C>T"
hgvs_c = "NM_001637.3:c.1582G>A"
The exception backtrace is as follows:
============================================================================================= test session starts ==============================================================================================
platform linux -- Python 3.10.6, pytest-7.4.2, pluggy-1.3.0 -- /home/holtgrem_c/Development/hgvs/venv/bin/python
cachedir: .pytest_cache
HGVS_CACHE_MODE: learn
HGVS_SEQREPO_URL: None
UTA_DB_URL: postgresql://anonymous:anonymous@uta.biocommons.org:/uta/uta_20210129
hgvs.easy.hdp=postgresql://anonymous:anonymous@uta.biocommons.org:/uta/uta_20210129
hgvs.easy.hdp.seqfetcher.source='bioutils.seqfetcher (network fetching)'
rootdir: /home/holtgrem_c/Development/hgvs
configfile: pytest.ini
plugins: recording-0.13.0, cov-4.1.0, anyio-4.0.0
collected 1 item
tests/test_hgvs_variantmapper.py::Test_VariantMapper_Exceptions::test_map_stop_retained FAILED [100%]
=================================================================================================== FAILURES ===================================================================================================
_____________________________________________________________________________ Test_VariantMapper_Exceptions.test_map_stop_retained _____________________________________________________________________________
self = <test_hgvs_variantmapper.Test_VariantMapper_Exceptions testMethod=test_map_stop_retained>
def test_map_stop_retained(self):
hgvs_c = "NM_001253909.2:c.416_417insGTG"
var_c = self.hp.parse_hgvs_variant(hgvs_c)
with self.assertRaises(HGVSInvalidVariantError):
> self.vm.c_to_p(var_c)
tests/test_hgvs_variantmapper.py:46:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
src/hgvs/variantmapper.py:447: in c_to_p
var_p = builder.build_hgvsp()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
def build_hgvsp(self):
"""Compare two amino acid sequences; generate an hgvs tag from the output
:return list of variants in sequence order
:rtype list of dict
"""
variants = []
if not self._is_ambiguous and len(self._alt_seq) > 0:
do_delins = True
if self._ref_seq == self._alt_seq:
# Silent p. variant
start = self._alt_data.variant_start_aa
if start - 1 < len(self._ref_seq):
deletion = self._ref_seq[start - 1]
insertion = deletion
else:
start = ""
deletion = ""
insertion = ""
self._is_frameshift = False
variants.append({"start": start, "ins": insertion, "del": deletion})
do_delins = False
elif self._is_substitution:
if len(self._ref_seq) == len(self._alt_seq):
diff_pos = [
(i, self._ref_seq[i], self._alt_seq[i])
for i in range(len(self._ref_seq))
if self._ref_seq[i] != self._alt_seq[i]
]
if len(diff_pos) == 1:
(start, deletion, insertion) = diff_pos[0]
variants.append({"start": start + 1, "ins": insertion, "del": deletion})
do_delins = False
if do_delins:
if self._alt_data.is_frameshift:
start = self._alt_data.variant_start_aa - 1
while self._ref_seq[start] == self._alt_seq[start]:
start += 1
insertion = list(self._alt_seq[start:])
deletion = list(self._ref_seq[start:])
variants.append({"start": start + 1, "ins": insertion, "del": deletion})
else: # non-frameshifting delins or dup
# get size diff from diff in ref/alt lengths
start = self._alt_data.variant_start_aa - 1
delta = len(self._alt_seq) - len(self._ref_seq)
> while self._ref_seq[start] == self._alt_seq[start]:
E IndexError: string index out of range
src/hgvs/utils/altseq_to_hgvsp.py:102: IndexError
Also see here:
The VEP prediction is correct: `*` => `**` (the stop codon becomes two consecutive stop codons), plus the consequence `inframe_insertion&stop_retained_variant`.
#Uploaded_variation | Location | Allele | Consequence | IMPACT | SYMBOL | Gene | Feature_type | Feature | BIOTYPE | EXON | INTRON | HGVSc | HGVSp | cDNA_position | CDS_position | Protein_position | Amino_acids | Codons | Existing_variation | DISTANCE | STRAND | FLAGS | SYMBOL_SOURCE | HGNC_ID | MANE_SELECT | MANE_PLUS_CLINICAL | TSL | APPRIS | REFSEQ_MATCH | REFSEQ_OFFSET | GIVEN_REF | USED_REF | BAM_EDIT | SIFT | PolyPhen | AF | CLIN_SIG | SOMATIC | PHENO | PUBMED | MOTIF_NAME | MOTIF_POS | HIGH_INF_POS | MOTIF_SCORE_CHANGE | TRANSCRIPTION_FACTORS |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
. | 10:5139789-5139789 | GTG | inframe_insertion,stop_retained_variant | MODERATE | AKR1C3 | 8644 | Transcript | NM_001253909.2 | protein_coding | 3/3 | - | - | - | 447-448 | 416-417 | 139 | /* | taa/taGTGa | - | - | 1 | - | EntrezGene | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |