From d0d4025aaa6656226efb13987bd183ba93ac1f8f Mon Sep 17 00:00:00 2001 From: Dave Lawrence Date: Tue, 6 Feb 2024 16:56:47 +1030 Subject: [PATCH 1/2] #722 Babelfish VCF bugs --- src/hgvs/extras/babelfish.py | 27 ++++++++++++++++----------- tests/data/cache-py3.hdp | Bin 921577 -> 921714 bytes tests/test_hgvs_extras_babelfish.py | 12 +++++++++++- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/hgvs/extras/babelfish.py b/src/hgvs/extras/babelfish.py index b1a7b49d..77f0a95b 100644 --- a/src/hgvs/extras/babelfish.py +++ b/src/hgvs/extras/babelfish.py @@ -74,16 +74,24 @@ def hgvs_to_vcf(self, var_g): return chrom, start_i + 1, ref, alt, typ def vcf_to_g_hgvs(self, chrom, position, ref, alt): + # VCF spec https://samtools.github.io/hts-specs/VCFv4.1.pdf + # says for REF/ALT "Each base must be one of A,C,G,T,N (case insensitive)" + ref = ref.upper() + alt = alt.upper() + ac = self.name_to_ac_map[chrom] - # Strip common prefix - if len(alt) > 1 and len(ref) > 1: - pfx = os.path.commonprefix([ref, alt]) - lp = len(pfx) - if lp > 0: - ref = ref[lp:] - alt = alt[lp:] - position += lp + if ref != alt: + # Strip common prefix + if len(alt) > 1 and len(ref) > 1: + pfx = os.path.commonprefix([ref, alt]) + lp = len(pfx) + if lp > 0: + ref = ref[lp:] + alt = alt[lp:] + position += lp + elif alt == ".": + alt = ref if ref == "": # Insert # Insert uses coordinates around the insert point. @@ -93,9 +101,6 @@ def vcf_to_g_hgvs(self, chrom, position, ref, alt): start = position end = position + len(ref) - 1 - if alt == ".": - alt = ref - var_g = SequenceVariant( ac=ac, type="g", diff --git a/tests/data/cache-py3.hdp b/tests/data/cache-py3.hdp index e7ed304c4ce67c901eece222d8d9475b312ba12f..a7ad77a51c1b083d3f2b7595f4df91984d041335 100644 GIT binary patch delta 94 zcmaDk-{R8(i-s1)7N!>F7M2#)7Pc1l7LFFq7OocV7M?A<55yU}wm%f-W%@Ba%#BTG vx_vS$8{2)YUrb(KrU&rx$xW}m#wtDCijPl#?H)+<0Z3Gajmgn*N~sF7M2#)7Pc1l7LFFq7OocV7M?A<55yT6w?7o;W%|L^!{|Px GR1W~rY!M0o diff --git a/tests/test_hgvs_extras_babelfish.py b/tests/test_hgvs_extras_babelfish.py index 924dd3c2..c0d2d1c5 100644 --- a/tests/test_hgvs_extras_babelfish.py +++ b/tests/test_hgvs_extras_babelfish.py @@ -8,7 +8,17 @@ "NC_000006.12:g.49949407=", [], ("6", 49949407, "A", ".", "identity"), - [("6", 49949407, "A", "A", "identity")], + [("6", 49949407, "A", "A", "identity"), + # Test case insensitivity + ("6", 49949407, "A", "a", "identity"), + ("6", 49949407, "a", "A", "identity"),] + ), + # Test multi-base identity + ( + "NC_000006.12:g.49949407_49949408=", + [], + ("6", 49949407, "AA", ".", "identity"), + [("6", 49949407, "AA", "AA", "identity")], ), # snv ( From 30cd83ff270e1bcb22eb588bb437c8071ab6e4d3 Mon Sep 17 00:00:00 2001 From: Dave Lawrence Date: Thu, 21 Mar 2024 14:28:32 +1030 Subject: [PATCH 2/2] Update cache with tests --- tests/data/cache-py3.hdp | Bin 923986 -> 924046 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/data/cache-py3.hdp b/tests/data/cache-py3.hdp index 75fdcce4e51749a3921e4d14139368f03da73746..54d2f9a632400c42075d85d222126ba6b92ebc72 100644 GIT binary patch delta 69 zcmcb#$)azwMMDc?3sVbo3rh=Y3tJ0&3r7oQ3s(zw3r`F07QUC_jB~fY66Z@}oUWhD ZD#mtC>lc&PgXsZ$d@^iIj*e4G^#DaC7ODUM delta 48 zcmeC%Y;kFmMMDc?3sVbo3rh=Y3tJ0&3r7oQ3s(zw3r`F07QUC_jEUP{iSwl~GL`B9 E0HA~qy8r+H