Skip to content

Commit

Permalink
ported pr248 for vcf4.4 - Allow colon in chromosome names
Browse files Browse the repository at this point in the history
  • Loading branch information
vasudeva8 committed Jul 8, 2024
1 parent 274f7c0 commit d535ac0
Show file tree
Hide file tree
Showing 8 changed files with 574 additions and 410 deletions.
962 changes: 564 additions & 398 deletions inc/vcf/validator_detail_v44.hpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/vcf/vcf_v44.ragel
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@


## A contig must be a sequence name allowed by the SAM format ( regex [!-)+-<>-~][!-~]* ) excluding the characters <>[]*=, (colons are allowed)
## A chromosome must be a string with no white-spaces or colons, and may be surronded by < > symbols (for contigs)
meta_contig_char= alnum | ( punct - (':' | '<' | '>' | '[' | ']' | '*' | '=' | ',' ) ) ;
## A chromosome must be a string with no white-spaces, and may be surrounded by < > symbols (for contigs)
meta_contig_char= alnum | ( punct - ( '<' | '>' | '[' | ']' | '*' | '=' | ',' ) ) ;
chrom_basic = (meta_contig_char - '#') (meta_contig_char)* ;
chrom_contig = '<' chrom_basic '>' ;
chromosome = chrom_basic | chrom_contig ;
Expand Down
4 changes: 2 additions & 2 deletions test/input_files/v4.4/failed/failed_body_chrom_001.vcf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
##fileformat=VCFv4.4
##CauseOfFailure=contig ID contains a colon
##CauseOfFailure=contig ID contains a whitespace
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097
chr:1 13302 rs180734498 C T 100 PASS AN=2184;AC=249;AF=0.11 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
chr 1 13302 rs180734498 C T 100 PASS AN=2184;AC=249;AF=0.11 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
4 changes: 2 additions & 2 deletions test/input_files/v4.4/failed/failed_body_chrom_002.vcf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
##fileformat=VCFv4.4
##CauseOfFailure=contig ID contains a whitespace
##CauseOfFailure=contig ID contains a comma
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097
chr 1 13302 rs180734498 C T 100 PASS AN=2184;AC=249;AF=0.11 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
chr,1 13302 rs180734498 C T 100 PASS AN=2184;AC=249;AF=0.11 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
4 changes: 2 additions & 2 deletions test/input_files/v4.4/failed/failed_body_chrom_003.vcf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
##fileformat=VCFv4.4
##CauseOfFailure=contig ID contains a comma
##CauseOfFailure=contig ID contains an illegal character
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097
chr,1 13302 rs180734498 C T 100 PASS AN=2184;AC=249;AF=0.11 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
chr*1 13302 rs180734498 C T 100 PASS AN=2184;AC=249;AF=0.11 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
4 changes: 0 additions & 4 deletions test/input_files/v4.4/failed/failed_body_chrom_004.vcf

This file was deleted.

1 change: 1 addition & 0 deletions test/input_files/v4.4/passed/passed_body_chrom.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
1ABC 13302 rs180734498 C T 100 PASS AN=2184;AC=249;AF=0.11 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
1.A 13302 rs180734498 C T 100 PASS AN=2184;AC=249;AF=0.11 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
<1A> 13302 rs180734498 C T 100 PASS AN=2184;AC=249;AF=0.11 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
1ABC:123 13302 rs180734498 C T 100 PASS AN=2184;AC=249;AF=0.11 GT:DS:GL 0|0:0.050:-0.13,-0.58,-3.62 0|1:1.000:-2.45,-0.00,-5.00
1 change: 1 addition & 0 deletions test/input_files/v4.4/passed/passed_meta_contig.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
##contig=<ID=1,length=123456>
##contig=<ID=1AC,length=123456>
##contig=<ID=ABcd123,length=123456>
##contig=<ID=ABcd:123,length=123456>
##contig=<ID=contig_url,length=123456,URL=ftp://somewhere.org/assembly.fa>
##contig=<ID=contig_accession,species="Homo sapiens",accession=GCA_000001405.1>
#CHROM POS ID REF ALT QUAL FILTER INFO
Expand Down

0 comments on commit d535ac0

Please sign in to comment.