Skip to content

Commit

Permalink
Fix merging of Number=G format strings
Browse files Browse the repository at this point in the history
Format strings appear to be fixed-length arrays (of length max_len, src_len,
etc.) placed adjacently in memory and only optionally NUL-terminated, i.e., the
same sort of variable-up-to-a-fixed-length strings that strncpy() operates on.

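In copy_string_field(), don't copy trailing NULs: stop scanning a field at the first NUL as well as at a comma.
In merge_format_string(), pad the *output* string with NULs (the padding was previously appended to the temporary per-sample string instead).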
  • Loading branch information
jmarshall committed Feb 1, 2016
1 parent e0890a1 commit 5d25295
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion test/norm.merge.out
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
1 105 . TAAACCCTAAA TAA,TAACCCTAAA 999 PASS INDEL;AN=4;AC=2,2;DP=19;ISTR=SomeString;XRF=1e+06,2e+06,500000;XRI=1111,2222,5555;XRS=AAA,BBB,DDD;XAF=1e+06,500000;XAI=1111,5555;XAS=AAA,DDD;XGF=1e+06,2e+06,3e+06,500000,.,9e+09;XGI=1111,2222,3333,5555,.,9999;XGS=A,B,C,E,.,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 1/2:1,2,3,4,.,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF 1/2:1,2,3,4,.,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF
2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
2 114 . TC TTCC,TTC 999 FAIL1 INDEL;AN=4;AC=2,2 GT:DP 1/2:1 1/2:1
2 114 . TC TTCC,TTC 999 FAIL1 INDEL;AN=4;AC=2,2 GT:DP:FGS 1/2:1:A,BB,CCC,EEEE,.,FFFFF 1/2:1:AA,BB,CCC,EEEE,.,FFFFF
2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
20 3 . GATG CTATG,GACT 999 PASS INDEL;AN=4;AC=2,2 GT 2/1 2/1
20 5 id0001;id0002 TGGG TAC,TG,TGGGG,AC . PASS INDEL;AN=4;AC=2,2,0,0 GT:PL:DP 1/2:1,2,3,4,.,6,7,.,.,10,11,.,.,.,15:1 1/2:1,2,3,4,.,6,7,.,.,10,11,.,.,.,15:1
Expand Down
2 changes: 1 addition & 1 deletion test/norm.merge.strict.out
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
1 105 . TAAACCCTAAA TAA,TAACCCTAAA 999 PASS INDEL;AN=4;AC=2,2;DP=19;ISTR=SomeString;XRF=1e+06,2e+06,500000;XRI=1111,2222,5555;XRS=AAA,BBB,DDD;XAF=1e+06,500000;XAI=1111,5555;XAS=AAA,DDD;XGF=1e+06,2e+06,3e+06,500000,.,9e+09;XGI=1111,2222,3333,5555,.,9999;XGS=A,B,C,E,.,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 1/2:1,2,3,4,.,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF 1/2:1,2,3,4,.,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF
2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
2 114 . TC TTCC,TTC 999 PASS INDEL;AN=4;AC=2,2 GT:DP 1/2:1 1/2:1
2 114 . TC TTCC,TTC 999 PASS INDEL;AN=4;AC=2,2 GT:DP:FGS 1/2:1:A,BB,CCC,EEEE,.,FFFFF 1/2:1:AA,BB,CCC,EEEE,.,FFFFF
2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
20 3 . GATG CTATG,GACT 999 PASS INDEL;AN=4;AC=2,2 GT 2/1 2/1
20 5 id0001;id0002 TGGG TAC,TG,TGGGG,AC . PASS INDEL;AN=4;AC=2,2,0,0 GT:PL:DP 1/2:1,2,3,4,.,6,7,.,.,10,11,.,.,.,15:1 1/2:1,2,3,4,.,6,7,.,.,10,11,.,.,.,15:1
Expand Down
4 changes: 2 additions & 2 deletions test/norm.merge.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
1 105 . TAAACCCTAAA TAACCCTAAA 999 PASS INDEL;AN=4;AC=2;DP=19;ISTR=SomeString;XRF=1e+06,500000;XRI=1111,5555;XRS=AAA,DDD;XAF=500000;XAI=5555;XAS=DDD;XGF=1e+06,500000,9e+09;XGI=1111,5555,9999;XGS=A,E,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 0/1:1,4,6:1:1e+06,500000:1111,5555:AAAA,CC:500000:5555:BB:1e+06,500000,9e+09:1111,5555,9999:A,EEEE,FFFFF 0/1:1,4,6:1:1e+06,500000:1111,5555:AAAA,CC:500000:5555:BB:1e+06,500000,9e+09:1111,5555,9999:A,EEEE,FFFFF
2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
2 114 . TC TTCC 999 FAIL1 INDEL;AN=4;AC=2 GT:DP 1/0:1 1/0:1
2 114 . TC TTC 999 PASS INDEL;AN=4;AC=2 GT:DP 0/1:1 0/1:1
2 114 . TC TTCC 999 FAIL1 INDEL;AN=4;AC=2 GT:DP:FGS 1/0:1:A,BB,CCC 1/0:1:AA,BB,CCC
2 114 . TC TTC 999 PASS INDEL;AN=4;AC=2 GT:DP:FGS 0/1:1:A,EEEE,FFFFF 0/1:1:AA,EEEE,FFFFF
2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1
20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1
20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0
Expand Down
2 changes: 1 addition & 1 deletion vcfmerge.c
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,7 @@ int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst
}
if ( ith_src!=isrc ) return -1; // requested field not found
int end_src = start_src;
while ( end_src<src_len && src[end_src]!=',' ) end_src++;
while ( end_src<src_len && src[end_src] && src[end_src]!=',' ) end_src++;

int nsrc_cpy = end_src - start_src;
if ( nsrc_cpy==1 && src[start_src]=='.' ) return 0; // don't write missing values, dst is already initialized
Expand Down
2 changes: 1 addition & 1 deletion vcfnorm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1287,7 +1287,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
{
kstring_t *tmp = &args->tmp_str[i];
kputsn(tmp->s,tmp->l,&str);
for (j=tmp->l; j<max_len; j++) kputc(0,tmp);
for (j=tmp->l; j<max_len; j++) kputc('\0',&str);
}
args->ntmp_arr2 = str.m;
args->tmp_arr2 = (uint8_t*)str.s;
Expand Down

0 comments on commit 5d25295

Please sign in to comment.