diff --git a/consensus.c b/consensus.c index 454c2dda6..b95e1d678 100644 --- a/consensus.c +++ b/consensus.c @@ -76,6 +76,7 @@ typedef struct int fa_src_pos; // last genomic coordinate read from the input fasta (0-based) char prev_base; // this is only to validate the REF allele in the VCF - the modified fa_buf cannot be used for inserts following deletions, see 600#issuecomment-383186778 int prev_base_pos; // the position of prev_base + int prev_is_insert; rbuf_t vcf_rbuf; bcf1_t **vcf_buf; @@ -531,11 +532,18 @@ static void apply_variant(args_t *args, bcf1_t *rec) ialt = 1; } - // Overlapping variant? Can be still OK iff this is an insertion - if ( rec->pos <= args->fa_frz_pos && (rec->pos!=args->fa_frz_pos || rec->d.allele[0][0]!=rec->d.allele[ialt][0]) ) + // Overlapping variant? + if ( rec->pos <= args->fa_frz_pos ) { - fprintf(stderr,"The site %s:%d overlaps with another variant, skipping...\n", bcf_seqname(args->hdr,rec),rec->pos+1); - return; + // Can be still OK iff this is an insertion (and which does not follow another insertion, see #888). + // This still may not be enough for more complicated cases with multiple duplicate positions + // and other types in between. In such case let the user normalize the VCF and remove duplicates. + if ( !(bcf_get_variant_type(rec,ialt) & VCF_INDEL) || rec->d.var[ialt].n <= 0 || args->prev_is_insert ) + { + fprintf(stderr,"The site %s:%d overlaps with another variant, skipping...\n", bcf_seqname(args->hdr,rec),rec->pos+1); + return; + } + } int len_diff = 0, alen = 0; @@ -592,7 +600,7 @@ static void apply_variant(args_t *args, bcf1_t *rec) } error( "The fasta sequence does not match the REF allele at %s:%d:\n" - " .vcf: [%s]\n" + " .vcf: [%s] <- (REF)\n" " .vcf: [%s] <- (ALT)\n" " .fa: [%s]%c%s\n", bcf_seqname(args->hdr,rec),rec->pos+1, rec->d.allele[0], rec->d.allele[ialt], args->fa_buf.s+idx, @@ -624,9 +632,13 @@ static void apply_variant(args_t *args, bcf1_t *rec) args->prev_base = rec->d.allele[0][rec->rlen - 1]; args->prev_base_pos = rec->pos + rec->rlen - 1; + args->prev_is_insert = 0; } else { + args->prev_is_insert = 1; + args->prev_base_pos = rec->pos; + // insertion ks_resize(&args->fa_buf, args->fa_buf.l + len_diff); memmove(args->fa_buf.s + idx + rec->rlen + len_diff, args->fa_buf.s + idx + rec->rlen, args->fa_buf.l - idx - rec->rlen); diff --git a/test/consensus5.fa b/test/consensus5.fa new file mode 100644 index 000000000..8dabca94a --- /dev/null +++ b/test/consensus5.fa @@ -0,0 +1,2 @@ +>4:86698932-86698942 +GTGAATGAATG diff --git a/test/consensus5.out b/test/consensus5.out new file mode 100644 index 000000000..2dae82853 --- /dev/null +++ b/test/consensus5.out @@ -0,0 +1,2 @@ +>4:86698932-86698942 +GTGAATGAATGAATG diff --git a/test/consensus5.vcf b/test/consensus5.vcf new file mode 100644 index 000000000..045a24096 --- /dev/null +++ b/test/consensus5.vcf @@ -0,0 +1,7 @@ +##fileformat=VCFv4.1 +##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz +##contig= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00119 +4 86698932 rs34791496 G GTGAA 100 PASS . GT 0|1 +4 86698932 rs538589807;rs569662225 GTGAA GTGAATGAA,G 100 PASS . GT 0|1 diff --git a/test/test.pl b/test/test.pl index ce30c2652..21a40fbfa 100755 --- a/test/test.pl +++ b/test/test.pl @@ -393,6 +393,7 @@ test_vcf_consensus($opts,in=>'consensus3',out=>'consensus3.out',fa=>'consensus2.fa',args=>'-H 2 -M "?"'); test_vcf_consensus($opts,in=>'consensus3',out=>'consensus3.2.out',fa=>'consensus2.fa',args=>'-H 2 -M "?" -p xx_'); test_vcf_consensus($opts,in=>'consensus4',out=>'consensus4.out',fa=>'consensus2.fa',args=>''); +test_vcf_consensus($opts,in=>'consensus5',out=>'consensus5.out',fa=>'consensus5.fa',args=>'--haplotype LA'); test_mpileup($opts,in=>[qw(mpileup.1 mpileup.2 mpileup.3)],out=>'mpileup/mpileup.1.out',args=>q[-r17:100-150],test_list=>1); test_mpileup($opts,in=>[qw(mpileup.1 mpileup.2 mpileup.3)],out=>'mpileup/mpileup.2.out',args=>q[-a DP,DV -r17:100-600]); # test files from samtools mpileup test suite test_mpileup($opts,in=>[qw(mpileup.1)],out=>'mpileup/mpileup.3.out',args=>q[-B --ff 0x14 -r17:1050-1060]); # test file converted to vcf from samtools mpileup test suite