Skip to content

Commit 55e53a9

Browse files
committed
Add support for querying genotype (per-sample FORMAT) fields in +split-vep,
so that queries like this are possible: bcftools +split-vep -s worst -f'[%POS\t%SAMPLE\t%GT\t%Consequence\n]' -i'GT="alt"'
1 parent 7661cc8 commit 55e53a9

File tree

6 files changed

+23
-7
lines changed

6 files changed

+23
-7
lines changed

convert.c

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1222,11 +1222,7 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
12221222
else if ( !strcmp("_CHROM_POS_ID",key) ) { fmt->type = T_CHROM_POS_ID; }
12231223
else if ( !strcmp("RSX",key) ) { fmt->type = T_RSX; }
12241224
else if ( !strcmp("VKX",key) ) { fmt->type = T_VKX; }
1225-
else if ( id>=0 && bcf_hdr_idinfo_exists(convert->header,BCF_HL_INFO,id) )
1226-
{
1227-
fmt->type = T_INFO;
1228-
fprintf(stderr,"Warning: Assuming INFO/%s\n", key);
1229-
}
1225+
else if ( id>=0 && bcf_hdr_idinfo_exists(convert->header,BCF_HL_INFO,id) ) { fmt->type = T_INFO; }
12301226
}
12311227
if ( fmt->type==T_PBINOM )
12321228
{
@@ -1568,7 +1564,7 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
15681564
for (js=0; js<convert->nsamples; js++)
15691565
{
15701566
// Skip samples when filtering was requested
1571-
if ( *convert->subset_samples && !(*convert->subset_samples)[js] ) continue;
1567+
if ( convert->subset_samples && *convert->subset_samples && !(*convert->subset_samples)[js] ) continue;
15721568

15731569
// Here comes a hack designed for TBCSQ. When running on large files,
15741570
// such as 1000GP, there are too many empty fields in the output and

plugins/split-vep.c

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,7 @@ typedef struct
9494
*cols_csq; // the current CSQ transcript split into fields
9595
int min_severity, max_severity; // ignore consequences outside this severity range
9696
int select_tr; // one of SELECT_TR_*
97+
uint8_t *smpl_pass; // for filtering at sample level, used with -f
9798
}
9899
args_t;
99100

@@ -326,6 +327,8 @@ static void init_data(args_t *args)
326327
args->filter = filter_init(args->hdr_out, args->filter_str);
327328
max_unpack |= filter_max_unpack(args->filter);
328329
args->sr->max_unpack = max_unpack;
330+
if ( max_unpack & BCF_UN_FMT )
331+
convert_set_option(args->convert, subset_samples, &args->smpl_pass);
329332
}
330333

331334
// Severity scale
@@ -575,7 +578,7 @@ static void process_record(args_t *args, bcf1_t *rec)
575578
}
576579
if ( args->filter )
577580
{
578-
int pass = filter_test(args->filter, rec, NULL);
581+
int pass = filter_test(args->filter, rec, (const uint8_t**) &args->smpl_pass);
579582
if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1;
580583
if ( !pass ) return;
581584
}

test/split-vep.3.vcf

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
##fileformat=VCFv4.2
2+
##contig=<ID=1,length=249250621>
3+
##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|ALLELE_NUM|DISTANCE|STRAND|FLAGS|VARIANT_CLASS|SYMBOL_SOURCE|HGNC_ID|CANONICAL|TSL|APPRIS|CCDS|ENSP|SWISSPROT|TREMBL|UNIPARC|SOURCE|GENE_PHENO|SIFT|PolyPhen|DOMAINS|miRNA|HGVS_OFFSET|AF|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|AA_AF|EA_AF|gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|MAX_AF|MAX_AF_POPS|CLIN_SIG|SOMATIC|PHENO|PUBMED|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE|LoF|LoF_filter|LoF_flags|LoF_info|CADD_PHRED|CADD_RAW|gnomAD2.1|gnomAD2.1_AF_raw|gnomAD2.1_AF_popmax|gnomAD2.1_AF_afr|gnomAD2.1_AF_amr|gnomAD2.1_AF_asj|gnomAD2.1_AF_eas|gnomAD2.1_AF_fin|gnomAD2.1_AF_nfe|gnomAD2.1_AF_oth|gnomAD2.1_AF_sas">
4+
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
5+
##FORMAT=<ID=GT,Number=.,Type=String,Description="Genotype">
6+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SMPLA SMPLB
7+
1 14464 . A T 2235.88 PASS AF=1;CSQ=T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000423562|unprocessed_pseudogene|10/10||ENST00000423562.1:n.1568T>A||1568|||||rs546169444|1||-1||SNV|HGNC|38034||||||||||||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS|||||||||||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000438504|unprocessed_pseudogene|12/12||ENST00000438504.2:n.1682T>A||1682|||||rs546169444|1||-1||SNV|HGNC|38034|YES|||||||||||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS|||||||||||||||||||||||||,T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000456328|processed_transcript||||||||||rs546169444|1|55|1||SNV|HGNC|37102|YES|||||||||||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS|||||||||||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000488147|unprocessed_pseudogene|11/11||ENST00000488147.1:n.1291T>A||1291|||||rs546169444|1||-1||SNV|HGNC|38034||||||||||||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS|||||||||||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000538476|unprocessed_pseudogene|13/13||ENST00000538476.1:n.1530T>A||1530|||||rs546169444|1||-1||SNV|HGNC|38034||||||||||||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS|||||||||||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000541675|unprocessed_pseudogene|9/9||ENST00000541675.1:n.1315T>A||1315|||||rs546169444|1||-1||SNV|HGNC|38034||||||||||||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS|||||||||||||||||||||||||,T|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000000002|open_chromatin_region||||||||||rs546169444|1||||SNV||||||||||||||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|
SAS||||||||||||||||||||||||| GT 0/0 0/1
8+
1 14469 . C T 37.38 PASS AF=0.5;CSQ=T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000423562|unprocessed_pseudogene|10/10||ENST00000423562.1:n.1563G>A||1563||||||1||-1||SNV|HGNC|38034|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000438504|unprocessed_pseudogene|12/12||ENST00000438504.2:n.1677G>A||1677||||||1||-1||SNV|HGNC|38034|YES||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000456328|processed_transcript|||||||||||1|60|1||SNV|HGNC|37102|YES||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000488147|unprocessed_pseudogene|11/11||ENST00000488147.1:n.1286G>A||1286||||||1||-1||SNV|HGNC|38034|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000538476|unprocessed_pseudogene|13/13||ENST00000538476.1:n.1525G>A||1525||||||1||-1||SNV|HGNC|38034|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000541675|unprocessed_pseudogene|9/9||ENST00000541675.1:n.1310G>A||1310||||||1||-1||SNV|HGNC|38034|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,T|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000000002|open_chromatin_region|||||||||||1||||SNV||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| GT 0/0 0/1
9+
1 14522 . G A 627.64 PASS AF=0.2;CSQ=A|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000423562|unprocessed_pseudogene|10/10||ENST00000423562.1:n.1510C>T||1510||||||1||-1||SNV|HGNC|38034|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,A|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000438504|unprocessed_pseudogene|12/12||ENST00000438504.2:n.1624C>T||1624||||||1||-1||SNV|HGNC|38034|YES||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000456328|processed_transcript|||||||||||1|113|1||SNV|HGNC|37102|YES||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,A|intron_variant&non_coding_transcript_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000488147|unprocessed_pseudogene||10/10|ENST00000488147.1:n.1254-21C>T||||||||1||-1||SNV|HGNC|38034|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,A|intron_variant&non_coding_transcript_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000538476|unprocessed_pseudogene||12/12|ENST00000538476.1:n.1492-20C>T||||||||1||-1||SNV|HGNC|38034|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||,A|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000541675|unprocessed_pseudogene|9/9||ENST00000541675.1:n.1257C>T||1257||||||1||-1||SNV|HGNC|38034||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| GT 0/0 0/1

test/split-vep.7.out

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
14464 regulatory_region_variant
2+
14469 regulatory_region_variant
3+
14522 non_coding_transcript_exon_variant

test/split-vep.8.out

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
14464 SMPLB 0/1 regulatory_region_variant
2+
14469 SMPLB 0/1 regulatory_region_variant
3+
14522 SMPLB 0/1 non_coding_transcript_exon_variant

test/test.pl

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -366,6 +366,8 @@
366366
test_vcf_plugin($opts,in=>'split-vep',out=>'split-vep.4.out',cmd=>'+split-vep',args=>qq[-s primary:missense+ -f'%POS\\n']);
367367
test_vcf_plugin($opts,in=>'split-vep.2',out=>'split-vep.5.out',cmd=>'+split-vep',args=>qq[-s worst -f'%POS\\t%AF\\n']);
368368
test_vcf_plugin($opts,in=>'split-vep.2',out=>'split-vep.6.out',cmd=>'+split-vep',args=>qq[-s worst -f'%POS\\t%INFO/AF\\n']);
369+
test_vcf_plugin($opts,in=>'split-vep.3',out=>'split-vep.7.out',cmd=>'+split-vep',args=>qq[-s worst -f'%POS\\t%Consequence\\n']);
370+
test_vcf_plugin($opts,in=>'split-vep.3',out=>'split-vep.8.out',cmd=>'+split-vep',args=>qq[-s worst -f'[%POS\\t%SAMPLE\\t%GT\\t%Consequence\\n]' -i'GT="alt"']);
369371
test_vcf_concat($opts,in=>['concat.1.a','concat.1.b'],out=>'concat.1.vcf.out',do_bcf=>0,args=>'');
370372
test_vcf_concat($opts,in=>['concat.1.a','concat.1.b'],out=>'concat.1.bcf.out',do_bcf=>1,args=>'');
371373
test_vcf_concat($opts,in=>['concat.2.a','concat.2.b'],out=>'concat.2.vcf.out',do_bcf=>0,args=>'-a');

0 commit comments

Comments
 (0)