Skip to content

Commit

Permalink
call: First step towards revised -m calling (include a prior).
Browse files Browse the repository at this point in the history
  • Loading branch information
pd3 committed Apr 22, 2014
1 parent 94877a1 commit f273772
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 3 deletions.
3 changes: 2 additions & 1 deletion call.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ typedef struct
int32_t *ugts, *cgts; // unconstraind and constrained GTs

// ccall only
double indel_frac, theta, min_lrt, min_perm_p;
double indel_frac, min_lrt, min_perm_p;
double prior_type, pref;
double ref_lk, lk_sum;
int ngrp1_samples, n_perm;
Expand All @@ -93,6 +93,7 @@ typedef struct
int32_t *gts, ac[4]; // GTs and AC (w)
double *pdg; // PLs converted to P(D|G)
float *anno16; int n16; // see anno[16] in bam2bcf.h
double theta; // prior
}
call_t;

Expand Down
3 changes: 2 additions & 1 deletion ccall.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double
{
tmpi = p1->cons_llr;
bcf_update_info_int32(call->hdr, rec, "CLR", &tmpi, 1);
// todo: trio calling with -c
if (p1->cons_gt > 0)
{
char tmp[4];
Expand Down Expand Up @@ -301,7 +302,7 @@ int ccall(call_t *call, bcf1_t *rec)
hts_expand(double, 3*nsmpl, call->npdg, call->pdg);
set_pdg3(call->pl2p, call->PLs, call->pdg, nsmpl, ngts);

double em[10];
double em[10] = {-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.,-1.};
int ret = bcf_em1(call, rec, call->ngrp1_samples, 0x1ff, em);

bcf_p1rst_t pr;
Expand Down
18 changes: 18 additions & 0 deletions mcall.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,21 @@ void mcall_init(call_t *call)
bcf_hdr_append(call->hdr,"##INFO=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-forward , ref-reverse, alt-forward and alt-reverse bases\">");
bcf_hdr_append(call->hdr,"##INFO=<ID=MQ,Number=1,Type=Integer,Description=\"Average mapping quality\">");

// init the prior
if ( call->theta>0 )
{
int i, n = 0;
if ( !call->ploidy ) n = 2*bcf_hdr_nsamples(call->hdr); // all are diploid
else
{
for (i=0; i<bcf_hdr_nsamples(call->hdr); i++)
n += call->ploidy[i];
}
double aM = 0; // watterson factor
for (i=1; i<n; i++) aM += 1./i;
call->theta = log(call->theta*aM);
}

return;
}

Expand Down Expand Up @@ -441,6 +456,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
pdg += ngts;
}
if ( ia==0 ) ref_lk = lk_tot; // likelihood of 0/0 for all samples
else lk_tot += call->theta; // the prior
UPDATE_MAX_LKs(1<<ia);
}

Expand Down Expand Up @@ -471,6 +487,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
if ( val ) { lk_tot += log(val); lk_tot_set = 1; }
pdg += ngts;
}
if ( ia!=0 || ib!=0 ) lk_tot += call->theta; // the prior
UPDATE_MAX_LKs(1<<ia|1<<ib);
}
}
Expand Down Expand Up @@ -510,6 +527,7 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
if ( val ) { lk_tot += log(val); lk_tot_set = 1; }
pdg += ngts;
}
if ( ia!=0 || ib!=0 || ic!=0 ) lk_tot += call->theta; // the prior
UPDATE_MAX_LKs(1<<ia|1<<ib|1<<ic);
}
}
Expand Down
5 changes: 4 additions & 1 deletion vcfcall.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ static void usage(args_t *args)
fprintf(stderr, " -m, --multiallelic-caller alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
fprintf(stderr, " -n, --novel-rate <float>,[...] likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
fprintf(stderr, " -p, --pval-threshold <float> variant if P(ref|D)<FLOAT with -c [0.5] or another allele accepted if P(chi^2)>=1-FLOAT with -m [1e-2]\n");
fprintf(stderr, " -P, --prior <float> mutation rate, 0 for no prior [1e-3]\n");
fprintf(stderr, " -X, --chromosome-X haploid output for male samples (requires PED file with -s)\n");
fprintf(stderr, " -Y, --chromosome-Y haploid output for males and skips females (requires PED file with -s)\n");

Expand Down Expand Up @@ -445,13 +446,14 @@ int main_vcfcall(int argc, char *argv[])
{"constrain",1,0,'C'},
{"multiallelic-caller",0,0,'m'},
{"pval-threshold",1,0,'p'},
{"prior",1,0,'P'},
{"chromosome-X",0,0,'X'},
{"chromosome-Y",0,0,'Y'},
{"novel-rate",1,0,'n'},
{0,0,0,0}
};

while ((c = getopt_long(argc, argv, "h?O:r:R:s:S:t:T:ANMV:vcmp:C:XYn:", loptions, NULL)) >= 0)
while ((c = getopt_long(argc, argv, "h?O:r:R:s:S:t:T:ANMV:vcmp:C:XYn:P:", loptions, NULL)) >= 0)
{
switch (c)
{
Expand Down Expand Up @@ -483,6 +485,7 @@ int main_vcfcall(int argc, char *argv[])
break;
case 'm': args.flag |= CF_MCALL; break; // multiallelic calling method
case 'p': p_arg = atof(optarg); break;
case 'P': args.aux.theta = atof(optarg); break;
case 'n': parse_novel_rate(&args,optarg); break;
case 'r': args.regions = optarg; break;
case 'R': args.regions = optarg; args.regions_is_file = 1; break;
Expand Down

0 comments on commit f273772

Please sign in to comment.