Skip to content

Commit

Permalink
New cnv command for copy number variation
Browse files Browse the repository at this point in the history
  • Loading branch information
pd3 committed Sep 2, 2014
1 parent 34f212f commit 93f17fe
Show file tree
Hide file tree
Showing 5 changed files with 958 additions and 29 deletions.
109 changes: 83 additions & 26 deletions HMM.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,6 @@ void hmm_run_viterbi(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
int i,j, nstates = hmm->nstates;
for (i=0; i<nstates; i++) hmm->vprob[i] = 1./nstates;


//fprintf(stderr,"DEBUG: ");
//for (i=0; i<2; i++)
//{
// for (j=0; j<2; j++) fprintf(stderr," %e", MAT(hmm->tprob_arr,2,i,j));
//}
//fprintf(stderr,"\n");

// Run Viterbi
uint32_t prev_pos = sites[0];
for (i=0; i<n; i++)
Expand All @@ -138,6 +130,7 @@ void hmm_run_viterbi(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)

_set_tprob(hmm, pos_diff);
if ( hmm->set_tprob ) hmm->set_tprob(hmm, prev_pos, sites[i], hmm->set_tprob_data);
prev_pos = sites[i];

double vnorm = 0;
for (j=0; j<nstates; j++)
Expand All @@ -155,24 +148,6 @@ void hmm_run_viterbi(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
}
for (j=0; j<nstates; j++) hmm->vprob_tmp[j] /= vnorm;
double *tmp = hmm->vprob; hmm->vprob = hmm->vprob_tmp; hmm->vprob_tmp = tmp;

#if 0
fprintf(stderr,"%d: vprob=", sites[i]+1);
for (j=0; j<nstates; j++) fprintf(stderr," %f", hmm->vprob[j]);
fprintf(stderr,"\teprob=");
for (j=0; j<nstates; j++) fprintf(stderr," %f", eprob[j]);
fprintf(stderr,"\tvpath=");
for (j=0; j<nstates; j++) fprintf(stderr," %d", vpath[j]);
fprintf(stderr,"\ttprob=");
for (j=0; j<nstates; j++)
{
int k;
for (k=0; k<nstates; k++) fprintf(stderr," %f", MAT(hmm->curr_tprob,hmm->nstates,j,k));
}
fprintf(stderr,"\t %d\n", pos_diff);
#endif

prev_pos = sites[i];
}

// Find the most likely state
Expand All @@ -189,6 +164,85 @@ void hmm_run_viterbi(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
}
}

void hmm_run_fwd_bwd(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
{
// Init arrays when run for the first time
if ( hmm->nsites < n )
{
hmm->nsites = n;
hmm->fwd = (double*) realloc(hmm->fwd, sizeof(double)*(hmm->nsites+1)*hmm->nstates);
}
if ( !hmm->fwd )
{
hmm->fwd = (double*) malloc(sizeof(double)*hmm->nstates*(hmm->nsites+1));
hmm->bwd = (double*) malloc(sizeof(double)*hmm->nstates);
hmm->bwd_tmp = (double*) malloc(sizeof(double)*hmm->nstates);
}


// Init all states with equal likelihood
int i,j,k, nstates = hmm->nstates;
for (i=0; i<nstates; i++) hmm->fwd[i] = 1./hmm->nstates;
for (i=0; i<nstates; i++) hmm->bwd[i] = 1./hmm->nstates;

// Run fwd
uint32_t prev_pos = sites[0];
for (i=0; i<n; i++)
{
double *fwd_prev = &hmm->fwd[i*nstates];
double *fwd = &hmm->fwd[(i+1)*nstates];
double *eprob = &eprobs[i*nstates];

int pos_diff = sites[i] == prev_pos ? 0 : sites[i] - prev_pos - 1;

_set_tprob(hmm, pos_diff);
if ( hmm->set_tprob ) hmm->set_tprob(hmm, prev_pos, sites[i], hmm->set_tprob_data);
prev_pos = sites[i];

double norm = 0;
for (j=0; j<nstates; j++)
{
double pval = 0;
for (k=0; k<nstates; k++)
pval += fwd_prev[k] * MAT(hmm->curr_tprob,hmm->nstates,j,k);
fwd[j] = pval * eprob[j];
norm += fwd[j];
}
for (j=0; j<nstates; j++) fwd[j] /= norm;
}

// Run bwd
double *bwd = hmm->bwd, *bwd_tmp = hmm->bwd_tmp;
prev_pos = sites[n-1];
for (i=0; i<n; i++)
{
double *fwd = &hmm->fwd[(n-i)*nstates];
double *eprob = &eprobs[(n-i-1)*nstates];

int pos_diff = sites[n-i-1] == prev_pos ? 0 : prev_pos - sites[n-i-1] - 1;

_set_tprob(hmm, pos_diff);
if ( hmm->set_tprob ) hmm->set_tprob(hmm, sites[n-i-1], prev_pos, hmm->set_tprob_data);
prev_pos = sites[n-i-1];

double norm = 0;
for (j=0; j<nstates; j++)
{
double pval = 0;
for (k=0; k<nstates; k++)
pval += bwd[k] * eprob[k] * MAT(hmm->curr_tprob,hmm->nstates,k,j);
bwd_tmp[j] = pval;
norm += pval;
}
for (j=0; j<nstates; j++)
{
bwd_tmp[j] /= norm;
fwd[j] *= bwd_tmp[j]; // fwd now stores fwd*bwd
}
double *tmp = bwd_tmp; bwd_tmp = bwd; bwd = tmp;
}
}

void hmm_destroy(hmm_t *hmm)
{
free(hmm->vprob);
Expand All @@ -197,6 +251,9 @@ void hmm_destroy(hmm_t *hmm)
free(hmm->curr_tprob);
free(hmm->tmp);
free(hmm->tprob_arr);
free(hmm->fwd);
free(hmm->bwd);
free(hmm->bwd_tmp);
free(hmm);
}

13 changes: 12 additions & 1 deletion HMM.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct _hmm_t
double *vprob, *vprob_tmp; // viterbi probs [nstates]
uint8_t *vpath; // viterbi path [nstates*nsites]
double *bwd, *bwd_tmp; // bwd probs [nstates]
double *fwd; // fwd probs [nsites]
double *fwd; // fwd probs [nstates*(nsites+1)]
int nsites;

int ntprob_arr; // number of pre-calculated tprob matrices
Expand Down Expand Up @@ -80,6 +80,17 @@ void hmm_set_tprob_func(hmm_t *hmm, set_tprob_f set_tprob, void *data);
* vpath[nstates*i].
*/
void hmm_run_viterbi(hmm_t *hmm, int nsites, double *eprob, uint32_t *sites);

/**
* hmm_run_fwd_bwd() - run the forward-backward algorithm
* @nsites: number of sites
* @eprob: emission probabilities for each site and state (nsites x nstates)
* @sites: list of positions
*
* When done, hmm->fwd[] contains the calculated fwd*bwd probabilities. The
* probability of i-th state at j-th site can be accessed as fwd[j*nstates+i].
*/
void hmm_run_fwd_bwd(hmm_t *hmm, int nsites, double *eprob, uint32_t *sites);
void hmm_destroy(hmm_t *hmm);

#endif
Expand Down
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ DFLAGS =
OBJS = main.o vcfindex.o tabix.o \
vcfstats.o vcfisec.o vcfmerge.o vcfquery.o vcffilter.o filter.o vcfsom.o \
vcfnorm.o vcfgtcheck.o vcfview.o vcfannotate.o vcfroh.o vcfconcat.o \
vcfcall.o mcall.o vcmp.o gvcf.o reheader.o convert.o vcfconvert.o HMM.o tsv2vcf.o \
vcfcall.o mcall.o vcmp.o gvcf.o reheader.o convert.o vcfconvert.o tsv2vcf.o \
vcfcnv.o HMM.o \
ccall.o em.o prob1.o kmin.o # the original samtools calling
INCLUDES = -I. -I$(HTSDIR)

Expand Down Expand Up @@ -95,6 +96,8 @@ convert_h = convert.h $(htslib_vcf_h)
tsv2vcf_h = tsv2vcf.h $(htslib_vcf_h)
filter_h = filter.h $(htslib_vcf_h)
prob1_h = prob1.h $(htslib_vcf_h) $(call_h)
roh_h = HMM.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(HTSDIR)/htslib/kstring.h $(HTSDIR)/htslib/kseq.h $(bcftools_h)
cnv_h = HMM.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h)

main.o: main.c $(htslib_hts_h) version.h $(bcftools_h)
vcfannotate.o: vcfannotate.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(HTSDIR)/htslib/kseq.h $(bcftools_h) vcmp.h $(filter_h)
Expand All @@ -108,7 +111,8 @@ vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfu
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) vcmp.h $(HTSDIR)/htslib/khash.h
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(bcftools_h) rbuf.h
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h)
vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(HTSDIR)/htslib/kstring.h $(HTSDIR)/htslib/kseq.h $(bcftools_h) rbuf.h
vcfroh.o: vcfroh.c $(roh_h)
vcfcnv.o: vcfcnv.c $(cnv_h)
vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h)
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h)
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h)
Expand Down
5 changes: 5 additions & 0 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ int main_vcfroh(int argc, char *argv[]);
int main_vcfconcat(int argc, char *argv[]);
int main_reheader(int argc, char *argv[]);
int main_vcfconvert(int argc, char *argv[]);
int main_vcfcnv(int argc, char *argv[]);

typedef struct
{
Expand Down Expand Up @@ -131,6 +132,10 @@ static cmd_t cmds[] =
.alias = "call",
.help = "SNP/indel calling"
},
{ .func = main_vcfcnv,
.alias = "cnv",
.help = "-HMM CNV calling" // do not advertise yet
},
{ .func = main_vcffilter,
.alias = "filter",
.help = "filter VCF/BCF files using fixed thresholds"
Expand Down
Loading

0 comments on commit 93f17fe

Please sign in to comment.