Skip to content

Commit

Permalink
code backup; more changes coming later
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Mar 4, 2013
1 parent 733410b commit 59bc934
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 13 deletions.
28 changes: 20 additions & 8 deletions bwamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ mem_opt_t *mem_opt_init()
mem_opt_t *o;
o = calloc(1, sizeof(mem_opt_t));
o->flag = 0;
o->a = 1; o->b = 4; o->q = 6; o->r = 1; o->w = 100;
o->a = 1; o->b = 4; o->q = 6; o->r = 1; o->w = 60; o->max_w = 500;
o->pen_unpaired = 9;
o->pen_clip = 5;
o->min_seed_len = 19;
Expand Down Expand Up @@ -492,7 +492,7 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen)

void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
{
int i, k;
int i, k, max_off[2], aw[2]; // aw: actual bandwidth used in extension
int64_t rlen, rmax[2], tmp, max = 0;
const mem_seed_t *s;
uint8_t *rseq = 0;
Expand Down Expand Up @@ -549,40 +549,52 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int

a = kv_pushp(mem_alnreg_t, *av);
memset(a, 0, sizeof(mem_alnreg_t));
a->w = aw[0] = aw[1] = opt->w;

if (s->qbeg) { // left extension
uint8_t *rs, *qs;
int qle, tle, gtle, gscore;
int qle, tle, gtle, gscore, tmps = -1;
qs = malloc(s->qbeg);
for (i = 0; i < s->qbeg; ++i) qs[i] = query[s->qbeg - 1 - i];
tmp = s->rbeg - rmax[0];
rs = malloc(tmp);
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, opt->w, s->len * opt->a, &qle, &tle, &gtle, &gscore);
for (aw[0] = opt->w;; aw[0] <<= 1) {
a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, aw[0], s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
if (bwa_verbose >= 4) printf("L\t%d < %d; w=%d; max_off=%d\n", tmps, a->score, aw[0], max_off[0]); fflush(stdout);
if (a->score == tmps || aw[0]<<1 > opt->max_w || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
tmps = a->score;
}
// check whether we prefer to reach the end of the query
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) a->qb = s->qbeg - qle, a->rb = s->rbeg - tle; // local hits
else a->qb = 0, a->rb = s->rbeg - gtle; // reach the end
free(qs); free(rs);
} else a->score = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;

if (s->qbeg + s->len != l_query) { // right extension
int qle, tle, qe, re, gtle, gscore;
int qle, tle, qe, re, gtle, gscore, tmps = -1;
qe = s->qbeg + s->len;
re = s->rbeg + s->len - rmax[0];
assert(re >= 0);
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, opt->w, a->score, &qle, &tle, &gtle, &gscore);
for (aw[1] = opt->w;; aw[1] <<= 1) {
a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, aw[1], a->score, &qle, &tle, &gtle, &gscore, &max_off[1]);
if (bwa_verbose >= 4) printf("R\t%d < %d; w=%d; max_off=%d\n", tmps, a->score, aw[1], max_off[1]); fflush(stdout);
if (a->score == tmps || aw[1]<<1 > opt->max_w || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
tmps = a->score;
}
// similar to the above
if (gscore <= 0 || gscore <= a->score - opt->pen_clip) a->qe = qe + qle, a->re = rmax[0] + re + tle;
else a->qe = l_query, a->re = rmax[0] + re + gtle;
} else a->qe = l_query, a->re = s->rbeg + s->len;
if (bwa_verbose >= 4) { printf("[%d] score=%d\t[%d,%d) <=> [%ld,%ld)\n", k, a->score, a->qb, a->qe, (long)a->rb, (long)a->re); fflush(stdout); }
if (bwa_verbose >= 4) { printf("[%d]\taw={%d,%d}\tscore=%d\t[%d,%d) <=> [%ld,%ld)\n", k, aw[0], aw[1], a->score, a->qb, a->qe, (long)a->rb, (long)a->re); fflush(stdout); }

// compute seedcov
for (i = 0, a->seedcov = 0; i < c->n; ++i) {
const mem_seed_t *t = &c->seeds[i];
if (t->qbeg >= a->qb && t->qbeg + t->len <= a->qe && t->rbeg >= a->rb && t->rbeg + t->len <= a->re) // seed fully contained
a->seedcov += t->len; // this is not very accurate, but for approx. mapQ, this is good enough
}
a->w = aw[0] > aw[1]? aw[0] : aw[1];
}
free(srt); free(rseq);
}
Expand Down Expand Up @@ -750,7 +762,7 @@ void mem_sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, b
h.qual = p->secondary >= 0? 0 : mem_approx_mapq_se(opt, p);
if (k == 0) mapq0 = h.qual;
else if (h.qual > mapq0) h.qual = mapq0;
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP, m);
bwa_hit2sam(&str, opt->mat, opt->q, opt->r, p->w, bns, pac, s, &h, opt->flag&MEM_F_HARDCLIP, m);
}
} else bwa_hit2sam(&str, opt->mat, opt->q, opt->r, opt->w, bns, pac, s, 0, opt->flag&MEM_F_HARDCLIP, m);
s->sam = str.s;
Expand Down
2 changes: 2 additions & 0 deletions bwamem.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ typedef struct {
int pen_unpaired; // phred-scaled penalty for unpaired reads
int pen_clip; // clipping penalty. This score is not deducted from the DP score.
int w; // band width
int max_w; // max band width

int flag; // see MEM_F_* macros
int min_seed_len; // minimum seed length
Expand All @@ -45,6 +46,7 @@ typedef struct {
int sub; // 2nd best SW score
int csub; // SW score of a tandem hit
int sub_n; // approximate number of suboptimal hits
int w; // actual band width used in extension
int seedcov; // length of regions coverged by seeds
int secondary; // index of the parent hit shadowing the current hit; <0 if primary
} mem_alnreg_t;
Expand Down
13 changes: 9 additions & 4 deletions ksw.c
Original file line number Diff line number Diff line change
Expand Up @@ -359,11 +359,11 @@ typedef struct {
int32_t h, e;
} eh_t;

int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore)
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off)
{
eh_t *eh; // score array
int8_t *qp; // query profile
int i, j, k, gapoe = gapo + gape, beg, end, max, max_i, max_j, max_gap, max_ie, gscore;
int i, j, k, gapoe = gapo + gape, beg, end, max, max_i, max_j, max_gap, max_ie, gscore, max_off;
if (h0 < 0) h0 = 0;
// allocate memory
qp = malloc(qlen * m);
Expand All @@ -386,6 +386,7 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
w = w < max_gap? w : max_gap;
// DP loop
max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1;
max_off = 0;
beg = 0, end = qlen;
for (i = 0; LIKELY(i < tlen); ++i) {
int f = 0, h1, m = 0, mj = -1;
Expand All @@ -410,7 +411,7 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
h = h > e? h : e;
h = h > f? h : f;
h1 = h; // save H(i,j) to h1 for the next column
mj = m > h? mj : j;
mj = m > h? mj : j; // record the position where max score is achieved
m = m > h? m : h; // m is stored at eh[mj+1]
h -= gapoe;
h = h > 0? h : 0;
Expand All @@ -426,7 +427,10 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
gscore = gscore > h1? gscore : h1;
}
if (m == 0) break;
if (m > max) max = m, max_i = i, max_j = mj;
if (m > max) {
max = m, max_i = i, max_j = mj;
max_off = max_off > abs(mj - i)? max_off : abs(mj - i);
}
// update beg and end for the next round
for (j = mj; j >= beg && eh[j].h; --j);
beg = j + 1;
Expand All @@ -439,6 +443,7 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
if (_tle) *_tle = max_i + 1;
if (_gtle) *_gtle = max_ie + 1;
if (_gscore) *_gscore = gscore;
if (_max_off) *_max_off = max_off;
return max;
}

Expand Down
2 changes: 1 addition & 1 deletion ksw.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ extern "C" {
*
* @return best semi-local alignment score
*/
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *qle, int *tle, int *gtle, int *gscore);
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);

#ifdef __cplusplus
}
Expand Down

0 comments on commit 59bc934

Please sign in to comment.