Skip to content

Commit

Permalink
updated mappy and example.c
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Oct 16, 2017
1 parent adf6cd7 commit b24c9c9
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 15 deletions.
3 changes: 1 addition & 2 deletions example.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ int main(int argc, char *argv[])
mm_reg1_t *r = &reg[j];
assert(r->p); // with MM_F_CIGAR, this should not be NULL
printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]);
printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re,
r->p->blen - r->p->n_ambi - r->p->n_diff, r->p->blen, r->mapq);
printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, r->mlen, r->blen, r->mapq);
for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings!
printf("%d%c", r->p->cigar[i]>>4, "MIDSHN"[r->p->cigar[i]&0xf]);
putchar('\n');
Expand Down
10 changes: 7 additions & 3 deletions python/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,13 @@ properties:

* **mapq**: mapping quality

* **NM**: number of mismatches and gaps in the alignment

* **blen**: length of the alignment, including both alignment matches and gaps
but excluding ambiguous bases.

* **mlen**: length of the matching bases in the alignment, excluding ambiguous
base matches.

* **NM**: number of mismatches, gaps and ambiguous poistions in the alignment

* **trans_strand**: transcript strand. +1 if on the forward strand; -1 if on the
reverse strand; 0 if unknown
Expand All @@ -124,7 +128,7 @@ the following format:

::

q_st q_en strand ctg ctg_len r_st r_en blen-NM blen mapq cg:Z:cigar_str
q_st q_en strand ctg ctg_len r_st r_en mlen blen mapq cg:Z:cigar_str

It is effectively the PAF format without the QueryName and QueryLength columns
(the first two columns in PAF).
Expand Down
7 changes: 4 additions & 3 deletions python/cmappy.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ typedef struct {
const char *ctg;
int32_t ctg_start, ctg_end;
int32_t qry_start, qry_end;
int32_t blen, NM, ctg_len;
int32_t blen, mlen, NM, ctg_len;
uint8_t mapq, is_primary;
int8_t strand, trans_strand;
int32_t n_cigar32;
Expand All @@ -27,8 +27,9 @@ static inline void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
h->qry_start = r->qs, h->qry_end = r->qe;
h->strand = r->rev? -1 : 1;
h->mapq = r->mapq;
h->blen = r->p->blen;
h->NM = r->p->n_diff;
h->mlen = r->mlen;
h->blen = r->blen;
h->NM = r->blen - r->mlen + r->p->n_ambi;
h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
h->is_primary = (r->id == r->parent);
h->n_cigar32 = r->p->n_cigar;
Expand Down
5 changes: 4 additions & 1 deletion python/cmappy.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,20 @@ cdef extern from "minimap.h":
int flag
int bw
int max_gap, max_gap_ref
int max_frag_len
int max_chain_skip
int min_cnt
int min_chain_score
float mask_level
float pri_ratio
int best_n
float min_iden
int max_join_long, max_join_short
int min_join_flank_sc
int a, b, q, e, q2, e2
int noncan
int zdrop
int end_bonus
int min_dp_max
int min_ksw_len
int pe_ori, pe_bonus
Expand Down Expand Up @@ -86,7 +89,7 @@ cdef extern from "cmappy.h":
const char *ctg
int32_t ctg_start, ctg_end
int32_t qry_start, qry_end
int32_t blen, NM, ctg_len
int32_t blen, mlen, NM, ctg_len
uint8_t mapq, is_primary
int8_t strand, trans_strand
int32_t n_cigar32
Expand Down
16 changes: 11 additions & 5 deletions python/mappy.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ cmappy.mm_reset_timer()
cdef class Alignment:
cdef int _ctg_len, _r_st, _r_en
cdef int _q_st, _q_en
cdef int _NM, _blen
cdef int _NM, _mlen, _blen
cdef int8_t _strand, _trans_strand
cdef uint8_t _mapq, _is_primary
cdef _ctg, _cigar # these are python objects

def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, blen, NM, trans_strand):
def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand):
self._ctg, self._ctg_len, self._r_st, self._r_en = str(ctg), cl, cs, ce
self._strand, self._q_st, self._q_en = strand, qs, qe
self._NM, self._blen = NM, blen
self._NM, self._mlen, self._blen = NM, mlen, blen
self._mapq = mapq
self._cigar = cigar
self._is_primary = is_primary
Expand All @@ -39,6 +39,12 @@ cdef class Alignment:
@property
def trans_strand(self): return self._trans_strand

@property
def blen(self): return self._blen

@property
def mlen(self): return self._mlen

@property
def NM(self): return self._NM

Expand Down Expand Up @@ -71,7 +77,7 @@ cdef class Alignment:
elif self._trans_strand < 0: ts = 'ts:A:-'
else: ts = 'ts:A:.'
return "\t".join([str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en),
str(self._blen - self._NM), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str])
str(self._mlen), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str])

cdef class ThreadBuffer:
cdef cmappy.mm_tbuf_t *_b
Expand Down Expand Up @@ -135,7 +141,7 @@ cdef class Aligner:
for k in range(h.n_cigar32):
c = h.cigar32[k]
cigar.append([c>>4, c&0xf])
yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.blen, h.NM, h.trans_strand)
yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand)
cmappy.mm_free_reg1(&regs[i])
free(regs)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def readme():
keywords = 'sequence-alignment',
scripts = ['python/minimap2.py'],
ext_modules = [Extension('mappy',
sources = [module_src, 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c',
sources = [module_src, 'align.c', 'bseq.c', 'chain.c', 'format.c', 'hit.c', 'index.c', 'pe.c',
'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c',
'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c'],
depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h',
Expand Down

0 comments on commit b24c9c9

Please sign in to comment.