Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 1 addition & 38 deletions cram/cram_encode.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "cram/cram.h"
#include "cram/os.h"
#include "sam_internal.h" // for nibble2base
#include "htslib/hts.h"
#include "htslib/hts_endian.h"

Expand Down Expand Up @@ -2681,44 +2682,6 @@ static cram_container *cram_next_container(cram_fd *fd, bam_seq_t *b) {
return c;
}

/*
* Convert a nibble encoded BAM sequence to a string of bases.
*
* We do this 2 bp at a time for speed. Equiv to:
*
* for (i = 0; i < len; i++)
* seq[i] = seq_nt16_str[bam_seqi(nib, i)];
*/
static void nibble2base(uint8_t *nib, char *seq, int len) {
static const char code2base[512] =
"===A=C=M=G=R=S=V=T=W=Y=H=K=D=B=N"
"A=AAACAMAGARASAVATAWAYAHAKADABAN"
"C=CACCCMCGCRCSCVCTCWCYCHCKCDCBCN"
"M=MAMCMMMGMRMSMVMTMWMYMHMKMDMBMN"
"G=GAGCGMGGGRGSGVGTGWGYGHGKGDGBGN"
"R=RARCRMRGRRRSRVRTRWRYRHRKRDRBRN"
"S=SASCSMSGSRSSSVSTSWSYSHSKSDSBSN"
"V=VAVCVMVGVRVSVVVTVWVYVHVKVDVBVN"
"T=TATCTMTGTRTSTVTTTWTYTHTKTDTBTN"
"W=WAWCWMWGWRWSWVWTWWWYWHWKWDWBWN"
"Y=YAYCYMYGYRYSYVYTYWYYYHYKYDYBYN"
"H=HAHCHMHGHRHSHVHTHWHYHHHKHDHBHN"
"K=KAKCKMKGKRKSKVKTKWKYKHKKKDKBKN"
"D=DADCDMDGDRDSDVDTDWDYDHDKDDDBDN"
"B=BABCBMBGBRBSBVBTBWBYBHBKBDBBBN"
"N=NANCNMNGNRNSNVNTNWNYNHNKNDNBNN";

int i, len2 = len/2;
seq[0] = 0;

for (i = 0; i < len2; i++)
// Note size_t cast helps gcc optimiser.
memcpy(&seq[i*2], &code2base[(size_t)nib[i]*2], 2);

if ((i *= 2) < len)
seq[i] = seq_nt16_str[bam_seqi(nib, i)];
}

/*
* Converts a single bam record into a cram record.
* Possibly used within a thread.
Expand Down
21 changes: 10 additions & 11 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -3034,22 +3034,21 @@ static int sam_format1_append(const bam_hdr_t *h, const bam1_t *b, kstring_t *st
uint8_t *s = bam_get_seq(b);
if (ks_resize(str, str->l+2+2*c->l_qseq) < 0) goto mem_err;
char *cp = str->s + str->l;
int lq2 = c->l_qseq / 2;
for (i = 0; i < lq2; i++) {
uint8_t b = s[i];
cp[i*2+0] = "=ACMGRSVTWYHKDBN"[b>>4];
cp[i*2+1] = "=ACMGRSVTWYHKDBN"[b&0xf];
}
for (i *= 2; i < c->l_qseq; ++i)
cp[i] = "=ACMGRSVTWYHKDBN"[bam_seqi(s, i)];
cp[i++] = '\t';
cp += i;

// Sequence, 2 bases at a time
nibble2base(s, cp, c->l_qseq);
cp[c->l_qseq] = '\t';
cp += c->l_qseq+1;

// Quality
s = bam_get_qual(b);
i = 0;
if (s[0] == 0xff) {
cp[i++] = '*';
} else {
for (i = 0; i < c->l_qseq; ++i)
// local copy of c->l_qseq to aid unrolling
uint32_t lqseq = c->l_qseq;
for (i = 0; i < lqseq; ++i)
cp[i]=s[i]+33;
}
cp[i] = 0;
Expand Down
37 changes: 37 additions & 0 deletions sam_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,43 @@ static inline int possibly_expand_bam_data(bam1_t *b, size_t bytes) {
return sam_realloc_bam_data(b, new_len);
}

/*
* Convert a nibble encoded BAM sequence to a string of bases.
*
* We do this 2 bp at a time for speed. Equiv to:
*
* for (i = 0; i < len; i++)
* seq[i] = seq_nt16_str[bam_seqi(nib, i)];
*/
static inline void nibble2base(uint8_t *nib, char *seq, int len) {
static const char code2base[512] =
"===A=C=M=G=R=S=V=T=W=Y=H=K=D=B=N"
"A=AAACAMAGARASAVATAWAYAHAKADABAN"
"C=CACCCMCGCRCSCVCTCWCYCHCKCDCBCN"
"M=MAMCMMMGMRMSMVMTMWMYMHMKMDMBMN"
"G=GAGCGMGGGRGSGVGTGWGYGHGKGDGBGN"
"R=RARCRMRGRRRSRVRTRWRYRHRKRDRBRN"
"S=SASCSMSGSRSSSVSTSWSYSHSKSDSBSN"
"V=VAVCVMVGVRVSVVVTVWVYVHVKVDVBVN"
"T=TATCTMTGTRTSTVTTTWTYTHTKTDTBTN"
"W=WAWCWMWGWRWSWVWTWWWYWHWKWDWBWN"
"Y=YAYCYMYGYRYSYVYTYWYYYHYKYDYBYN"
"H=HAHCHMHGHRHSHVHTHWHYHHHKHDHBHN"
"K=KAKCKMKGKRKSKVKTKWKYKHKKKDKBKN"
"D=DADCDMDGDRDSDVDTDWDYDHDKDDDBDN"
"B=BABCBMBGBRBSBVBTBWBYBHBKBDBBBN"
"N=NANCNMNGNRNSNVNTNWNYNHNKNDNBNN";

int i, len2 = len/2;
seq[0] = 0;

for (i = 0; i < len2; i++)
// Note size_t cast helps gcc optimiser.
memcpy(&seq[i*2], &code2base[(size_t)nib[i]*2], 2);

if ((i *= 2) < len)
seq[i] = seq_nt16_str[bam_seqi(nib, i)];
}

#ifdef __cplusplus
}
Expand Down