-
Notifications
You must be signed in to change notification settings - Fork 3
/
base_bwt.cpp
92 lines (72 loc) · 2.15 KB
/
base_bwt.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
//C headers
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/stat.h>
//C++ headers
#include <fstream>
#include <string>
#include <vector>
//Custom headers
#include "string_util.h"
#include "rle_bwt.h"
using namespace std;
// Default constructor. The body is intentionally empty: nothing is set up
// here, so members are simply default-initialized.
BaseBWT::BaseBWT()
{
}
// Rebuilds totalCounts (VC_LEN entries, one per symbol) by scanning every
// entry of the stored bwt.
//
// For each entry, the bits selected by MASK give the symbol and the value
// left after shifting right by LETTER_BITS gives a count digit. When the same
// symbol appears in consecutive entries, each following entry's digit is
// weighted by one more factor of NUM_POWER; a change of symbol resets the
// weight to 1.
void BaseBWT::constructTotalCounts() {
    this->totalCounts = vector<uint64_t>(VC_LEN, 0);

    const uint64_t numEntries = this->bwt.size();

    // 255 serves as the initial "no previous symbol" marker.
    // NOTE(review): presumably no masked symbol can equal 255 - confirm against MASK.
    uint8_t lastSymbol = 255;
    uint64_t weight = 1;

    for(uint64_t i = 0; i < numEntries; i++) {
        const uint8_t symbol = this->bwt[i] & MASK;

        //a new symbol starts over at weight 1; a repeated symbol scales the weight up
        if(symbol != lastSymbol) {
            weight = 1;
        }
        else {
            weight *= NUM_POWER;
        }
        lastSymbol = symbol;

        //add this entry's weighted digit to the total for its symbol
        const uint64_t contribution = (this->bwt[i] >> LETTER_BITS) * weight;
        this->totalCounts[symbol] += contribution;
    }
}
// Derives startIndex, endIndex and totalSize from totalCounts.
//
// Symbols are laid out back to back in increasing symbol order: symbol s
// occupies the half-open interval [startIndex[s], endIndex[s]), whose length
// is totalCounts[s]. totalSize ends up as the sum of all counts.
// totalCounts must already hold VC_LEN entries when this is called.
void BaseBWT::constructIndexing() {
    this->startIndex = vector<uint64_t>(VC_LEN, 0);
    this->endIndex = vector<uint64_t>(VC_LEN, 0);

    uint64_t runningTotal = 0;
    for(uint64_t sym = 0; sym < VC_LEN; sym++) {
        const uint64_t symEnd = runningTotal + this->totalCounts[sym];
        this->startIndex[sym] = runningTotal;
        this->endIndex[sym] = symEnd;
        //the next symbol begins where this one ends
        runningTotal = symEnd;
    }

    this->totalSize = runningTotal;
}
// Destructor. The body is intentionally empty: no explicit cleanup is
// performed here.
BaseBWT::~BaseBWT()
{
}
uint64_t BaseBWT::countKmer(uint8_t * kmer, uint64_t kmerSize) {
bwtRange ret;
ret.l = 0;
ret.h = this->totalSize;
for(int64_t x = kmerSize-1; x >= 0 && ret.l != ret.h; x--) {
ret = this->constrainRange(kmer[x], ret);
}
return ret.h-ret.l;
}
// Computes a k-mer count for every window of a sequence.
//
// seq      - the sequence, as the integer symbols expected by countKmer
// kmerSize - window length
//
// Returns one entry per window start x in [0, seq.size() - kmerSize]. Each
// entry is countKmer of the window seq[x, x + kmerSize) plus countKmer of the
// window of the same length taken from
// string_util::reverseComplement_i(seq) at offset seq.size() - kmerSize - x.
// Returns an empty vector when seq is shorter than kmerSize.
//
// Window pointers are formed with data() + offset rather than &vec[offset].
// When kmerSize == 0 the offset can equal the vector's size; forming that
// one-past-the-end pointer is valid, whereas operator[] with an index equal
// to size() is undefined behaviour (and aborts on hardened standard
// libraries). countKmer reads nothing through the pointer in that case.
vector<uint64_t> BaseBWT::countPileup_i(vector<uint8_t> seq, uint64_t kmerSize) {
    const uint64_t seqLen = seq.size();
    if(seqLen < kmerSize) {
        return vector<uint64_t>(0);
    }

    //offset of the last window; also the reverse-complement offset for x == 0
    const uint64_t lastStart = seqLen-kmerSize;
    const uint64_t numCounts = lastStart+1;
    vector<uint64_t> ret(numCounts);

    // NOTE(review): assumes reverseComplement_i returns a vector of the same
    // length as seq - confirm against string_util.
    vector<uint8_t> revComp = string_util::reverseComplement_i(seq);

    uint8_t * forward = seq.data();
    uint8_t * reverse = revComp.data();
    for(uint64_t x = 0; x < numCounts; x++) {
        ret[x] = this->countKmer(forward+x, kmerSize)
               + this->countKmer(reverse+(lastStart-x), kmerSize);
    }
    return ret;
}