Skip to content

Commit

Permalink
clang-format
Browse files Browse the repository at this point in the history
  • Loading branch information
ldenti committed Oct 20, 2022
1 parent ceabcc3 commit 7e0a613
Show file tree
Hide file tree
Showing 31 changed files with 3,553 additions and 3,314 deletions.
92 changes: 47 additions & 45 deletions assembler.cpp
Original file line number Diff line number Diff line change
@@ -1,55 +1,57 @@
#include "assembler.hpp"

void Assembler::run() {
auto c = Configuration::getInstance();
int num_batches = c->aggregate_batches;
int tau = -1; // c->cutoff;
auto c = Configuration::getInstance();
int num_batches = c->aggregate_batches;
int tau = -1; // c->cutoff;

lprint({"Assembling high-abundance strings from", to_string(num_batches), "batches.."});
#pragma omp parallel for num_threads(c->threads)
for (int j = 0; j < num_batches; j++) {
lprint({"Loading batch", to_string(j) + ".."}) ;
string s_j = std::to_string(j);
string inpath = c->workdir + "/solution_batch_" + s_j + ".sfs";
string outpath = c->workdir + "/solution_batch_" + s_j + ".assembled.sfs";
ofstream outf(outpath);
map<string, vector<SFS>> SFSs = parse_sfsfile(inpath, tau);
//cout << SFSs.size() << "SFS in total." << endl ;
for (map<string, vector<SFS>>::iterator it = SFSs.begin(); it != SFSs.end(); ++it) {
string ridx = it->first;
vector<SFS> sfs = it->second;
vector<SFS> assembled_sfs = assemble(sfs);
bool is_first = true;
for (const SFS &sfs : assembled_sfs) {
outf << (is_first ? ridx : "*") << "\t"
<< "\t" << sfs.s << "\t" << sfs.l << "\t" << sfs.c << endl;
is_first = false;
}
}
outf.close();
lprint({"Assembling high-abundance strings from", to_string(num_batches),
"batches.."});
#pragma omp parallel for num_threads(c->threads)
for (int j = 0; j < num_batches; j++) {
lprint({"Loading batch", to_string(j) + ".."});
string s_j = std::to_string(j);
string inpath = c->workdir + "/solution_batch_" + s_j + ".sfs";
string outpath = c->workdir + "/solution_batch_" + s_j + ".assembled.sfs";
ofstream outf(outpath);
map<string, vector<SFS>> SFSs = parse_sfsfile(inpath, tau);
// cout << SFSs.size() << "SFS in total." << endl ;
for (map<string, vector<SFS>>::iterator it = SFSs.begin(); it != SFSs.end();
++it) {
string ridx = it->first;
vector<SFS> sfs = it->second;
vector<SFS> assembled_sfs = assemble(sfs);
bool is_first = true;
for (const SFS &sfs : assembled_sfs) {
outf << (is_first ? ridx : "*") << "\t"
<< "\t" << sfs.s << "\t" << sfs.l << "\t" << sfs.c << endl;
is_first = false;
}
}
outf.close();
}
}

vector<SFS> Assembler::assemble(vector<SFS> &sfs) {
vector<SFS> assembled_sfs;
sort(sfs.begin(), sfs.end());
int i = 0;
while (i < sfs.size()) {
uint j;
for (j = i + 1; j < sfs.size(); ++j) {
if (sfs[j - 1].s + sfs[j - 1].l <= sfs[j].s) {
// non-overlapping
uint l = sfs[j - 1].s + sfs[j - 1].l - sfs[i].s;
assembled_sfs.push_back(SFS(sfs[i].s, l, 1, sfs[i].isreversed));
i = j;
break;
}
}
if (j == sfs.size()) {
uint l = sfs[j - 1].s + sfs[j - 1].l - sfs[i].s;
assembled_sfs.push_back(SFS(sfs[i].s, l, 1, sfs[i].isreversed));
i = j;
}
vector<SFS> assembled_sfs;
sort(sfs.begin(), sfs.end());
int i = 0;
while (i < sfs.size()) {
uint j;
for (j = i + 1; j < sfs.size(); ++j) {
if (sfs[j - 1].s + sfs[j - 1].l <= sfs[j].s) {
// non-overlapping
uint l = sfs[j - 1].s + sfs[j - 1].l - sfs[i].s;
assembled_sfs.push_back(SFS(sfs[i].s, l, 1, sfs[i].isreversed));
i = j;
break;
}
}
return assembled_sfs;
if (j == sfs.size()) {
uint l = sfs[j - 1].s + sfs[j - 1].l - sfs[i].s;
assembled_sfs.push_back(SFS(sfs[i].s, l, 1, sfs[i].isreversed));
i = j;
}
}
return assembled_sfs;
}
4 changes: 2 additions & 2 deletions assembler.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#ifndef ASSEMBLER_HPP
#define ASSEMBLER_HPP

#include <list>
#include <fstream>
#include <list>

#include "sfs.hpp"
#include "config.hpp"
#include "sfs.hpp"

using namespace std;

Expand Down
231 changes: 117 additions & 114 deletions bam.cpp
Original file line number Diff line number Diff line change
@@ -1,134 +1,137 @@
#include "bam.hpp"

using namespace std ;
using namespace std;

uint32_t cigar_len_mask = 0xFFFFFFF0 ;
uint32_t cigar_type_mask = 0xF ;
uint32_t cigar_len_mask = 0xFFFFFFF0;
uint32_t cigar_type_mask = 0xF;

string print_cigar_symbol(int type) {
if (type == BAM_CMATCH) {
return "M" ;
}
if (type == BAM_CINS) {
return "I" ;
}
if (type == BAM_CDEL) {
return "D" ;
}
if (type == BAM_CSOFT_CLIP) {
return "S" ;
}
if (type == BAM_CHARD_CLIP) {
return "H" ;
}
return "X" ;
if (type == BAM_CMATCH) {
return "M";
}
if (type == BAM_CINS) {
return "I";
}
if (type == BAM_CDEL) {
return "D";
}
if (type == BAM_CSOFT_CLIP) {
return "S";
}
if (type == BAM_CHARD_CLIP) {
return "H";
}
return "X";
}

vector<pair<uint32_t, uint32_t>> decode_cigar(bam1_t* read) {
// get CIGAR
vector<pair<uint32_t, uint32_t>> cigar_offsets ;
uint32_t* cigar = bam_get_cigar(read) ;
int offset = 0 ;
for (int i = 0; i < read->core.n_cigar; i++) {
uint32_t type = cigar[i] & cigar_type_mask ;
uint32_t length = cigar[i] >> 4 ;
cigar_offsets.push_back(make_pair(length, type)) ;
}
return cigar_offsets ;
vector<pair<uint32_t, uint32_t>> decode_cigar(bam1_t *read) {
// get CIGAR
vector<pair<uint32_t, uint32_t>> cigar_offsets;
uint32_t *cigar = bam_get_cigar(read);
int offset = 0;
for (int i = 0; i < read->core.n_cigar; i++) {
uint32_t type = cigar[i] & cigar_type_mask;
uint32_t length = cigar[i] >> 4;
cigar_offsets.push_back(make_pair(length, type));
}
return cigar_offsets;
}

uint8_t* encode_cigar(vector<pair<uint32_t, uint32_t>> cigar) {
uint32_t* cigar_bytes = (uint32_t*) malloc(sizeof(uint32_t) * cigar.size()) ;
for (int i = 0; i < cigar.size(); i++) {
cigar_bytes[i] = (cigar[i].first << 4) | (cigar[i].second & cigar_type_mask) ;
}
return (uint8_t*) cigar_bytes ;
uint8_t *encode_cigar(vector<pair<uint32_t, uint32_t>> cigar) {
uint32_t *cigar_bytes = (uint32_t *)malloc(sizeof(uint32_t) * cigar.size());
for (int i = 0; i < cigar.size(); i++) {
cigar_bytes[i] =
(cigar[i].first << 4) | (cigar[i].second & cigar_type_mask);
}
return (uint8_t *)cigar_bytes;
}

uint8_t* encode_bam_seq(char* seq) {
int n = (strlen(seq) + 1) >> 1 ;
int l_seq = strlen(seq) ;
uint8_t* seq_bytes = (uint8_t*) malloc(sizeof(uint8_t) * n) ;
int i = 0 ;
n = 0 ;
for (i = 0; i + 1 < l_seq; i += 2) {
seq_bytes[n] = (seq_nt16_table[(unsigned char)seq[i]] << 4) | seq_nt16_table[(unsigned char)seq[i + 1]];
n += 1 ;
}
for (; i < l_seq; i++) {
seq_bytes[n] = seq_nt16_table[(unsigned char)seq[i]] << 4;
n += 1 ;
}
return seq_bytes ;
uint8_t *encode_bam_seq(char *seq) {
int n = (strlen(seq) + 1) >> 1;
int l_seq = strlen(seq);
uint8_t *seq_bytes = (uint8_t *)malloc(sizeof(uint8_t) * n);
int i = 0;
n = 0;
for (i = 0; i + 1 < l_seq; i += 2) {
seq_bytes[n] = (seq_nt16_table[(unsigned char)seq[i]] << 4) |
seq_nt16_table[(unsigned char)seq[i + 1]];
n += 1;
}
for (; i < l_seq; i++) {
seq_bytes[n] = seq_nt16_table[(unsigned char)seq[i]] << 4;
n += 1;
}
return seq_bytes;
}

char reverse_complement_base(char base) {
if (base == 'C' || base == 'c') {
return 'G' ;
}
if (base == 'A' || base == 'a') {
return 'T' ;
}
if (base == 'G' || base == 'g') {
return 'C' ;
}
if (base == 'T' || base == 't') {
return 'A' ;
}
else {
return 'N' ;
}
if (base == 'C' || base == 'c') {
return 'G';
}
if (base == 'A' || base == 'a') {
return 'T';
}
if (base == 'G' || base == 'g') {
return 'C';
}
if (base == 'T' || base == 't') {
return 'A';
} else {
return 'N';
}
}

void reverse_complement_read(char* seq) {
int l = strlen(seq) ;
int i = 0 ;
while (i < l / 2) {
auto t = reverse_complement_base(seq[l - i]) ;
seq[l - 1 - i] = reverse_complement_base(seq[i]) ;
seq[i] = t ;
i += 1 ;
}
void reverse_complement_read(char *seq) {
int l = strlen(seq);
int i = 0;
while (i < l / 2) {
auto t = reverse_complement_base(seq[l - i]);
seq[l - 1 - i] = reverse_complement_base(seq[i]);
seq[i] = t;
i += 1;
}
}

vector<pair<int, int>> get_aligned_pairs(bam1_t *alignment) {
vector<pair<int, int>> result ;
uint ref_pos = alignment->core.pos ;
uint read_pos = 0 ;
auto cigar_offsets = decode_cigar(alignment) ;
int m = 0 ;
while (true) {
if (m == cigar_offsets.size()) {
break ;
}
if (cigar_offsets[m].second == BAM_CMATCH or cigar_offsets[m].second == BAM_CEQUAL or cigar_offsets[m].second == BAM_CDIFF) {
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
result.push_back(make_pair(read_pos, i));
read_pos++;
}
ref_pos += cigar_offsets[m].first;
} else if (cigar_offsets[m].second == BAM_CINS or cigar_offsets[m].second == BAM_CSOFT_CLIP) {
for (uint i = 0; i < cigar_offsets[m].first; ++i) {
result.push_back(make_pair(read_pos, -1));
read_pos++;
}
} else if (cigar_offsets[m].second == BAM_CDEL) {
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
result.push_back(make_pair(-1, i));
}
ref_pos += cigar_offsets[m].first;
} else if (cigar_offsets[m].second == BAM_CHARD_CLIP) {
// advances neither
} else if (cigar_offsets[m].second == BAM_CREF_SKIP) {
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
result.push_back(make_pair(-1, i));
}
ref_pos += cigar_offsets[m].first;
} else { //if (cigar_offsets[m].second == BAM_CPAD) {
//TODO
}
m++ ;
}
return result;
vector<pair<int, int>> result;
uint ref_pos = alignment->core.pos;
uint read_pos = 0;
auto cigar_offsets = decode_cigar(alignment);
int m = 0;
while (true) {
if (m == cigar_offsets.size()) {
break;
}
if (cigar_offsets[m].second == BAM_CMATCH or
cigar_offsets[m].second == BAM_CEQUAL or
cigar_offsets[m].second == BAM_CDIFF) {
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
result.push_back(make_pair(read_pos, i));
read_pos++;
}
ref_pos += cigar_offsets[m].first;
} else if (cigar_offsets[m].second == BAM_CINS or
cigar_offsets[m].second == BAM_CSOFT_CLIP) {
for (uint i = 0; i < cigar_offsets[m].first; ++i) {
result.push_back(make_pair(read_pos, -1));
read_pos++;
}
} else if (cigar_offsets[m].second == BAM_CDEL) {
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
result.push_back(make_pair(-1, i));
}
ref_pos += cigar_offsets[m].first;
} else if (cigar_offsets[m].second == BAM_CHARD_CLIP) {
// advances neither
} else if (cigar_offsets[m].second == BAM_CREF_SKIP) {
for (uint i = ref_pos; i < ref_pos + cigar_offsets[m].first; ++i) {
result.push_back(make_pair(-1, i));
}
ref_pos += cigar_offsets[m].first;
} else { // if (cigar_offsets[m].second == BAM_CPAD) {
// TODO
}
m++;
}
return result;
}

Loading

0 comments on commit 7e0a613

Please sign in to comment.