-
Notifications
You must be signed in to change notification settings - Fork 3
/
file_iterators.cpp
96 lines (78 loc) · 2.53 KB
/
file_iterators.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
//C headers
#include <stdint.h>
//C++ headers
#include <fstream>
#include <string>
#include <vector>
//my headers
#include "file_iterators.h"
using namespace std;
// Opens the FASTA/FASTQ file at `fastFN` and primes the iterator by reading
// the first line into nextLine.
//
// The file is treated as FASTQ when the file name ends with the character 'q'
// and as FASTA otherwise. isMoreData is set to whether that first line could
// be read, so it is false for an empty or unopenable file.
FastaIterator::FastaIterator(string fastFN) {
    //open the file
    this->fastFN = fastFN;
    this->ifp.open(this->fastFN);

    //an empty file name has no last character; guard so we do not index
    //position (0 - 1) and throw std::out_of_range
    this->is_fq = !this->fastFN.empty() && this->fastFN.back() == 'q';

    //read the first line so we're in the correct state
    this->isMoreData = static_cast<bool>(getline(this->ifp, this->nextLine));
}
// Returns the record whose label line is currently buffered in nextLine and
// advances the iterator past it.
//
// FASTQ mode (is_fq): the leading character of the label is replaced with '>'
// so the label reads like a FASTA header, the next line is taken as the
// sequence, and the '+' separator and quality lines are read and discarded.
// The following line is then buffered as the next label, and isMoreData
// records whether that read succeeded.
//
// FASTA mode: every line up to (but not including) the next line starting
// with '>' is concatenated into the sequence. That '>' line stays buffered in
// nextLine as the next label. When the end of the file is reached instead,
// isMoreData is set to false.
//
// NOTE(review): this function does not check isMoreData itself; presumably
// callers test it before each call -- confirm against the header/callers.
struct LongReadFA FastaIterator::getNextRead() {
    struct LongReadFA ret;
    ret.label = this->nextLine;

    if(this->is_fq) {
        // reads only sequence data from a FASTQ file, ignoring quality strings
        ret.label.replace(0, 1, ">"); // change fastq to fasta format label line
        // sequence
        getline(this->ifp, ret.seq);
        // qual header '+' (discarded)
        getline(this->ifp, this->nextLine);
        // quality string (discarded)
        getline(this->ifp, this->nextLine);
        // next label (if any)
        this->isMoreData = static_cast<bool>(getline(this->ifp, this->nextLine));
        return ret;
    }

    //append each sequence line straight onto the result; this yields the same
    //string as collecting the fragments first and stitching them afterwards
    while(getline(this->ifp, this->nextLine)) {
        if(this->nextLine[0] == '>') {
            //reached the next record's label; leave it buffered in nextLine
            return ret;
        }
        ret.seq += this->nextLine;
    }

    //we hit the last line
    this->isMoreData = false;
    return ret;
}
// Creates a writer that emits records to the file at `fastaFN`, storing
// `symsPerLine` as the line width used when sequences are written.
FastaWriter::FastaWriter(string fastaFN, int symsPerLine) {
    //remember the wrap width and the destination path
    this->symsPerLine = symsPerLine;
    this->fastaFN = fastaFN;

    //open the output file using the stored path
    ofp.open(this->fastaFN);
}
// Closes the output file when the writer is destroyed.
FastaWriter::~FastaWriter() {
    ofp.close();
}
// Writes one record: the label on its own line, followed by the sequence
// split into lines of at most symsPerLine characters. An empty sequence
// produces only the label line.
//
// A non-positive symsPerLine means "do not wrap": the whole sequence is
// written on a single line. (A width of zero would otherwise never advance
// through the sequence and loop forever.)
//
// Returns true if the output stream is not in a failed state after writing,
// false otherwise.
bool FastaWriter::writeRead(LongReadFA r) {
    this->ofp << r.label << "\n";

    const uint64_t seqLen = r.seq.size();
    const uint64_t lineLen = (this->symsPerLine > 0)
        ? static_cast<uint64_t>(this->symsPerLine)
        : seqLen;

    for(uint64_t x = 0; x < seqLen; x += lineLen) {
        //the last line may be shorter than lineLen
        const uint64_t remaining = seqLen - x;
        const uint64_t chunk = (remaining < lineLen) ? remaining : lineLen;
        //write directly from the sequence buffer; no temporary substring
        this->ofp.write(r.seq.data() + x, static_cast<std::streamsize>(chunk));
        this->ofp << "\n";
    }

    return !this->ofp.fail();
}