-
Notifications
You must be signed in to change notification settings - Fork 295
/
read_aligner.py
executable file
·64 lines (48 loc) · 1.92 KB
/
read_aligner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#! /usr/bin/env python
"""
Error correct reads based on a counting hash from a diginorm step.
Output sequences will be put in @@@.
% python scripts/error-correct-pass2 <counting.kh> <data1> [ <data2> <...> ]
Use '-h' for parameter help.
"""
import sys
import screed
import os
import khmer
from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
from khmer.khmer_args import build_counting_args
from khmer.khmer_args import add_loadhash_args
###
DEFAULT_COVERAGE = 20
DEFAULT_MAX_ERROR_REGION = 40
def main():
parser = build_counting_args()
parser.add_argument("--trusted-cov", dest="trusted_cov", type=int, default=2)
parser.add_argument("--theta", type=float, default=1.0)
parser.add_argument("input_table")
parser.add_argument("input_filenames", nargs="+")
add_loadhash_args(parser)
args = parser.parse_args()
counting_ht = args.input_table
infiles = args.input_filenames
print >>sys.stderr, 'file with ht: %s' % counting_ht
print >>sys.stderr, 'loading hashtable'
ht = khmer.load_counting_hash(counting_ht)
K = ht.ksize()
aligner = khmer.new_readaligner(ht, args.trusted_cov, args.theta) # counting hash, trusted kmer coverage cutoff, bits theta (threshold value for terminating unproductive alignemnts)
### the filtering loop
for infile in infiles:
print >>sys.stderr, 'aligning', infile
for n, record in enumerate(screed.open(infile)):
name = record['name']
seq = record['sequence'].upper()
print >>sys.stderr, name
print >>sys.stderr, seq
score, graph_alignment, read_alignment, truncated = aligner.align(seq)
print >>sys.stderr, score
print >>sys.stderr, graph_alignment
print >>sys.stderr, read_alignment
print >>sys.stderr, truncated
print ">{0}\n{1}".format(name, graph_alignment)
if __name__ == '__main__':
main()