-
Notifications
You must be signed in to change notification settings - Fork 295
/
degree-by-position.py
executable file
·47 lines (36 loc) · 1.09 KB
/
degree-by-position.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#! /usr/bin/env python2
#
# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
# Copyright (C) Michigan State University, 2009-2013. It is licensed under
# the three-clause BSD license; see doc/LICENSE.txt.
# Contact: khmer-project@idyll.org
#
import khmer
import sys
from screed.fasta import fasta_iter
K = 32
HASHTABLE_SIZE = int(8e9)
N_HT = 4
def main():
outfp = open(sys.argv[2], 'w')
ht = khmer.new_hashbits(K, HASHTABLE_SIZE, N_HT)
ht.consume_fasta(sys.argv[1])
hist = [0] * 200
histcount = [0] * 200
for n, record in enumerate(fasta_iter(open(sys.argv[1]))):
if n % 10000 == 0:
print '...', n
seq = record['sequence']
for pos in range(0, len(seq) - K + 1):
kmer = seq[pos:pos + K]
count = ht.kmer_degree(kmer)
hist[pos] += count
histcount[pos] += 1
for i in range(len(hist)):
total = hist[i]
count = histcount[i]
if not count:
continue
print >>outfp, i, total, count, total / float(count)
if __name__ == '__main__':
main()