-
Notifications
You must be signed in to change notification settings - Fork 295
/
Copy pathtest_filter.py
73 lines (51 loc) · 1.98 KB
/
test_filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#
# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
# Copyright (C) Michigan State University, 2009-2013. It is licensed under
# the three-clause BSD license; see doc/LICENSE.txt. Contact: ctb@msu.edu
#
import khmer
from screed.fasta import fasta_iter
from nose.plugins.attrib import attr
import khmer_tst_utils as utils
def teardown():
    """Module-level teardown hook: delegate to the shared test-utils cleanup."""
    utils.cleanup()
def load_fa_seq_names(filename):
    """Return the ``'name'`` field of every record parsed from `filename`.

    The file is opened in text mode and handed to ``fasta_iter``; names are
    returned as a list, in the order the iterator yields the records.
    """
    # A context manager guarantees the handle is closed, even when parsing
    # raises part-way through; previously the file was never closed.
    with open(filename) as fp:
        # The comprehension is fully evaluated while the file is still open.
        return [record['name'] for record in fasta_iter(fp)]
class Test_Filter(object):
    """Tests for the per-position k-mer frequency output of a khmer hashtable."""

    @attr('highmem')
    def test_abund(self):
        # Load a test read file into a counting table, write the per-position
        # k-mer frequencies back out, and check that the output is a single
        # line in which every frequency is '1'.
        # NOTE(review): the first argument (10) is presumably the k-mer size,
        # and 114 presumably the read length, giving 114 - 10 + 1 k-mer
        # positions -- confirm against the test data.
        ht = khmer.new_hashtable(10, 4 ** 10)

        filename = utils.get_test_data('test-abund-read.fa')
        outname = utils.get_temp_filename('test_abund.out')

        ht.consume_fasta(filename)
        ht.output_fasta_kmer_pos_freq(filename, outname)

        # Read the output inside a context manager so the handle is released
        # even when one of the assertions below fails (the explicit close()
        # used to sit after the asserts and was skipped on failure).
        with open(outname, "r") as fd:
            output = fd.readlines()

        assert len(output) == 1

        freqs = output[0].strip().split()
        assert ['1'] * (114 - 10 + 1) == freqs
@attr('highmem')
def test_filter_sodd():
    # Build a hashbits table from the high-sodd data set, then check where
    # trim_on_sodd() cuts each of three reads. Each case pairs an input read
    # with the trimmed sequence the call is expected to return.
    ksize = 32
    table_size = int(8e7)
    n_tables = 4
    max_sodd = 3

    bits = khmer.new_hashbits(ksize, table_size, n_tables)
    bits.consume_fasta(utils.get_test_data('../../data/high-sodd.fa'))

    untrimmed = "GCACGCAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTG"
    cases = [
        (
            "CGTTAGTTGCGGTGCCGACCGGCAAACTTGGTTTTGCCAAAAATTTTTACAGTTAGAAATTATTC"
            "ACAAAGTTGCACCGGAATTCGGTTACAAACGTCATTCTAACTAAT",
            "CGTTAGTTGCGGTGCCGACCGGCAAACTTGGT",
        ),
        (
            "ACAAAATTCCACATATAGTCATAATTGTGGGCAATTTTCGTCCCAAATTAGTTAGAATGACGTTT"
            "GTAACCGAATTCCGGTGCAACTTTGTGAATAATTTCTAACTGTAAAAAT",
            "ACAAAATTCCACATATAGTCATAATTGTGGGCAATT",
        ),
        # The last read is expected to come back unchanged.
        (untrimmed, untrimmed),
    ]

    for read, expected in cases:
        # trim_on_sodd returns a pair; only the trimmed sequence is checked.
        trimmed, _ = bits.trim_on_sodd(read, max_sodd)
        assert trimmed == expected