forked from WojciechMula/pyahocorasick
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark3.py
117 lines (76 loc) · 2.54 KB
/
benchmark3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from time import clock
from random import choice, randint, seed
from sys import stdout
import ahocorasick
def write(str):
stdout.write(str)
stdout.flush()
def writeln(str):
stdout.write(str)
stdout.write('\n')
class ElapsedTime:
def __init__(self, msg):
self.msg = msg
def __enter__(self):
write("%-40s: " % self.msg)
self.start = clock()
def __exit__(self, a1, a2, a3):
self.stop = clock()
writeln("%0.3f s" % self.get_time())
def get_time(self):
return self.stop - self.start
class Test:
def __init__(self, max_word_length, count):
self.min_word_length = 3
self.max_word_length = max_word_length
self.count = count
self.words = []
self.inexisting = []
self.input = ""
self.automaton = None
seed(0) # make sure that tests will be repeatable
def init_data(self):
def random_word(length):
chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
return ''.join(choice(chars) for _ in range(length))
for i in range(self.count):
length = randint(self.min_word_length, self.max_word_length)
self.words.append(random_word(length))
length = randint(self.min_word_length, self.max_word_length)
self.inexisting.append(random_word(length))
self.input = random_word(self.count)
assert(len(self.words) == len(self.inexisting))
def add_words(self):
self.automaton = ahocorasick.Automaton()
A = self.automaton
for word in self.words:
A.add_word(word, word)
def build(self):
self.automaton.make_automaton()
def lookup(self):
n = len(self.words)
A = self.automaton
for i in range(n):
A.get(self.words[i])
A.get(self.inexisting[i], "unknown")
def search(self):
A = self.automaton
n = 0
for item in A.iter(self.input):
n += 1
def run(self):
with ElapsedTime("Generating data (%d words)" % self.count):
self.init_data()
with ElapsedTime("Add words"):
self.add_words()
with ElapsedTime("Building automaton"):
self.build()
with ElapsedTime("Look up"):
self.lookup()
with ElapsedTime("Search"):
self.search()
def main():
test = Test(32, 1000000)
test.run()
if __name__ == '__main__':
main()