# coding: utf-8
"""Tests for stream_processor.

Covers tokenizing the character stream, stripping garbage contents,
counting groups, and scoring both groups and garbage.
"""
import pytest

import stream_processor as sp
from stream_processor import Token as tk

TOKEN_EXAMPLES = (
    (r'<', [tk.START_GARBAGE]),
    (r'>', [tk.END_GARBAGE]),
    (r'c', [tk.CHARACTER]),
    (r'!c', [tk.ESCAPE, tk.CHARACTER]),
    (r'{c', [tk.START_GROUP, tk.CHARACTER]),
    (r'}', [tk.END_GROUP]),
    (r',', [tk.SEPARATOR]),
)
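
# The Token members above are imported from stream_processor; presumably
# an enum along these lines (a sketch for orientation only, not the
# canonical definition):
#
#     class Token(enum.Enum):
#         START_GROUP = enum.auto()
#         END_GROUP = enum.auto()
#         START_GARBAGE = enum.auto()
#         END_GARBAGE = enum.auto()
#         ESCAPE = enum.auto()
#         SEPARATOR = enum.auto()
#         CHARACTER = enum.auto()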

ALL_GARBAGE = (
    r'<>',
    r'<random characters>',
    r'<<<<>',
    r'<{!>}>',
    r'<!!>',
    r'<!!!>>',
    r'<{o"i!a,<{i<a>',
)
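
# Inside garbage, '!' cancels whatever character follows it, so in
# r'<{!>}>' the escaped '>' does not close the garbage; only the final
# '>' does.  Likewise in r'<!!>' the first '!' cancels the second,
# leaving an empty garbage body.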

GROUPS = (
    (r'{}', 1),
    (r'{{{}}}', 3),
    (r'{{},{}}', 3),
    (r'{{{},{},{{}}}}', 6),
    (r'{<{},{},{{}}>}', 1),
    (r'{<a>,<a>,<a>,<a>}', 1),
    (r'{{<a>},{<a>},{<a>},{<a>}}', 5),
    (r'{{<!>},{<!>},{<!>},{<a>}}', 2),
)
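
# Every balanced '{' ... '}' pair is one group; braces inside garbage do
# not count.  In the last case each '!>' keeps the garbage running, so
# only the outer group and the first inner group survive: 2.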

GROUPS_SCORE = (
    (r'{}', 1),
    (r'{{{}}}', 6),
    (r'{{},{}}', 5),
    (r'{{{},{},{{}}}}', 16),
    (r'{<a>,<a>,<a>,<a>}', 1),
    (r'{{<ab>},{<ab>},{<ab>},{<ab>}}', 9),
    (r'{{<!!>},{<!!>},{<!!>},{<!!>}}', 9),
    (r'{{<a!>},{<a!>},{<a!>},{<ab>}}', 3),
)
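
# A group scores its nesting depth (the outermost group scores 1) and
# the total is the sum over all groups: r'{{{}}}' scores 1 + 2 + 3 = 6,
# and r'{{},{}}' scores 1 + 2 + 2 = 5.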

GARBAGE_SCORE = (
    (r'<>', 0),
    (r'<random characters>', 17),
    (r'<<<<>', 3),
    (r'<{!>}>', 2),
    (r'<!!>', 0),
    (r'<!!!>>', 0),
    (r'<{o"i!a,<{i<a>', 10),
)
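
# Garbage scores one point per character between the '<' and '>'
# delimiters, not counting each '!' or the character it cancels:
# r'<{o"i!a,<{i<a>' keeps ten characters.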


@pytest.mark.parametrize("test_input,expected", TOKEN_EXAMPLES)
def test_parser(test_input, expected):
    """Test that we tokenize individual characters OK."""
    tokens = list(sp.tokenize(test_input))
    assert tokens == expected


@pytest.mark.parametrize("test_input", ALL_GARBAGE)
def test_all_garbage_naive(test_input):
    """Just verifies that there are garbage book-ends."""
    tokens = list(sp.tokenize(test_input))
    assert tokens[0] is tk.START_GARBAGE
    assert tokens[-1] is tk.END_GARBAGE


def test_small_naive_token_stream():
    """Test a small stream tokenizes OK."""
    tokens = list(sp.tokenize('{<abc>}'))
    assert tokens == [
        tk.START_GROUP,
        tk.START_GARBAGE,
        tk.CHARACTER,
        tk.CHARACTER,
        tk.CHARACTER,
        tk.END_GARBAGE,
        tk.END_GROUP,
    ]


@pytest.mark.parametrize("test_input", ALL_GARBAGE)
def test_all_garbage(test_input):
    """Verifies that garbage is properly stripped out.

    NOTE: garbage start and end tokens are still emitted.
    """
    tokens = list(sp.strip_garbage_contents(sp.tokenize(test_input)))
    assert tokens == [tk.START_GARBAGE, tk.END_GARBAGE]
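
# strip_garbage_contents presumably filters the token stream, dropping
# everything between the garbage book-ends while re-emitting the
# book-end tokens themselves.  A minimal sketch of that shape, assuming
# only the Token enum:
#
#     def strip_garbage_contents(tokens):
#         in_garbage = False
#         for token in tokens:
#             if token is Token.START_GARBAGE:
#                 in_garbage = True
#                 yield token
#             elif token is Token.END_GARBAGE:
#                 in_garbage = False
#                 yield token
#             elif not in_garbage:
#                 yield token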


@pytest.mark.parametrize("test_input,expected", GROUPS)
def test_count_groups(test_input, expected):
    """Tests that we can count groups."""
    group_count = sp.count_groups(test_input)
    assert group_count == expected


@pytest.mark.parametrize("test_input,expected", GROUPS_SCORE)
def test_score_groups(test_input, expected):
    """Tests that we can give scores to the groups."""
    score = sp.score_groups(test_input)
    assert score == expected


@pytest.mark.parametrize("test_input,expected", GARBAGE_SCORE)
def test_score_garbage(test_input, expected):
    """Tests that we can count the garbage."""
    score = sp.score_garbage(test_input)
    assert score == expected
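
# count_groups and score_groups presumably share a single pass over the
# garbage-stripped token stream, tracking nesting depth.  A rough sketch
# of score_groups under that assumption:
#
#     def score_groups(stream):
#         depth = score = 0
#         for token in strip_garbage_contents(tokenize(stream)):
#             if token is Token.START_GROUP:
#                 depth += 1
#                 score += depth
#             elif token is Token.END_GROUP:
#                 depth -= 1
#         return score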