-
Notifications
You must be signed in to change notification settings - Fork 30
/
tests.py
35 lines (28 loc) · 1.05 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pytest
from evaluate import string_shingle_matching, _ngrams, _tokenize
def test_tokenize():
    """Check that `_tokenize` splits a sample string into its six tokens.

    The sample mixes spaces, commas, a colon and parentheses as separators
    and ends with a trailing space; none of those characters are expected
    to show up in the resulting token list.
    """
    tokens = _tokenize('a b,cd:e(foo,bar) ')
    assert tokens == ['a', 'b', 'cd', 'e', 'foo', 'bar']
@pytest.mark.parametrize(
    ('text', 'n', 'expected'),
    [
        # Text without any token characters: no n-grams expected.
        ('!', 4, []),
        # Fewer tokens than n: a single, shorter tuple with all tokens.
        ('a,b c ', 5, [('a', 'b', 'c')]),
        # Sliding window of size 3 over four tokens.
        ('aa 11 c 22', 3, [('aa', '11', 'c'), ('11', 'c', '22')]),
        # Repeated n-grams are kept, in order of appearance.
        (
            'a b c a b c',
            3,
            [
                ('a', 'b', 'c'),
                ('b', 'c', 'a'),
                ('c', 'a', 'b'),
                ('a', 'b', 'c'),
            ],
        ),
    ],
)
def test_ngrams(text, n, expected):
    """Check that `_ngrams(text, n)` returns the expected list of tuples."""
    actual = _ngrams(text, n)
    assert actual == expected
@pytest.mark.parametrize(
    ('true', 'pred', 'tp_fp_fn'),
    [
        # Identical texts: everything counts as a true positive.
        ('a b c', 'a b c', (1, 0, 0)),
        # Extra token on one side only: the mass is split between TP and
        # FN (extra in `true`) or TP and FP (extra in `pred`).
        ('a b c d', 'a b c', (0.5, 0, 0.5)),
        ('a b c', 'a b c d', (0.5, 0.5, 0)),
        # Empty inputs.
        ('', '', (0, 0, 0)),
        ('a', '', (0, 0, 1)),
        ('', 'a', (0, 1, 0)),
        # One side repeats the other's text twice.
        ('a b c a b c', 'a b c', (0.25, 0, 0.75)),
        ('a b c', 'a b c a b c', (0.25, 0.75, 0)),
    ],
)
def test_string_shingle_matching(true, pred, tp_fp_fn):
    """Check the (tp, fp, fn) triple returned for 3-gram shingle matching.

    Each case passes a reference text (`true`) and a predicted text
    (`pred`) to `string_shingle_matching` with `ngram_n=3` and compares
    the result to the expected tuple exactly.
    """
    result = string_shingle_matching(true, pred, ngram_n=3)
    assert result == tp_fp_fn