-
Notifications
You must be signed in to change notification settings - Fork 271
/
test_langscripts.py
57 lines (45 loc) · 1.63 KB
/
test_langscripts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from __future__ import annotations
import pytest
from attr import dataclass
from mteb.languages import LanguageScripts
@dataclass
class LangScriptTestCase:
    """One parametrized scenario consumed by ``test_langscripts``.

    Attributes:
        args: Keyword arguments unpacked into
            ``LanguageScripts.from_languages_and_scripts``.
        contains_language: Codes for which ``contains_language`` must be truthy.
        not_contains_language: Codes for which ``contains_language`` must be
            falsy.
        contains_script: Codes for which ``contains_script`` must be truthy.
        not_contains_script: Codes for which ``contains_script`` must be falsy.
    """

    # Constructor input for the object under test.
    args: dict

    # Expectations checked through ``contains_language``.
    contains_language: list[str]
    not_contains_language: list[str]

    # Expectations checked through ``contains_script``.
    contains_script: list[str]
    not_contains_script: list[str]
# Scenarios fed to ``test_langscripts``; each entry pairs constructor
# arguments with the membership answers expected from the resulting object.
test_cases: list[LangScriptTestCase] = [
    # Single bare language code with ``scripts`` explicitly set to None:
    # both "fra" and "fra-Latn" are expected to match, "Latn" is not
    # expected to be reported as a contained script.
    LangScriptTestCase(
        args={"languages": ["fra"], "scripts": None},
        contains_language=["fra", "fra-Latn"],
        not_contains_language=["eng"],
        contains_script=[],
        not_contains_script=["Latn"],
    ),
    # Two bare language codes plus an explicit script list: both languages
    # (with or without the "-Latn" suffix) and the "Latn" script are expected
    # to match; "deu" and "Cyrl" are not.
    LangScriptTestCase(
        args={"languages": ["fra", "eng"], "scripts": ["Latn"]},
        contains_language=["fra", "fra-Latn", "eng", "eng-Latn"],
        not_contains_language=["deu"],
        contains_script=["Latn"],
        not_contains_script=["Cyrl"],
    ),
    # Combined "language-script" code with the ``scripts`` argument omitted:
    # "fra", "fra-Latn" and the "Latn" script are expected to match; English
    # codes and "Cyrl" are not.
    LangScriptTestCase(
        args={"languages": ["fra-Latn"]},
        contains_language=["fra", "fra-Latn"],
        not_contains_language=["eng", "eng-Latn"],
        contains_script=["Latn"],
        not_contains_script=["Cyrl"],
    ),
]
@pytest.mark.parametrize("test_case", test_cases)
def test_langscripts(test_case: LangScriptTestCase):
    """Check language and script membership for one scenario.

    Builds a ``LanguageScripts`` from ``test_case.args`` and asserts that
    ``contains_language`` / ``contains_script`` answer as the scenario's
    four expectation lists require.
    """
    subject = LanguageScripts.from_languages_and_scripts(**test_case.args)

    # Language membership: expected hits first, then expected misses.
    for expected_lang in test_case.contains_language:
        assert subject.contains_language(expected_lang)
    for unexpected_lang in test_case.not_contains_language:
        assert not subject.contains_language(unexpected_lang)

    # Script membership: expected hits first, then expected misses.
    for expected_script in test_case.contains_script:
        assert subject.contains_script(expected_script)
    for unexpected_script in test_case.not_contains_script:
        assert not subject.contains_script(unexpected_script)