forked from bootphon/phonemizer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_espeak.py
225 lines (179 loc) · 7.22 KB
/
test_espeak.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Test of the espeak backend"""
# pylint: disable=missing-docstring
# pylint: disable=redefined-outer-name
import os
import shutil
import pytest
from phonemizer.backend import EspeakBackend
from phonemizer.backend.espeak.wrapper import EspeakWrapper
from phonemizer.separator import Separator, default_separator
def test_bad_text():
backend = EspeakBackend('en-us')
text = 'hello world'
with pytest.raises(RuntimeError) as err:
backend.phonemize(text, default_separator, True)
assert 'input text to phonemize() is str' in str(err)
assert backend.phonemize(
[text], default_separator, True) == ['həloʊ wɜːld']
def test_english():
backend = EspeakBackend('en-us')
text = ['hello world', 'goodbye', 'third line', 'yet another']
out = backend.phonemize(text, default_separator, True)
assert out == ['həloʊ wɜːld', 'ɡʊdbaɪ', 'θɜːd laɪn', 'jɛt ɐnʌðɚ']
def test_stress():
backend = EspeakBackend('en-us', with_stress=False)
assert backend.phonemize(
['hello world'], default_separator, True) == ['həloʊ wɜːld']
backend = EspeakBackend('en-us', with_stress=True)
assert backend.phonemize(
['hello world'], default_separator, True) == ['həlˈoʊ wˈɜːld']
def test_french():
backend = EspeakBackend('fr-fr')
text = ['bonjour le monde']
sep = Separator(word=';eword ', syllable=None, phone=' ')
expected = ['b ɔ̃ ʒ u ʁ ;eword l ə ;eword m ɔ̃ d ;eword ']
out = backend.phonemize(text, sep, False)
assert out == expected
@pytest.mark.skipif(
(
not EspeakBackend.is_espeak_ng() or
# Arabic is not supported by the Windows msi installer from espeak-ng
# github release
not EspeakBackend.is_supported_language('ar')),
reason='Arabic is not supported')
def test_arabic():
backend = EspeakBackend('ar')
text = ['السلام عليكم']
sep = Separator()
# Arabic seems to have changed starting at espeak-ng-1.49.3
if EspeakBackend.version() >= (1, 49, 3):
expected = ['ʔassalaːm ʕliːkm ']
else:
expected = ['ʔassalaam ʕaliijkum ']
out = backend.phonemize(text, sep, False)
assert out == expected
# see https://github.com/bootphon/phonemizer/issues/31
def test_phone_separator_simple():
text = ['The lion and the tiger ran']
sep = Separator(phone='_')
backend = EspeakBackend('en-us')
output = backend.phonemize(text, separator=sep, strip=True)
expected = ['ð_ə l_aɪə_n æ_n_d ð_ə t_aɪ_ɡ_ɚ ɹ_æ_n']
assert expected == output
output = backend.phonemize(text, separator=sep, strip=False)
expected = ['ð_ə_ l_aɪə_n_ æ_n_d_ ð_ə_ t_aɪ_ɡ_ɚ_ ɹ_æ_n_ ']
assert expected == output
@pytest.mark.parametrize(
'text, expected',
(('the hello but the', 'ð_ə h_ə_l_oʊ b_ʌ_t ð_ə'),
# ('Here there and everywhere', 'h_ɪɹ ð_ɛɹ æ_n_d ɛ_v_ɹ_ɪ_w_ɛɹ'),
# ('He was hungry and tired.', 'h_iː w_ʌ_z h_ʌ_ŋ_ɡ_ɹ_i æ_n_d t_aɪɚ_d'),
('He was hungry but tired.', 'h_iː w_ʌ_z h_ʌ_ŋ_ɡ_ɹ_i b_ʌ_t t_aɪɚ_d')))
def test_phone_separator(text, expected):
sep = Separator(phone='_')
backend = EspeakBackend('en-us')
output = backend.phonemize([text], separator=sep, strip=True)[0]
assert output == expected
@pytest.mark.skipif(
'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
reason='cannot modify environment')
def test_path_good():
espeak = EspeakBackend.library()
try:
EspeakBackend.set_library(None)
assert espeak == EspeakBackend.library()
library = EspeakWrapper().library_path
EspeakBackend.set_library(library)
test_english()
# restore the espeak path to default
finally:
EspeakBackend.set_library(None)
@pytest.mark.skipif(
'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
reason='cannot modify environment')
def test_path_bad():
try:
# corrupt the default espeak path, try to use python executable instead
binary = shutil.which('python')
EspeakBackend.set_library(binary)
with pytest.raises(RuntimeError):
EspeakBackend('en-us')
with pytest.raises(RuntimeError):
EspeakBackend.version()
EspeakBackend.set_library(__file__)
with pytest.raises(RuntimeError):
EspeakBackend('en-us')
# restore the espeak path to default
finally:
EspeakBackend.set_library(None)
@pytest.mark.skipif(
'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
reason='cannot modify environment')
def test_path_venv():
try:
os.environ['PHONEMIZER_ESPEAK_LIBRARY'] = (
shutil.which('python'))
with pytest.raises(RuntimeError):
EspeakBackend('en-us').phonemize(['hello'])
with pytest.raises(RuntimeError):
EspeakBackend.version()
os.environ['PHONEMIZER_ESPEAK_LIBRARY'] = __file__
with pytest.raises(RuntimeError):
EspeakBackend.version()
finally:
try:
del os.environ['PHONEMIZER_ESPEAK_LIBRARY']
except KeyError:
pass
@pytest.mark.skipif(
not EspeakBackend.is_espeak_ng(),
reason='tie only compatible with espeak-ng')
@pytest.mark.parametrize(
'tie, expected', [
(False, 'dʒ_æ_k_i_ tʃ_æ_n_ '),
(True, 'd͡ʒæki t͡ʃæn '),
('8', 'd8ʒæki t8ʃæn ')])
def test_tie_simple(caplog, tie, expected):
backend = EspeakBackend('en-us', tie=tie)
assert backend.phonemize(
['Jackie Chan'],
separator=Separator(word=' ', phone='_'))[0] == expected
if tie:
messages = [msg[2] for msg in caplog.record_tuples]
assert (
'cannot use ties AND phone separation, ignoring phone separator'
in messages)
@pytest.mark.skipif(
not EspeakBackend.is_espeak_ng(),
reason='tie only compatible with espeak-ng')
def test_tie_utf8():
# NOTE this is a bug in espeak to append ties on (en) language switch
# flags. For now phonemizer does not fix it.
backend = EspeakBackend('fr-fr', tie=True)
# used to be 'bɔ̃͡ʒuʁ '
assert backend.phonemize(['bonjour']) == ['bɔ̃ʒuʁ ']
# used to be 'ty ɛm lə (͡e͡n͡)fʊtbɔ͡ːl(͡f͡r͡)'
assert backend.phonemize(
['tu aimes le football']) == ['ty ɛm lə (͡e͡n)fʊtbɔːl(͡f͡r) ']
assert backend.phonemize(
['bonjour apple']) == ['bɔ̃ʒuʁ (͡e͡n)apə͡l(͡f͡r) ']
@pytest.mark.skipif(
not EspeakBackend.is_espeak_ng(),
reason='tie only compatible with espeak-ng')
def test_tie_bad():
with pytest.raises(RuntimeError):
EspeakBackend('en-us', tie='abc')