Skip to content

Commit

Permalink
Merge pull request #17 from idiap/espeak-stderr
Browse files Browse the repository at this point in the history
fix(espeak_wrapper): capture stderr separately
  • Loading branch information
eginhard authored May 7, 2024
2 parents 0630450 + 98e21d0 commit 63bfb9f
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 43 deletions.
89 changes: 47 additions & 42 deletions TTS/tts/utils/text/phonemizers/espeak_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Wrapper to call the espeak/espeak-ng phonemizer."""

import logging
import re
import subprocess
from typing import Dict, List
from typing import Optional

from packaging.version import Version

Expand All @@ -11,7 +13,7 @@
logger = logging.getLogger(__name__)


def is_tool(name):
def _is_tool(name) -> bool:
from shutil import which

return which(name) is not None
Expand All @@ -22,31 +24,33 @@ def is_tool(name):
espeak_version_pattern = re.compile(r"text-to-speech:\s(?P<version>\d+\.\d+(\.\d+)?)")


def get_espeak_version():
def get_espeak_version() -> str:
"""Return version of the `espeak` binary."""
output = subprocess.getoutput("espeak --version")
match = espeak_version_pattern.search(output)

return match.group("version")


def get_espeakng_version():
def get_espeakng_version() -> str:
"""Return version of the `espeak-ng` binary."""
output = subprocess.getoutput("espeak-ng --version")
return output.split()[3]


# priority: espeakng > espeak
if is_tool("espeak-ng"):
if _is_tool("espeak-ng"):
_DEF_ESPEAK_LIB = "espeak-ng"
_DEF_ESPEAK_VER = get_espeakng_version()
elif is_tool("espeak"):
elif _is_tool("espeak"):
_DEF_ESPEAK_LIB = "espeak"
_DEF_ESPEAK_VER = get_espeak_version()
else:
_DEF_ESPEAK_LIB = None
_DEF_ESPEAK_VER = None


def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[bytes]:
"""Run espeak with the given arguments."""
cmd = [
espeak_lib,
Expand All @@ -60,19 +64,20 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
with subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stderr=subprocess.PIPE,
) as p:
res = iter(p.stdout.readline, b"")
err = iter(p.stderr.readline, b"")
for line in err:
logger.warning("espeakng: %s", line.decode("utf-8").strip())
if not sync:
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
return res
res2 = []
for line in res:
res2.append(line)
res2 = list(res)
p.stdout.close()
if p.stderr:
p.stderr.close()
Expand All @@ -83,7 +88,7 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:


class ESpeak(BasePhonemizer):
"""ESpeak wrapper calling `espeak` or `espeak-ng` from the command-line the perform G2P
"""Wrapper calling `espeak` or `espeak-ng` from the command-line to perform G2P.
Args:
language (str):
Expand All @@ -108,13 +113,17 @@ class ESpeak(BasePhonemizer):
"""

_ESPEAK_LIB = _DEF_ESPEAK_LIB
_ESPEAK_VER = _DEF_ESPEAK_VER

def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
if self._ESPEAK_LIB is None:
raise Exception(" [!] No espeak backend found. Install espeak-ng or espeak to your system.")
self.backend = self._ESPEAK_LIB
def __init__(
self,
language: str,
backend: Optional[str] = None,
punctuations: str = Punctuation.default_puncs(),
keep_puncs: bool = True,
):
if _DEF_ESPEAK_LIB is None:
msg = "[!] No espeak backend found. Install espeak-ng or espeak to your system."
raise FileNotFoundError(msg)
self.backend = _DEF_ESPEAK_LIB

# band-aid for backwards compatibility
if language == "en":
Expand All @@ -127,35 +136,37 @@ def __init__(self, language: str, backend=None, punctuations=Punctuation.default
self.backend = backend

@property
def backend(self):
def backend(self) -> str:
return self._ESPEAK_LIB

@property
def backend_version(self):
def backend_version(self) -> str:
return self._ESPEAK_VER

@backend.setter
def backend(self, backend):
def backend(self, backend: str) -> None:
if backend not in ["espeak", "espeak-ng"]:
raise Exception("Unknown backend: %s" % backend)
msg = f"Unknown backend: {backend}"
raise ValueError(msg)
self._ESPEAK_LIB = backend
self._ESPEAK_VER = get_espeakng_version() if backend == "espeak-ng" else get_espeak_version()

def auto_set_espeak_lib(self) -> None:
if is_tool("espeak-ng"):
if _is_tool("espeak-ng"):
self._ESPEAK_LIB = "espeak-ng"
self._ESPEAK_VER = get_espeakng_version()
elif is_tool("espeak"):
elif _is_tool("espeak"):
self._ESPEAK_LIB = "espeak"
self._ESPEAK_VER = get_espeak_version()
else:
raise Exception("Cannot set backend automatically. espeak-ng or espeak not found")
msg = "Cannot set backend automatically. espeak-ng or espeak not found"
raise FileNotFoundError(msg)

@staticmethod
def name():
def name() -> str:
return "espeak"

def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str:
def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False) -> str:
"""Convert input text to phonemes.
Args:
Expand Down Expand Up @@ -190,7 +201,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str:
args.append(text)
# compute phonemes
phonemes = ""
for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True):
for line in _espeak_exe(self.backend, args, sync=True):
logger.debug("line: %s", repr(line))
ph_decoded = line.decode("utf8").strip()
# espeak:
Expand All @@ -207,11 +218,11 @@ def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str:
phonemes += ph_decoded.strip()
return phonemes.replace("_", separator)

def _phonemize(self, text, separator=None):
def _phonemize(self, text: str, separator: str = "") -> str:
return self.phonemize_espeak(text, separator, tie=False)

@staticmethod
def supported_languages() -> Dict:
def supported_languages() -> dict[str, str]:
"""Get a dictionary of supported languages.
Returns:
Expand All @@ -221,16 +232,14 @@ def supported_languages() -> Dict:
return {}
args = ["--voices"]
langs = {}
count = 0
for line in _espeak_exe(_DEF_ESPEAK_LIB, args, sync=True):
for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)):
line = line.decode("utf8").strip()
if count > 0:
cols = line.split()
lang_code = cols[1]
lang_name = cols[3]
langs[lang_code] = lang_name
logger.debug("line: %s", repr(line))
count += 1
return langs

def version(self) -> str:
Expand All @@ -239,16 +248,12 @@ def version(self) -> str:
Returns:
str: Version of the used backend.
"""
args = ["--version"]
for line in _espeak_exe(self.backend, args, sync=True):
version = line.decode("utf8").strip().split()[2]
logger.debug("line: %s", repr(line))
return version
return self.backend_version

@classmethod
def is_available(cls):
"""Return true if ESpeak is available else false"""
return is_tool("espeak") or is_tool("espeak-ng")
def is_available(cls) -> bool:
"""Return true if ESpeak is available else false."""
return _is_tool("espeak") or _is_tool("espeak-ng")


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ requires = [
]

[tool.ruff]
target-version = "py39"
line-length = 120
lint.extend-select = [
"B033", # duplicate-value
Expand Down
2 changes: 1 addition & 1 deletion tests/tts_tests/test_losses.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def test_in_out(self): # pylint: disable=no-self-use
late_x = -200.0 * sequence_mask(length + 1, 100).float() + 100.0 # simulate logits on late stopping

loss = layer(true_x, target, length)
self.assertEqual(loss.item(), 0.0)
self.assertAlmostEqual(loss.item(), 0.0)

loss = layer(early_x, target, length)
self.assertAlmostEqual(loss.item(), 2.1053, places=4)
Expand Down

0 comments on commit 63bfb9f

Please sign in to comment.