sync/tts_cache

port the new TTS cache from mycroft-core
OpenVoiceOS · Nov 17, 2021 · 77439f1 · 77439f1
1 parent 7c2250d
commit 77439f1
Show file tree

Hide file tree

Showing 5 changed files with 340 additions and 10 deletions.
diff --git a/ovos_plugin_manager/templates/tts.py b/ovos_plugin_manager/templates/tts.py
@@ -22,28 +22,31 @@
         engine.playback.stop()
 """
 import hashlib
+import os
 import os.path
 import random
 import re
+import subprocess
+from inspect import signature
 from os.path import isfile, join
 from queue import Queue, Empty
 from threading import Thread
 from time import time, sleep
-import subprocess
-import os
-from inspect import signature
+
+from phoneme_guesser.exceptions import FailedToGuessPhonemes
 
 from ovos_utils import resolve_resource_file
+from ovos_utils.configuration import read_mycroft_config
 from ovos_utils.enclosure.api import EnclosureAPI
 from ovos_utils.lang.phonemes import get_phonemes
-from phoneme_guesser.exceptions import FailedToGuessPhonemes
 from ovos_utils.lang.visimes import VISIMES
 from ovos_utils.log import LOG
 from ovos_utils.messagebus import Message, FakeBus as BUS
+from ovos_utils.metrics import Stopwatch
 from ovos_utils.signal import check_for_signal, create_signal
 from ovos_utils.sound import play_mp3, play_wav
-from ovos_utils.metrics import Stopwatch
-from ovos_utils.configuration import read_mycroft_config
+import requests
+
 
 EMPTY_PLAYBACK_QUEUE_TUPLE = (None, None, None, None, None)
 
@@ -60,6 +63,7 @@ def get_cache_directory(folder):
             os.makedirs(path)
         return path
 
+
 class PlaybackThread(Thread):
     """Thread class for playing back tts audio and sending
     viseme data to enclosure.
@@ -628,3 +632,37 @@ def get_tts(self, sentence, wav_file, lang=None):
         files, phonemes = self.sentence_to_files(sentence)
         wav_file = self.concat(files, wav_file)
         return wav_file, phonemes
+
+
+class RemoteTTSException(Exception):
+    """Base exception type for errors related to a remote TTS engine."""
+
+
+class RemoteTTSTimeoutException(RemoteTTSException):
+    """Remote TTS exception subtype named for timeout conditions."""
+
+
+class RemoteTTS(TTS):
+    """Base class for TTS engines that fetch their audio over HTTP.
+
+    It is only provided so that mycroft plugins which do not use OPM can
+    still import it. Usage is discouraged.
+    """
+
+    def __init__(self, lang, config, url, api_path, validator):
+        super().__init__(lang, config, validator)
+        self.api_path = api_path
+        self.auth = None
+        # a "url" entry in the config takes precedence over the url argument
+        base_url = config.get('url', url)
+        self.url = base_url.rstrip('/')
+
+    def build_request_params(self, sentence):
+        """Return the query parameters sent along with the request.
+
+        The base implementation returns None; presumably subclasses are
+        expected to override it.
+        """
+        return None
+
+    def get_tts(self, sentence, wav_file, lang=None):
+        """Request audio for ``sentence`` and store it in ``wav_file``.
+
+        Returns:
+            (wav_file, None) when the server answers with status 200,
+            otherwise None.
+        """
+        endpoint = self.url + self.api_path
+        query = self.build_request_params(sentence)
+        # NOTE(review): verify=False disables TLS certificate verification
+        response = requests.get(endpoint, params=query, timeout=10,
+                                verify=False, auth=self.auth)
+        if response.status_code != 200:
+            # NOTE(review): the failure value is a bare None while success
+            # is a 2-tuple -- callers that unpack the result must check first
+            return None
+        with open(wav_file, 'wb') as out:
+            out.write(response.content)
+        return wav_file, None
diff --git a/ovos_plugin_manager/utils.py → ovos_plugin_manager/utils/__init__.py b/ovos_plugin_manager/utils.py → ovos_plugin_manager/utils/__init__.py
diff --git a/ovos_plugin_manager/utils/tts_cache.py b/ovos_plugin_manager/utils/tts_cache.py
@@ -0,0 +1,293 @@
+import hashlib
+import json
+import os
+import shutil
+from pathlib import Path
+from stat import S_ISREG, ST_MTIME, ST_MODE, ST_SIZE
+
+from ovos_utils.file_utils import get_cache_directory
+from ovos_utils.log import LOG
+
+
+def hash_sentence(sentence: str):
+    """Return the hex MD5 digest of a sentence, used as its cache file name.
+
+    Args:
+        sentence: The sentence to be cached
+
+    Returns:
+        Hexadecimal MD5 digest of the UTF-8 encoded sentence. Characters
+        that cannot be encoded are dropped before hashing.
+    """
+    digest = hashlib.md5(sentence.encode("utf-8", "ignore"))
+    return digest.hexdigest()
+
+
+def hash_from_path(path: Path) -> str:
+    """Return the file name of ``path`` with its last extension removed.
+
+    NOTE: despite the name, nothing is hashed here. The function is kept
+          for backwards compatible imports and is meant for cached TTS
+          files, whose file name already is a hash.
+
+    Args:
+        path: path to get hash from
+
+    Returns:
+        File name without folder structure and without the last suffix.
+    """
+    extensionless = path.with_suffix('')
+    return extensionless.name
+
+
+def mb_to_bytes(size):
+    """Convert a size given in MB to bytes (1 MB = 1024 * 1024 bytes).
+
+    Args:
+        size(int/float): size in Mega Bytes
+
+    Returns:
+        (int/float) size in bytes
+    """
+    bytes_per_mb = 1024 * 1024
+    return size * bytes_per_mb
+
+
+def _get_cache_entries(directory):
+    """Describe every regular file directly inside ``directory``.
+
+    Args:
+        directory (str): path to directory to check
+
+    Returns:
+        generator of (modification time, size, filepath) tuples
+    """
+    # the directory listing is read right away; the stat calls only happen
+    # while the returned generator is being consumed
+    names = os.listdir(directory)
+    stats = ((os.stat(filepath), filepath)
+             for filepath in (os.path.join(directory, name) for name in names))
+
+    # anything that is not a regular file (e.g. a sub directory) is dropped
+    return ((info[ST_MTIME], info[ST_SIZE], filepath)
+            for info, filepath in stats if S_ISREG(info[ST_MODE]))
+
+
+def _delete_oldest(entries, bytes_needed):
+    """Remove files, oldest modification date first, until enough is freed.
+
+    Args:
+        entries (tuple): (modification time, size, filepath) tuples
+        bytes_needed (int): disk space that needs to be freed
+
+    Returns:
+        (list) all removed paths
+    """
+    removed = []
+    freed = 0
+    # tuples compare field by field, so sorting puts the oldest file first
+    for _mtime, size, victim in sorted(entries):
+        try:
+            os.remove(victim)
+            freed += size
+            removed.append(victim)
+        except Exception:
+            # best effort: a file that can not be removed is skipped
+            pass
+
+        if freed > bytes_needed:
+            break  # deleted enough!
+
+    return removed
+
+
+def curate_cache(directory, min_free_percent=5.0, min_free_disk=50):
+    """Clear out the directory if needed.
+
+    The curation will only occur if both the percentage and the actual
+    disk space are below their limit. This assumes all the files in the
+    directory can be deleted freely.
+
+    Args:
+        directory (str): directory path that holds cached files
+        min_free_percent (float): percentage (0.0-100.0) of drive to keep free,
+                                  default is 5% if not specified.
+        min_free_disk (float): minimum allowed disk space in MB, default
+                               value is 50 MB if not specified.
+
+    Returns:
+        (list) paths of the files that were removed, empty when no
+        curation took place
+    """
+    # Simpleminded implementation -- keep a certain percentage of the
+    # disk available.
+    # TODO: Would be easy to add more options, like whitelisted files, etc.
+
+    # disk usage statistics of the drive holding the given path
+    usage = shutil.disk_usage(directory)
+    percent_free = usage.free * 100 / usage.total
+    free_floor = mb_to_bytes(min_free_disk)
+
+    low_on_space = (percent_free < min_free_percent and
+                    usage.free < free_floor)
+    if not low_on_space:
+        return []
+
+    LOG.info('Low diskspace detected, cleaning cache')
+    # number of bytes to delete in order to get back to min_free_percent
+    shortfall = (min_free_percent - percent_free) / 100.0 * usage.total
+    bytes_needed = int(shortfall + 1.0)
+
+    # stat every file in the directory, then delete as many as needed
+    # starting with the oldest
+    return _delete_oldest(_get_cache_entries(directory), bytes_needed)
+
+
+class AudioFile:
+    """A cached TTS audio file named ``<sentence_hash>.<file_type>``."""
+
+    def __init__(self, cache_dir: Path, sentence_hash: str, file_type: str):
+        file_name = f"{sentence_hash}.{file_type}"
+        self.name = file_name
+        self.path = cache_dir.joinpath(file_name)
+
+    def save(self, audio: bytes):
+        """Write a TTS cache file containing the audio to be spoken.
+
+        A failure to write is logged and not raised.
+
+        Args:
+            audio: TTS inference of a sentence
+        """
+        try:
+            with self.path.open("wb") as handle:
+                handle.write(audio)
+        except Exception:
+            LOG.exception("Failed to write {} to cache".format(self.name))
+
+    def exists(self):
+        """Tell whether the audio file is present on disk."""
+        on_disk = self.path.exists()
+        return on_disk
+
+
+class PhonemeFile:
+    """A cached phoneme file named ``<sentence_hash>.pho``, stored as JSON."""
+
+    def __init__(self, cache_dir: Path, sentence_hash: str):
+        self.name = f"{sentence_hash}.pho"
+        self.path = cache_dir.joinpath(self.name)
+
+    def load(self):
+        """Load phonemes from cache file.
+
+        Returns:
+            The decoded JSON content of the file, or None when the file
+            does not exist, can not be read or does not hold valid JSON.
+        """
+        if not self.path.exists():
+            # nothing cached; json.loads(None) would raise a TypeError
+            return None
+        try:
+            with open(self.path) as phoneme_file:
+                return json.loads(phoneme_file.read().strip())
+        except Exception:
+            # covers read errors as well as corrupt/empty files
+            LOG.exception("Failed to read phoneme from cache")
+        return None
+
+    def save(self, phonemes):
+        """Write a TTS cache file containing the phoneme to be displayed.
+
+        A failure to serialize or write is logged and not raised.
+
+        Args:
+            phonemes: instructions for how to make the mouth on a device move
+        """
+        try:
+            rec = json.dumps(phonemes)
+            with open(self.path, "w") as phoneme_file:
+                phoneme_file.write(rec)
+        except Exception:
+            LOG.error(f"Failed to write {self.name} to cache")
+
+    def exists(self):
+        """Tell whether the phoneme file is present on disk."""
+        return self.path.exists()
+
+
+class TextToSpeechCache:
+    """Class for all persistent and temporary caching operations."""
+
+    def __init__(self, tts_config, tts_name, audio_file_type):
+        self.config = tts_config
+        self.tts_name = tts_name
+
+        # the persistent cache only exists when "preloaded_cache" is configured
+        self.persistent_cache_dir = None
+        if "preloaded_cache" in self.config:
+            self.persistent_cache_dir = Path(self.config["preloaded_cache"])
+            os.makedirs(str(self.persistent_cache_dir), exist_ok=True)
+
+        temporary_dir = get_cache_directory("tts/" + tts_name)
+        self.temporary_cache_dir = Path(temporary_dir)
+        os.makedirs(str(self.temporary_cache_dir), exist_ok=True)
+
+        self.audio_file_type = audio_file_type
+        # sentence hash -> (audio file, phoneme file or None)
+        self.cached_sentences = {}
+        # free disk percentage handed to curate_cache() by curate()
+        self.min_free_percent = self.config.get("min_free_percent", 75)
+
+    def __contains__(self, sha):
+        """The cache contains a SHA if it knows of it and it exists on disk."""
+        if sha not in self.cached_sentences:
+            return False  # Doesn't know of it
+
+        audio, phonemes = self.cached_sentences[sha]
+        # Audio file must exist, phonemes are optional.
+        if not audio.exists():
+            return False
+        return phonemes is None or phonemes.exists()
+
+    def load_persistent_cache(self):
+        """Register the files already present in the persistent cache.
+
+        Does nothing when no persistent cache directory is configured.
+        Nothing is generated or downloaded here: the audio and phoneme
+        files found in the directory are only recorded in
+        ``cached_sentences``.
+        """
+        if self.persistent_cache_dir is None:
+            return
+        LOG.info("Adding dialog resources to persistent TTS cache...")
+        self._load_existing_audio_files()
+        self._load_existing_phoneme_files()
+        LOG.info("Persistent TTS cache files added successfully.")
+
+    def _load_existing_audio_files(self):
+        """Find the TTS audio files already in the persistent cache."""
+        pattern = "*." + self.audio_file_type
+        for found in self.persistent_cache_dir.glob(pattern):
+            # everything in front of the first dot is taken as the hash
+            sentence_hash = found.name.partition(".")[0]
+            self.cached_sentences[sentence_hash] = (
+                AudioFile(self.persistent_cache_dir, sentence_hash,
+                          self.audio_file_type),
+                None,
+            )
+
+    def _load_existing_phoneme_files(self):
+        """Find the TTS phoneme files already in the persistent cache.
+
+        A phoneme file is no good without an audio file to pair it with.  If
+        no audio file matches, do not load the phoneme.
+        """
+        for found in self.persistent_cache_dir.glob("*.pho"):
+            sentence_hash = found.name.partition(".")[0]
+            known = self.cached_sentences.get(sentence_hash)
+            if known is None:
+                continue  # no audio registered under this hash
+            audio_file = known[0]
+            self.cached_sentences[sentence_hash] = (
+                audio_file,
+                PhonemeFile(self.persistent_cache_dir, sentence_hash),
+            )
+
+    def clear(self):
+        """Remove all files from the temporary cache.
+
+        Only files directly inside the cache directory and files one
+        directory level below it are removed; directories are kept.
+        """
+        for entry in self.temporary_cache_dir.iterdir():
+            if entry.is_file():
+                entry.unlink()
+                continue
+            if not entry.is_dir():
+                continue
+            for child in entry.iterdir():
+                if child.is_file():
+                    child.unlink()
+
+    def curate(self):
+        """Remove cache data if disk space is running low."""
+        removed = curate_cache(str(self.temporary_cache_dir),
+                               min_free_percent=self.min_free_percent)
+        # forget every sentence whose file was deleted
+        for sentence_hash in {hash_from_path(Path(p)) for p in removed}:
+            self.cached_sentences.pop(sentence_hash, None)
+
+    def define_audio_file(self, sentence_hash: str) -> AudioFile:
+        """Build an instance of an object representing an audio file."""
+        return AudioFile(
+            self.temporary_cache_dir, sentence_hash, self.audio_file_type
+        )
+
+    def define_phoneme_file(self, sentence_hash: str) -> PhonemeFile:
+        """Build an instance of an object representing a phoneme file."""
+        return PhonemeFile(self.temporary_cache_dir, sentence_hash)
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,3 @@
-ovos_utils>=0.0.12a9
+ovos_utils>=0.0.14a2
 requests
-phoneme_guesser
-memory-tempfile
+phoneme_guesser
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='ovos-plugin-manager',
-    version='0.0.2',
+    version='0.0.3a1',
     packages=['ovos_plugin_manager', 'ovos_plugin_manager.templates'],
     url='https://github.com/OpenVoiceOS/OVOS-plugin-manager',
     license='Apache-2.0',