Skip to content

Commit

Permalink
Merge pull request #217 from idiap/stdout
Browse files Browse the repository at this point in the history
fix(bin): log to stdout in cli tools
  • Loading branch information
eginhard authored Dec 17, 2024
2 parents 9d5fc60 + 6a52c8a commit 370fb1d
Show file tree
Hide file tree
Showing 17 changed files with 56 additions and 28 deletions.
3 changes: 2 additions & 1 deletion TTS/bin/compute_attention_masks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import importlib
import logging
import os
import sys
from argparse import RawTextHelpFormatter

import numpy as np
Expand All @@ -18,7 +19,7 @@
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger

if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

# pylint: disable=bad-option-value
parser = argparse.ArgumentParser(
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/compute_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import logging
import os
import sys
from argparse import RawTextHelpFormatter

import torch
Expand Down Expand Up @@ -102,7 +103,7 @@ def compute_embeddings(


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n"""
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/compute_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import glob
import logging
import os
import sys

import numpy as np
from tqdm import tqdm
Expand All @@ -18,7 +19,7 @@

def main():
"""Run preprocessing process."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.")
parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.")
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/eval_encoder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import logging
import sys
from argparse import RawTextHelpFormatter

import torch
Expand Down Expand Up @@ -53,7 +54,7 @@ def compute_encoder_accuracy(dataset_items, encoder_manager):


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="""Compute the accuracy of the encoder.\n\n"""
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/extract_tts_spectrograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import argparse
import logging
import os
import sys

import numpy as np
import torch
Expand Down Expand Up @@ -273,7 +274,7 @@ def main(args): # pylint: disable=redefined-outer-name


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser()
parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True)
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/find_unique_chars.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import argparse
import logging
import sys
from argparse import RawTextHelpFormatter

from TTS.config import load_config
Expand All @@ -10,7 +11,7 @@


def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

# pylint: disable=bad-option-value
parser = argparse.ArgumentParser(
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/find_unique_phonemes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import logging
import multiprocessing
import sys
from argparse import RawTextHelpFormatter

from tqdm.contrib.concurrent import process_map
Expand All @@ -20,7 +21,7 @@ def compute_phonemes(item):


def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

# pylint: disable=W0601
global c, phonemizer
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/remove_silence_using_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import multiprocessing
import os
import pathlib
import sys

import torch
from tqdm import tqdm
Expand Down Expand Up @@ -77,7 +78,7 @@ def preprocess_audios():


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end"
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/synthesize.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,9 @@ def parse_args() -> argparse.Namespace:

def main() -> None:
"""Entry point for `tts` command line interface."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
args = parse_args()
stream = sys.stderr if args.pipe_out else sys.stdout
setup_logger("TTS", level=logging.INFO, stream=stream, formatter=ConsoleFormatter())

pipe_out = sys.stdout if args.pipe_out else None

Expand Down
2 changes: 1 addition & 1 deletion TTS/bin/train_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def main(args): # pylint: disable=redefined-outer-name


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training()

Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/train_tts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
import sys
from dataclasses import dataclass, field

from trainer import Trainer, TrainerArgs
Expand All @@ -17,7 +18,7 @@ class TrainTTSArgs(TrainerArgs):

def main():
"""Run `tts` model training directly by a `config.json` file."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

# init trainer args
train_args = TrainTTSArgs()
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/train_vocoder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
import sys
from dataclasses import dataclass, field

from trainer import Trainer, TrainerArgs
Expand All @@ -18,7 +19,7 @@ class TrainVocoderArgs(TrainerArgs):

def main():
"""Run `tts` model training directly by a `config.json` file."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

# init trainer args
train_args = TrainVocoderArgs()
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/tune_wavegrad.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import argparse
import logging
import sys
from itertools import product as cartesian_product

import numpy as np
Expand All @@ -17,7 +18,7 @@
from TTS.vocoder.models import setup_model

if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, help="Path to model checkpoint.")
Expand Down
2 changes: 1 addition & 1 deletion TTS/encoder/utils/prepare_voxceleb.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def processor(directory, subset, force_process):


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
if len(sys.argv) != 4:
print("Usage: python prepare_data.py save_directory user password")
sys.exit()
Expand Down
2 changes: 1 addition & 1 deletion TTS/server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from TTS.utils.synthesizer import Synthesizer

logger = logging.getLogger(__name__)
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())


def create_argparser() -> argparse.ArgumentParser:
Expand Down
24 changes: 17 additions & 7 deletions TTS/utils/generic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import datetime
import importlib
import logging
import os
import re
from pathlib import Path
from typing import Any, Callable, Dict, Optional, TypeVar, Union
from typing import Any, Callable, Dict, Optional, TextIO, TypeVar, Union

import torch
from packaging.version import Version
Expand Down Expand Up @@ -107,25 +108,34 @@ def setup_logger(
level: int = logging.INFO,
*,
formatter: Optional[logging.Formatter] = None,
screen: bool = False,
tofile: bool = False,
log_dir: str = "logs",
stream: Optional[TextIO] = None,
log_dir: Optional[Union[str, os.PathLike[Any]]] = None,
log_name: str = "log",
) -> None:
"""Set up a logger.
Args:
logger_name: Name of the logger to set up
level: Logging level
formatter: Formatter for the logger
stream: Add a StreamHandler for the given stream, e.g. sys.stderr or sys.stdout
log_dir: Folder to write the log file (no file created if None)
log_name: Prefix of the log file name
"""
lg = logging.getLogger(logger_name)
if formatter is None:
formatter = logging.Formatter(
"%(asctime)s.%(msecs)03d - %(levelname)-8s - %(name)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S"
)
lg.setLevel(level)
if tofile:
if log_dir is not None:
Path(log_dir).mkdir(exist_ok=True, parents=True)
log_file = Path(log_dir) / f"{log_name}_{get_timestamp()}.log"
fh = logging.FileHandler(log_file, mode="w")
fh.setFormatter(formatter)
lg.addHandler(fh)
if screen:
sh = logging.StreamHandler()
if stream is not None:
sh = logging.StreamHandler(stream)
sh.setFormatter(formatter)
lg.addHandler(sh)

Expand Down
18 changes: 12 additions & 6 deletions docs/source/models/xtts.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,13 @@ from TTS.api import TTS
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")

# generate speech by cloning a voice using default settings
tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
file_path="output.wav",
speaker="Ana Florence",
language="en",
split_sentences=True
)
tts.tts_to_file(
text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
file_path="output.wav",
speaker="Ana Florence",
language="en",
split_sentences=True
)
```


Expand Down Expand Up @@ -230,6 +231,11 @@ out = model.inference(
torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
```

You can also use the Coqui speakers:

```python
gpt_cond_latent, speaker_embedding = model.speaker_manager.speakers["Ana Florence"].values()
```

#### Streaming manually

Expand Down

0 comments on commit 370fb1d

Please sign in to comment.