Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Python logging instead of print() #3

Merged
merged 6 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
refactor: remove verbose arguments
Verbosity can be controlled by adjusting logging levels instead.
  • Loading branch information
eginhard committed Apr 3, 2024
commit b711e19cb6783251cb5f771e75e9f6d1385513f6
4 changes: 2 additions & 2 deletions TTS/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def __init__(
gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False.
"""
super().__init__()
self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar, verbose=False)
self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar)
self.config = load_config(config_path) if config_path else None
self.synthesizer = None
self.voice_converter = None
Expand Down Expand Up @@ -125,7 +125,7 @@ def get_models_file_path():

@staticmethod
def list_models():
return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False).list_models()
return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False).list_models()

def download_model_by_name(self, model_name: str):
model_path, config_path, model_item = self.manager.download_model(model_name)
Expand Down
5 changes: 2 additions & 3 deletions TTS/bin/extract_tts_spectrograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
use_cuda = torch.cuda.is_available()


def setup_loader(ap, r, verbose=False):
def setup_loader(ap, r):
tokenizer, _ = TTSTokenizer.init_from_config(c)
dataset = TTSDataset(
outputs_per_step=r,
Expand All @@ -37,7 +37,6 @@ def setup_loader(ap, r, verbose=False):
phoneme_cache_path=c.phoneme_cache_path,
precompute_num_workers=0,
use_noise_augment=False,
verbose=verbose,
speaker_id_mapping=speaker_manager.name_to_id if c.use_speaker_embedding else None,
d_vector_mapping=speaker_manager.embeddings if c.use_d_vector_file else None,
)
Expand Down Expand Up @@ -257,7 +256,7 @@ def main(args): # pylint: disable=redefined-outer-name
print("\n > Model has {} parameters".format(num_params), flush=True)
# set r
r = 1 if c.model.lower() == "glow_tts" else model.decoder.r
own_loader = setup_loader(ap, r, verbose=True)
own_loader = setup_loader(ap, r)

extract_spectrograms(
own_loader,
Expand Down
9 changes: 5 additions & 4 deletions TTS/bin/train_encoder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
import os
import sys
import time
Expand Down Expand Up @@ -31,7 +32,7 @@
print(" > Number of GPUs: ", num_gpus)


def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False):
def setup_loader(ap: AudioProcessor, is_val: bool = False):
num_utter_per_class = c.num_utter_per_class if not is_val else c.eval_num_utter_per_class
num_classes_in_batch = c.num_classes_in_batch if not is_val else c.eval_num_classes_in_batch

Expand All @@ -42,7 +43,6 @@ def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False
voice_len=c.voice_len,
num_utter_per_class=num_utter_per_class,
num_classes_in_batch=num_classes_in_batch,
verbose=verbose,
augmentation_config=c.audio_augmentation if not is_val else None,
use_torch_spec=c.model_params.get("use_torch_spec", False),
)
Expand Down Expand Up @@ -278,9 +278,10 @@ def main(args): # pylint: disable=redefined-outer-name
# pylint: disable=redefined-outer-name
meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=True)

train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False, verbose=True)
logging.getLogger("TTS.encoder.dataset").setLevel(logging.INFO)
train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False)
if c.run_eval:
eval_data_loader, _, _ = setup_loader(ap, is_val=True, verbose=True)
eval_data_loader, _, _ = setup_loader(ap, is_val=True)
else:
eval_data_loader = None

Expand Down
1 change: 0 additions & 1 deletion TTS/bin/tune_wavegrad.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
return_segments=False,
use_noise_augment=False,
use_cache=False,
verbose=True,
)
loader = DataLoader(
dataset,
Expand Down
16 changes: 6 additions & 10 deletions TTS/encoder/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def __init__(
voice_len=1.6,
num_classes_in_batch=64,
num_utter_per_class=10,
verbose=False,
augmentation_config=None,
use_torch_spec=None,
):
Expand All @@ -27,7 +26,6 @@ def __init__(
ap (TTS.tts.utils.AudioProcessor): audio processor object.
meta_data (list): list of dataset instances.
voice_len (float): voice segment length in seconds.
verbose (bool): print diagnostic information.
"""
super().__init__()
self.config = config
Expand All @@ -36,7 +34,6 @@ def __init__(
self.seq_len = int(voice_len * self.sample_rate)
self.num_utter_per_class = num_utter_per_class
self.ap = ap
self.verbose = verbose
self.use_torch_spec = use_torch_spec
self.classes, self.items = self.__parse_items()

Expand All @@ -53,13 +50,12 @@ def __init__(
if "gaussian" in augmentation_config.keys():
self.gaussian_augmentation_config = augmentation_config["gaussian"]

if self.verbose:
logger.info("DataLoader initialization")
logger.info(" | Classes per batch: %d", num_classes_in_batch)
logger.info(" | Number of instances: %d", len(self.items))
logger.info(" | Sequence length: %d", self.seq_len)
logger.info(" | Number of classes: %d", len(self.classes))
logger.info(" | Classes: %d", self.classes)
logger.info("DataLoader initialization")
logger.info(" | Classes per batch: %d", num_classes_in_batch)
logger.info(" | Number of instances: %d", len(self.items))
logger.info(" | Sequence length: %d", self.seq_len)
logger.info(" | Number of classes: %d", len(self.classes))
logger.info(" | Classes: %d", self.classes)

def load_wav(self, filename):
audio = self.ap.load_wav(filename, sr=self.ap.sample_rate)
Expand Down
1 change: 1 addition & 0 deletions TTS/server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from TTS.utils.synthesizer import Synthesizer

logger = logging.getLogger(__name__)
logging.getLogger("TTS").setLevel(logging.INFO)


def create_argparser():
Expand Down
11 changes: 1 addition & 10 deletions TTS/tts/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def __init__(
language_id_mapping: Dict = None,
use_noise_augment: bool = False,
start_by_longest: bool = False,
verbose: bool = False,
):
"""Generic 📂 data loader for `tts` models. It is configurable for different outputs and needs.

Expand Down Expand Up @@ -140,8 +139,6 @@ def __init__(
use_noise_augment (bool): Enable adding random noise to wav for augmentation. Defaults to False.

start_by_longest (bool): Start by longest sequence. It is especially useful to check OOM. Defaults to False.

verbose (bool): Print diagnostic information. Defaults to false.
"""
super().__init__()
self.batch_group_size = batch_group_size
Expand All @@ -165,7 +162,6 @@ def __init__(
self.use_noise_augment = use_noise_augment
self.start_by_longest = start_by_longest

self.verbose = verbose
self.rescue_item_idx = 1
self.pitch_computed = False
self.tokenizer = tokenizer
Expand All @@ -183,8 +179,7 @@ def __init__(
self.energy_dataset = EnergyDataset(
self.samples, self.ap, cache_path=energy_cache_path, precompute_num_workers=precompute_num_workers
)
if self.verbose:
self.print_logs()
self.print_logs()

@property
def lengths(self):
Expand Down Expand Up @@ -700,14 +695,12 @@ def __init__(
samples: Union[List[List], List[Dict]],
ap: "AudioProcessor",
audio_config=None, # pylint: disable=unused-argument
verbose=False,
cache_path: str = None,
precompute_num_workers=0,
normalize_f0=True,
):
self.samples = samples
self.ap = ap
self.verbose = verbose
self.cache_path = cache_path
self.normalize_f0 = normalize_f0
self.pad_id = 0.0
Expand Down Expand Up @@ -850,14 +843,12 @@ def __init__(
self,
samples: Union[List[List], List[Dict]],
ap: "AudioProcessor",
verbose=False,
cache_path: str = None,
precompute_num_workers=0,
normalize_energy=True,
):
self.samples = samples
self.ap = ap
self.verbose = verbose
self.cache_path = cache_path
self.normalize_energy = normalize_energy
self.pad_id = 0.0
Expand Down
1 change: 0 additions & 1 deletion TTS/tts/models/base_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,6 @@ def get_data_loader(
phoneme_cache_path=config.phoneme_cache_path,
precompute_num_workers=config.precompute_num_workers,
use_noise_augment=False if is_eval else config.use_noise_augment,
verbose=verbose,
speaker_id_mapping=speaker_id_mapping,
d_vector_mapping=d_vector_mapping if config.use_d_vector_file else None,
tokenizer=self.tokenizer,
Expand Down
5 changes: 1 addition & 4 deletions TTS/tts/models/delightful_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,15 +331,13 @@ def __init__(
self,
ap,
samples: Union[List[List], List[Dict]],
verbose=False,
cache_path: str = None,
precompute_num_workers=0,
normalize_f0=True,
):
super().__init__(
samples=samples,
ap=ap,
verbose=verbose,
cache_path=cache_path,
precompute_num_workers=precompute_num_workers,
normalize_f0=normalize_f0,
Expand Down Expand Up @@ -1455,7 +1453,6 @@ def get_data_loader(
compute_f0=config.compute_f0,
f0_cache_path=config.f0_cache_path,
attn_prior_cache_path=config.attn_prior_cache_path if config.use_attn_priors else None,
verbose=verbose,
tokenizer=self.tokenizer,
start_by_longest=config.start_by_longest,
)
Expand Down Expand Up @@ -1532,7 +1529,7 @@ def on_epoch_end(self, trainer): # pylint: disable=unused-argument

@staticmethod
def init_from_config(
config: "DelightfulTTSConfig", samples: Union[List[List], List[Dict]] = None, verbose=False
config: "DelightfulTTSConfig", samples: Union[List[List], List[Dict]] = None
): # pylint: disable=unused-argument
"""Initiate model from config

Expand Down
7 changes: 3 additions & 4 deletions TTS/tts/models/glow_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class GlowTTS(BaseTTS):
>>> from TTS.tts.configs.glow_tts_config import GlowTTSConfig
>>> from TTS.tts.models.glow_tts import GlowTTS
>>> config = GlowTTSConfig()
>>> model = GlowTTS.init_from_config(config, verbose=False)
>>> model = GlowTTS.init_from_config(config)
"""

def __init__(
Expand Down Expand Up @@ -543,18 +543,17 @@ def on_train_step_start(self, trainer):
self.run_data_dep_init = trainer.total_steps_done < self.data_dep_init_steps

@staticmethod
def init_from_config(config: "GlowTTSConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
def init_from_config(config: "GlowTTSConfig", samples: Union[List[List], List[Dict]] = None):
"""Initiate model from config

Args:
config (GlowTTSConfig): Model config.
samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
Defaults to None.
verbose (bool): If True, print init messages. Defaults to True.
"""
from TTS.utils.audio import AudioProcessor

ap = AudioProcessor.init_from_config(config, verbose)
ap = AudioProcessor.init_from_config(config)
tokenizer, new_config = TTSTokenizer.init_from_config(config)
speaker_manager = SpeakerManager.init_from_config(config, samples)
return GlowTTS(new_config, ap, tokenizer, speaker_manager)
5 changes: 2 additions & 3 deletions TTS/tts/models/neuralhmm_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,18 +238,17 @@ def get_criterion():
return NLLLoss()

@staticmethod
def init_from_config(config: "NeuralhmmTTSConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
def init_from_config(config: "NeuralhmmTTSConfig", samples: Union[List[List], List[Dict]] = None):
"""Initiate model from config

Args:
config (NeuralhmmTTSConfig): Model config.
samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
Defaults to None.
verbose (bool): If True, print init messages. Defaults to True.
"""
from TTS.utils.audio import AudioProcessor

ap = AudioProcessor.init_from_config(config, verbose)
ap = AudioProcessor.init_from_config(config)
tokenizer, new_config = TTSTokenizer.init_from_config(config)
speaker_manager = SpeakerManager.init_from_config(config, samples)
return NeuralhmmTTS(new_config, ap, tokenizer, speaker_manager)
Expand Down
5 changes: 2 additions & 3 deletions TTS/tts/models/overflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,18 +253,17 @@ def get_criterion():
return NLLLoss()

@staticmethod
def init_from_config(config: "OverFlowConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
def init_from_config(config: "OverFlowConfig", samples: Union[List[List], List[Dict]] = None):
"""Initiate model from config

Args:
config (OverFlowConfig): Model config.
samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
Defaults to None.
verbose (bool): If True, print init messages. Defaults to True.
"""
from TTS.utils.audio import AudioProcessor

ap = AudioProcessor.init_from_config(config, verbose)
ap = AudioProcessor.init_from_config(config)
tokenizer, new_config = TTSTokenizer.init_from_config(config)
speaker_manager = SpeakerManager.init_from_config(config, samples)
return Overflow(new_config, ap, tokenizer, speaker_manager)
Expand Down
5 changes: 2 additions & 3 deletions TTS/tts/models/vits.py
Original file line number Diff line number Diff line change
Expand Up @@ -1612,7 +1612,6 @@ def get_data_loader(
max_audio_len=config.max_audio_len,
phoneme_cache_path=config.phoneme_cache_path,
precompute_num_workers=config.precompute_num_workers,
verbose=verbose,
tokenizer=self.tokenizer,
start_by_longest=config.start_by_longest,
)
Expand Down Expand Up @@ -1779,7 +1778,7 @@ def load_fairseq_checkpoint(
assert not self.training

@staticmethod
def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict]] = None):
"""Initiate model from config

Args:
Expand All @@ -1802,7 +1801,7 @@ def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict]
upsample_rate == effective_hop_length
), f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {effective_hop_length}"

ap = AudioProcessor.init_from_config(config, verbose=verbose)
ap = AudioProcessor.init_from_config(config)
tokenizer, new_config = TTSTokenizer.init_from_config(config)
speaker_manager = SpeakerManager.init_from_config(config, samples)
language_manager = LanguageManager.init_from_config(config)
Expand Down
18 changes: 6 additions & 12 deletions TTS/utils/audio/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,6 @@ class AudioProcessor(object):

stats_path (str, optional):
Path to the computed stats file. Defaults to None.

verbose (bool, optional):
enable/disable logging. Defaults to True.

"""

def __init__(
Expand Down Expand Up @@ -175,7 +171,6 @@ def __init__(
do_rms_norm=False,
db_level=None,
stats_path=None,
verbose=True,
**_,
):
# setup class attributes
Expand Down Expand Up @@ -231,10 +226,9 @@ def __init__(
self.win_length <= self.fft_size
), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
members = vars(self)
if verbose:
logger.info("Setting up Audio Processor...")
for key, value in members.items():
logger.info(" | %s: %s", key, value)
logger.info("Setting up Audio Processor...")
for key, value in members.items():
logger.info(" | %s: %s", key, value)
# create spectrogram utils
self.mel_basis = build_mel_basis(
sample_rate=self.sample_rate,
Expand All @@ -253,10 +247,10 @@ def __init__(
self.symmetric_norm = None

@staticmethod
def init_from_config(config: "Coqpit", verbose=True):
def init_from_config(config: "Coqpit"):
if "audio" in config:
return AudioProcessor(verbose=verbose, **config.audio)
return AudioProcessor(verbose=verbose, **config)
return AudioProcessor(**config.audio)
return AudioProcessor(**config)

### normalization ###
def normalize(self, S: np.ndarray) -> np.ndarray:
Expand Down
Loading