refactor: remove verbose arguments

Verbosity can be controlled by adjusting logging levels instead, e.g. `logging.getLogger("TTS").setLevel(logging.INFO)`.
idiap · eginhard · Apr 11, 2024 · Nov 13, 2023 · Nov 18, 2023 · Apr 2, 2024
commit b711e19cb6783251cb5f771e75e9f6d1385513f6
diff --git a/TTS/api.py b/TTS/api.py
@@ -62,7 +62,7 @@ def __init__(
             gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False.
         """
         super().__init__()
-        self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar, verbose=False)
+        self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar)
         self.config = load_config(config_path) if config_path else None
         self.synthesizer = None
         self.voice_converter = None
@@ -125,7 +125,7 @@ def get_models_file_path():
 
     @staticmethod
     def list_models():
-        return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False).list_models()
+        return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False).list_models()
 
     def download_model_by_name(self, model_name: str):
         model_path, config_path, model_item = self.manager.download_model(model_name)

diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py
@@ -21,7 +21,7 @@
 use_cuda = torch.cuda.is_available()
 
 
-def setup_loader(ap, r, verbose=False):
+def setup_loader(ap, r):
     tokenizer, _ = TTSTokenizer.init_from_config(c)
     dataset = TTSDataset(
         outputs_per_step=r,
@@ -37,7 +37,6 @@ def setup_loader(ap, r, verbose=False):
         phoneme_cache_path=c.phoneme_cache_path,
         precompute_num_workers=0,
         use_noise_augment=False,
-        verbose=verbose,
         speaker_id_mapping=speaker_manager.name_to_id if c.use_speaker_embedding else None,
         d_vector_mapping=speaker_manager.embeddings if c.use_d_vector_file else None,
     )
@@ -257,7 +256,7 @@ def main(args):  # pylint: disable=redefined-outer-name
     print("\n > Model has {} parameters".format(num_params), flush=True)
     # set r
     r = 1 if c.model.lower() == "glow_tts" else model.decoder.r
-    own_loader = setup_loader(ap, r, verbose=True)
+    own_loader = setup_loader(ap, r)
 
     extract_spectrograms(
         own_loader,

diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
+import logging
 import os
 import sys
 import time
@@ -31,7 +32,7 @@
 print(" > Number of GPUs: ", num_gpus)
 
 
-def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False):
+def setup_loader(ap: AudioProcessor, is_val: bool = False):
     num_utter_per_class = c.num_utter_per_class if not is_val else c.eval_num_utter_per_class
     num_classes_in_batch = c.num_classes_in_batch if not is_val else c.eval_num_classes_in_batch
 
@@ -42,7 +43,6 @@ def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False
         voice_len=c.voice_len,
         num_utter_per_class=num_utter_per_class,
         num_classes_in_batch=num_classes_in_batch,
-        verbose=verbose,
         augmentation_config=c.audio_augmentation if not is_val else None,
         use_torch_spec=c.model_params.get("use_torch_spec", False),
     )
@@ -278,9 +278,10 @@ def main(args):  # pylint: disable=redefined-outer-name
     # pylint: disable=redefined-outer-name
     meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=True)
 
-    train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False, verbose=True)
+    logging.getLogger("TTS.encoder.dataset").setLevel(logging.INFO)
+    train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False)
     if c.run_eval:
-        eval_data_loader, _, _ = setup_loader(ap, is_val=True, verbose=True)
+        eval_data_loader, _, _ = setup_loader(ap, is_val=True)
     else:
         eval_data_loader = None
 

diff --git a/TTS/bin/tune_wavegrad.py b/TTS/bin/tune_wavegrad.py
@@ -55,7 +55,6 @@
         return_segments=False,
         use_noise_augment=False,
         use_cache=False,
-        verbose=True,
     )
     loader = DataLoader(
         dataset,

diff --git a/TTS/encoder/dataset.py b/TTS/encoder/dataset.py
@@ -18,7 +18,6 @@ def __init__(
         voice_len=1.6,
         num_classes_in_batch=64,
         num_utter_per_class=10,
-        verbose=False,
         augmentation_config=None,
         use_torch_spec=None,
     ):
@@ -27,7 +26,6 @@ def __init__(
             ap (TTS.tts.utils.AudioProcessor): audio processor object.
             meta_data (list): list of dataset instances.
             seq_len (int): voice segment length in seconds.
-            verbose (bool): print diagnostic information.
         """
         super().__init__()
         self.config = config
@@ -36,7 +34,6 @@ def __init__(
         self.seq_len = int(voice_len * self.sample_rate)
         self.num_utter_per_class = num_utter_per_class
         self.ap = ap
-        self.verbose = verbose
         self.use_torch_spec = use_torch_spec
         self.classes, self.items = self.__parse_items()
 
@@ -53,13 +50,12 @@ def __init__(
             if "gaussian" in augmentation_config.keys():
                 self.gaussian_augmentation_config = augmentation_config["gaussian"]
 
-        if self.verbose:
-            logger.info("DataLoader initialization")
-            logger.info(" | Classes per batch: %d", num_classes_in_batch)
-            logger.info(" | Number of instances: %d", len(self.items))
-            logger.info(" | Sequence length: %d", self.seq_len)
-            logger.info(" | Number of classes: %d", len(self.classes))
-            logger.info(" | Classes: %d", self.classes)
+        logger.info("DataLoader initialization")
+        logger.info(" | Classes per batch: %d", num_classes_in_batch)
+        logger.info(" | Number of instances: %d", len(self.items))
+        logger.info(" | Sequence length: %d", self.seq_len)
+        logger.info(" | Number of classes: %d", len(self.classes))
+        logger.info(" | Classes: %s", self.classes)
 
     def load_wav(self, filename):
         audio = self.ap.load_wav(filename, sr=self.ap.sample_rate)

diff --git a/TTS/server/server.py b/TTS/server/server.py
@@ -20,6 +20,7 @@
 from TTS.utils.synthesizer import Synthesizer
 
 logger = logging.getLogger(__name__)
+logging.getLogger("TTS").setLevel(logging.INFO)
 
 
 def create_argparser():

diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py
@@ -82,7 +82,6 @@ def __init__(
         language_id_mapping: Dict = None,
         use_noise_augment: bool = False,
         start_by_longest: bool = False,
-        verbose: bool = False,
     ):
         """Generic 📂 data loader for `tts` models. It is configurable for different outputs and needs.
 
@@ -140,8 +139,6 @@ def __init__(
             use_noise_augment (bool): Enable adding random noise to wav for augmentation. Defaults to False.
 
             start_by_longest (bool): Start by longest sequence. It is especially useful to check OOM. Defaults to False.
-
-            verbose (bool): Print diagnostic information. Defaults to false.
         """
         super().__init__()
         self.batch_group_size = batch_group_size
@@ -165,7 +162,6 @@ def __init__(
         self.use_noise_augment = use_noise_augment
         self.start_by_longest = start_by_longest
 
-        self.verbose = verbose
         self.rescue_item_idx = 1
         self.pitch_computed = False
         self.tokenizer = tokenizer
@@ -183,8 +179,7 @@ def __init__(
             self.energy_dataset = EnergyDataset(
                 self.samples, self.ap, cache_path=energy_cache_path, precompute_num_workers=precompute_num_workers
             )
-        if self.verbose:
-            self.print_logs()
+        self.print_logs()
 
     @property
     def lengths(self):
@@ -700,14 +695,12 @@ def __init__(
         samples: Union[List[List], List[Dict]],
         ap: "AudioProcessor",
         audio_config=None,  # pylint: disable=unused-argument
-        verbose=False,
         cache_path: str = None,
         precompute_num_workers=0,
         normalize_f0=True,
     ):
         self.samples = samples
         self.ap = ap
-        self.verbose = verbose
         self.cache_path = cache_path
         self.normalize_f0 = normalize_f0
         self.pad_id = 0.0
@@ -850,14 +843,12 @@ def __init__(
         self,
         samples: Union[List[List], List[Dict]],
         ap: "AudioProcessor",
-        verbose=False,
         cache_path: str = None,
         precompute_num_workers=0,
         normalize_energy=True,
     ):
         self.samples = samples
         self.ap = ap
-        self.verbose = verbose
         self.cache_path = cache_path
         self.normalize_energy = normalize_energy
         self.pad_id = 0.0

diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py
@@ -333,7 +333,6 @@ def get_data_loader(
                 phoneme_cache_path=config.phoneme_cache_path,
                 precompute_num_workers=config.precompute_num_workers,
                 use_noise_augment=False if is_eval else config.use_noise_augment,
-                verbose=verbose,
                 speaker_id_mapping=speaker_id_mapping,
                 d_vector_mapping=d_vector_mapping if config.use_d_vector_file else None,
                 tokenizer=self.tokenizer,

diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py
@@ -331,15 +331,13 @@ def __init__(
         self,
         ap,
         samples: Union[List[List], List[Dict]],
-        verbose=False,
         cache_path: str = None,
         precompute_num_workers=0,
         normalize_f0=True,
     ):
         super().__init__(
             samples=samples,
             ap=ap,
-            verbose=verbose,
             cache_path=cache_path,
             precompute_num_workers=precompute_num_workers,
             normalize_f0=normalize_f0,
@@ -1455,7 +1453,6 @@ def get_data_loader(
                 compute_f0=config.compute_f0,
                 f0_cache_path=config.f0_cache_path,
                 attn_prior_cache_path=config.attn_prior_cache_path if config.use_attn_priors else None,
-                verbose=verbose,
                 tokenizer=self.tokenizer,
                 start_by_longest=config.start_by_longest,
             )
@@ -1532,7 +1529,7 @@ def on_epoch_end(self, trainer):  # pylint: disable=unused-argument
 
     @staticmethod
     def init_from_config(
-        config: "DelightfulTTSConfig", samples: Union[List[List], List[Dict]] = None, verbose=False
+        config: "DelightfulTTSConfig", samples: Union[List[List], List[Dict]] = None
     ):  # pylint: disable=unused-argument
         """Initiate model from config
 

diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py
@@ -56,7 +56,7 @@ class GlowTTS(BaseTTS):
         >>> from TTS.tts.configs.glow_tts_config import GlowTTSConfig
         >>> from TTS.tts.models.glow_tts import GlowTTS
         >>> config = GlowTTSConfig()
-        >>> model = GlowTTS.init_from_config(config, verbose=False)
+        >>> model = GlowTTS.init_from_config(config)
     """
 
     def __init__(
@@ -543,18 +543,17 @@ def on_train_step_start(self, trainer):
         self.run_data_dep_init = trainer.total_steps_done < self.data_dep_init_steps
 
     @staticmethod
-    def init_from_config(config: "GlowTTSConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
+    def init_from_config(config: "GlowTTSConfig", samples: Union[List[List], List[Dict]] = None):
         """Initiate model from config
 
         Args:
             config (VitsConfig): Model config.
             samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
                 Defaults to None.
-            verbose (bool): If True, print init messages. Defaults to True.
         """
         from TTS.utils.audio import AudioProcessor
 
-        ap = AudioProcessor.init_from_config(config, verbose)
+        ap = AudioProcessor.init_from_config(config)
         tokenizer, new_config = TTSTokenizer.init_from_config(config)
         speaker_manager = SpeakerManager.init_from_config(config, samples)
         return GlowTTS(new_config, ap, tokenizer, speaker_manager)
diff --git a/TTS/tts/models/neuralhmm_tts.py b/TTS/tts/models/neuralhmm_tts.py
@@ -238,18 +238,17 @@ def get_criterion():
         return NLLLoss()
 
     @staticmethod
-    def init_from_config(config: "NeuralhmmTTSConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
+    def init_from_config(config: "NeuralhmmTTSConfig", samples: Union[List[List], List[Dict]] = None):
         """Initiate model from config
 
         Args:
             config (VitsConfig): Model config.
             samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
                 Defaults to None.
-            verbose (bool): If True, print init messages. Defaults to True.
         """
         from TTS.utils.audio import AudioProcessor
 
-        ap = AudioProcessor.init_from_config(config, verbose)
+        ap = AudioProcessor.init_from_config(config)
         tokenizer, new_config = TTSTokenizer.init_from_config(config)
         speaker_manager = SpeakerManager.init_from_config(config, samples)
         return NeuralhmmTTS(new_config, ap, tokenizer, speaker_manager)

diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py
@@ -253,18 +253,17 @@ def get_criterion():
         return NLLLoss()
 
     @staticmethod
-    def init_from_config(config: "OverFlowConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
+    def init_from_config(config: "OverFlowConfig", samples: Union[List[List], List[Dict]] = None):
         """Initiate model from config
 
         Args:
             config (VitsConfig): Model config.
             samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training.
                 Defaults to None.
-            verbose (bool): If True, print init messages. Defaults to True.
         """
         from TTS.utils.audio import AudioProcessor
 
-        ap = AudioProcessor.init_from_config(config, verbose)
+        ap = AudioProcessor.init_from_config(config)
         tokenizer, new_config = TTSTokenizer.init_from_config(config)
         speaker_manager = SpeakerManager.init_from_config(config, samples)
         return Overflow(new_config, ap, tokenizer, speaker_manager)

diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py
@@ -1612,7 +1612,6 @@ def get_data_loader(
                 max_audio_len=config.max_audio_len,
                 phoneme_cache_path=config.phoneme_cache_path,
                 precompute_num_workers=config.precompute_num_workers,
-                verbose=verbose,
                 tokenizer=self.tokenizer,
                 start_by_longest=config.start_by_longest,
             )
@@ -1779,7 +1778,7 @@ def load_fairseq_checkpoint(
             assert not self.training
 
     @staticmethod
-    def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
+    def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict]] = None):
         """Initiate model from config
 
         Args:
@@ -1802,7 +1801,7 @@ def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict]
                 upsample_rate == effective_hop_length
             ), f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {effective_hop_length}"
 
-        ap = AudioProcessor.init_from_config(config, verbose=verbose)
+        ap = AudioProcessor.init_from_config(config)
         tokenizer, new_config = TTSTokenizer.init_from_config(config)
         speaker_manager = SpeakerManager.init_from_config(config, samples)
         language_manager = LanguageManager.init_from_config(config)

diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py
@@ -135,10 +135,6 @@ class AudioProcessor(object):
 
         stats_path (str, optional):
             Path to the computed stats file. Defaults to None.
-
-        verbose (bool, optional):
-            enable/disable logging. Defaults to True.
-
     """
 
     def __init__(
@@ -175,7 +171,6 @@ def __init__(
         do_rms_norm=False,
         db_level=None,
         stats_path=None,
-        verbose=True,
         **_,
     ):
         # setup class attributed
@@ -231,10 +226,9 @@ def __init__(
             self.win_length <= self.fft_size
         ), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
         members = vars(self)
-        if verbose:
-            logger.info("Setting up Audio Processor...")
-            for key, value in members.items():
-                logger.info(" | %s: %s", key, value)
+        logger.info("Setting up Audio Processor...")
+        for key, value in members.items():
+            logger.info(" | %s: %s", key, value)
         # create spectrogram utils
         self.mel_basis = build_mel_basis(
             sample_rate=self.sample_rate,
@@ -253,10 +247,10 @@ def __init__(
             self.symmetric_norm = None
 
     @staticmethod
-    def init_from_config(config: "Coqpit", verbose=True):
+    def init_from_config(config: "Coqpit"):
         if "audio" in config:
-            return AudioProcessor(verbose=verbose, **config.audio)
-        return AudioProcessor(verbose=verbose, **config)
+            return AudioProcessor(**config.audio)
+        return AudioProcessor(**config)
 
     ### normalization ###
     def normalize(self, S: np.ndarray) -> np.ndarray: