fix linter

coqui-ai · Dec 20, 2021 · e995a63
1 parent 1472b6d
commit e995a63
Show file tree

Hide file tree

Showing 6 changed files with 14 additions and 22 deletions.
diff --git a/TTS/bin/find_unique_phonemes.py b/TTS/bin/find_unique_phonemes.py
@@ -3,7 +3,6 @@
 import multiprocessing
 from argparse import RawTextHelpFormatter
 
-import numpy
 from tqdm.contrib.concurrent import process_map
 
 from TTS.config import load_config

diff --git a/TTS/bin/remove_silence_using_vad.py b/TTS/bin/remove_silence_using_vad.py
@@ -6,12 +6,7 @@
 import multiprocessing
 import os
 import pathlib
-import sys
 import wave
-from itertools import chain
-
-import numpy as np
-import tqdm
 import webrtcvad
 from tqdm.contrib.concurrent import process_map
 
@@ -47,8 +42,8 @@ def write_wave(path, audio, sample_rate):
 class Frame(object):
     """Represents a "frame" of audio data."""
 
-    def __init__(self, bytes, timestamp, duration):
-        self.bytes = bytes
+    def __init__(self, _bytes, timestamp, duration):
+        self.bytes =_bytes
         self.timestamp = timestamp
         self.duration = duration
 
@@ -121,7 +116,7 @@ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, fram
                 # We want to yield all the audio we see from now until
                 # we are NOTTRIGGERED, but we have to start with the
                 # audio that's already in the ring buffer.
-                for f, s in ring_buffer:
+                for f, _ in ring_buffer:
                     voiced_frames.append(f)
                 ring_buffer.clear()
         else:
@@ -146,11 +141,10 @@ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, fram
 
 
 def remove_silence(filepath):
-    filename = os.path.basename(filepath)
     output_path = filepath.replace(os.path.join(args.input_dir, ""), os.path.join(args.output_dir, ""))
     # ignore if the file exists
     if os.path.exists(output_path) and not args.force:
-        return False
+        return
     # create all directory structure
     pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
     padding_duration_ms = 300  # default 300
@@ -166,7 +160,7 @@ def remove_silence(filepath):
     if num_segments != 0:
         for i, segment in reversed(list(enumerate(segments))):
             if i >= 1:
-                if flag == False:
+                if not flag:
                     concat_segment = segment
                     flag = True
                 else:
@@ -176,11 +170,12 @@ def remove_silence(filepath):
                     segment = segment + concat_segment
                 write_wave(output_path, segment, sample_rate)
                 print(output_path)
-                return True
+                return
     else:
         print("> Just Copying the file to:", output_path)
         # if fail to remove silence just write the file
         write_wave(output_path, audio, sample_rate)
+        return
 
 
 def preprocess_audios():
@@ -198,11 +193,9 @@ def preprocess_audios():
 
 
 if __name__ == "__main__":
-    """
-    usage
-    python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2
-    """
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(
+        description="python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2"
+    )
     parser.add_argument("-i", "--input_dir", type=str, default="../VCTK-Corpus", help="Dataset root dir")
     parser.add_argument(
         "-o", "--output_dir", type=str, default="../VCTK-Corpus-removed-silence", help="Output Dataset dir"

diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py
@@ -59,7 +59,7 @@ def mozilla_de(root_path, meta_file, **kwargs):  # pylint: disable=unused-argume
     return items
 
 
-def mailabs(root_path, meta_files=None):
+def mailabs(root_path, meta_files=None, ununsed_speakers=None):
     """Normalizes M-AI-Labs meta data files to TTS format
 
     Args:

diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py
@@ -1,5 +1,4 @@
 import math
-import random
 from dataclasses import dataclass, field
 from itertools import chain
 from typing import Dict, List, Tuple
@@ -747,7 +746,7 @@ def forward_fine_tuning(
 
             # inverse decoder and get the output
             z_f_pred = self.flow(z_ft, y_mask, g=g, reverse=True)
-            z_slice, slice_ids = rand_segment(z_f_pred, y_lengths, self.spec_segment_size)
+            z_slice, slice_ids = rand_segments(z_f_pred, y_lengths, self.spec_segment_size)
 
         o = self.waveform_decoder(z_slice, g=g)
 

diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py
@@ -7,6 +7,7 @@
 import numpy as np
 import torch
 from coqpit import Coqpit
+from torch.utils.data.sampler import WeightedRandomSampler
 
 from TTS.config import load_config
 from TTS.speaker_encoder.utils.generic_utils import setup_model

diff --git a/notebooks/dataset_analysis/analyze.py b/notebooks/dataset_analysis/analyze.py
@@ -180,7 +180,7 @@ def plot_phonemes(train_path, cmu_dict_path, save_path):
 
     plt.figure()
     plt.rcParams["figure.figsize"] = (50, 20)
-    barplot = sns.barplot(x, y)
+    barplot = sns.barplot(x=x, y=y)
     if save_path:
         fig = barplot.get_figure()
         fig.savefig(os.path.join(save_path, "phoneme_dist"))