Skip to content

Commit

Permalink
fix linter
Browse files (browse the repository at this point in the history)
  • Loading branch information
WeberJulian authored and erogol committed Dec 20, 2021
1 parent 1472b6d commit e995a63
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 22 deletions.
1 change: 0 additions & 1 deletion TTS/bin/find_unique_phonemes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import multiprocessing
from argparse import RawTextHelpFormatter

import numpy
from tqdm.contrib.concurrent import process_map

from TTS.config import load_config
Expand Down
27 changes: 10 additions & 17 deletions TTS/bin/remove_silence_using_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,7 @@
import multiprocessing
import os
import pathlib
import sys
import wave
from itertools import chain

import numpy as np
import tqdm
import webrtcvad
from tqdm.contrib.concurrent import process_map

Expand Down Expand Up @@ -47,8 +42,8 @@ def write_wave(path, audio, sample_rate):
class Frame(object):
"""Represents a "frame" of audio data."""

def __init__(self, bytes, timestamp, duration):
self.bytes = bytes
def __init__(self, _bytes, timestamp, duration):
self.bytes =_bytes
self.timestamp = timestamp
self.duration = duration

Expand Down Expand Up @@ -121,7 +116,7 @@ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, fram
# We want to yield all the audio we see from now until
# we are NOTTRIGGERED, but we have to start with the
# audio that's already in the ring buffer.
for f, s in ring_buffer:
for f, _ in ring_buffer:
voiced_frames.append(f)
ring_buffer.clear()
else:
Expand All @@ -146,11 +141,10 @@ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, fram


def remove_silence(filepath):
filename = os.path.basename(filepath)
output_path = filepath.replace(os.path.join(args.input_dir, ""), os.path.join(args.output_dir, ""))
# ignore if the file exists
if os.path.exists(output_path) and not args.force:
return False
return
# create all directory structure
pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
padding_duration_ms = 300 # default 300
Expand All @@ -166,7 +160,7 @@ def remove_silence(filepath):
if num_segments != 0:
for i, segment in reversed(list(enumerate(segments))):
if i >= 1:
if flag == False:
if not flag:
concat_segment = segment
flag = True
else:
Expand All @@ -176,11 +170,12 @@ def remove_silence(filepath):
segment = segment + concat_segment
write_wave(output_path, segment, sample_rate)
print(output_path)
return True
return
else:
print("> Just Copying the file to:", output_path)
# if fail to remove silence just write the file
write_wave(output_path, audio, sample_rate)
return


def preprocess_audios():
Expand All @@ -198,11 +193,9 @@ def preprocess_audios():


if __name__ == "__main__":
"""
usage
python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2
"""
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(
description="python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2"
)
parser.add_argument("-i", "--input_dir", type=str, default="../VCTK-Corpus", help="Dataset root dir")
parser.add_argument(
"-o", "--output_dir", type=str, default="../VCTK-Corpus-removed-silence", help="Output Dataset dir"
Expand Down
2 changes: 1 addition & 1 deletion TTS/tts/datasets/formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def mozilla_de(root_path, meta_file, **kwargs): # pylint: disable=unused-argume
return items


def mailabs(root_path, meta_files=None):
def mailabs(root_path, meta_files=None, ununsed_speakers=None):
"""Normalizes M-AI-Labs meta data files to TTS format
Args:
Expand Down
3 changes: 1 addition & 2 deletions TTS/tts/models/vits.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import math
import random
from dataclasses import dataclass, field
from itertools import chain
from typing import Dict, List, Tuple
Expand Down Expand Up @@ -747,7 +746,7 @@ def forward_fine_tuning(

# inverse decoder and get the output
z_f_pred = self.flow(z_ft, y_mask, g=g, reverse=True)
z_slice, slice_ids = rand_segment(z_f_pred, y_lengths, self.spec_segment_size)
z_slice, slice_ids = rand_segments(z_f_pred, y_lengths, self.spec_segment_size)

o = self.waveform_decoder(z_slice, g=g)

Expand Down
1 change: 1 addition & 0 deletions TTS/tts/utils/speakers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np
import torch
from coqpit import Coqpit
from torch.utils.data.sampler import WeightedRandomSampler

from TTS.config import load_config
from TTS.speaker_encoder.utils.generic_utils import setup_model
Expand Down
2 changes: 1 addition & 1 deletion notebooks/dataset_analysis/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def plot_phonemes(train_path, cmu_dict_path, save_path):

plt.figure()
plt.rcParams["figure.figsize"] = (50, 20)
barplot = sns.barplot(x, y)
barplot = sns.barplot(x=x, y=y)
if save_path:
fig = barplot.get_figure()
fig.savefig(os.path.join(save_path, "phoneme_dist"))
Expand Down

0 comments on commit e995a63

Please sign in to comment.