12 changes: 7 additions & 5 deletions configs/default.yaml
@@ -1,6 +1,6 @@
 data:
-  data_dir: 'H:\Deepsync\backup\fastspeech\data\'
-  wav_dir: 'H:\Deepsync\backup\deepsync\LJSpeech-1.1\wavs\'
+  data_dir: '/workspace/data/'
+  wav_dir: '/workspace/LJSpeech-1.1/wavs/'
   # Compute statistics
   e_mean: 21.578571319580078
   e_std: 18.916799545288086
@@ -106,10 +106,12 @@ model:


 train:
+  discriminator_start: 20000
+  rep_discriminator: 1
   # optimization related
   eos: False #True
   opt: 'noam'
-  accum_grad: 4
+  accum_grad: 1
   grad_clip: 1.0
   weight_decay: 0.001
   patience: 0
@@ -125,7 +127,7 @@ train:
   seed: 1 # random seed number
   resume: "" # the snapshot path to resume (if set empty, no effect)
   use_phonemes: True
-  batch_size : 16
+  batch_size : 24
   # other
   melgan_vocoder : True
   save_interval : 1000
@@ … @@
   summary_interval : 200
   validation_step : 500
   tts_max_mel_len : 870 # if you have a couple of extremely long spectrograms you might want to use this
-  tts_bin_lengths : True # bins the spectrogram lengths before sampling in data loader - speeds up training
\ No newline at end of file
+  tts_bin_lengths : True # bins the spectrogram lengths before sampling in data loader - speeds up training
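The training loop itself is not part of this diff, so here is a minimal sketch of how the two new keys could be consumed: `discriminator_start` delays the GAN loss until the acoustic model has warmed up, and `rep_discriminator` repeats the discriminator update per generator step. The names `hp` (parsed YAML config) and `d_step` (one discriminator update) are hypothetical.

```python
def maybe_train_discriminator(step, hp, d_step):
    # Warm-up: train the acoustic model alone until the GAN loss kicks in.
    if step < hp.train.discriminator_start:  # 20000 in this config
        return
    # Update the discriminator `rep_discriminator` times per generator step.
    for _ in range(hp.train.rep_discriminator):  # 1 in this config
        d_step()
```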
62 changes: 62 additions & 0 deletions core/discriminator.py
@@ -0,0 +1,62 @@
import torch
import torch.nn as nn


class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        self.discriminator = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 1, kernel_size=3, stride=1, padding=1),
            # nn.Flatten(),  # alternative head: flatten the 1-channel map
            # nn.Linear(46240, 256)
        )

    def forward(self, x):
        """
        Directly predict scores without a final sigmoid,
        since we're using Least Squares GAN (https://arxiv.org/abs/1611.04076).
        """
        return self.discriminator(x)


def weights_init(m):
    # DCGAN-style initialization: N(0, 0.02) for conv weights,
    # N(1, 0.02) for BatchNorm scales with zero bias.
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


class SFDiscriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.disc1 = Discriminator()
        self.disc2 = Discriminator()
        self.disc3 = Discriminator()
        self.apply(weights_init)

    def forward(self, x, start):
        # Each sub-discriminator scores the same 40-frame window at `start`,
        # but over a different (overlapping) 40-bin slice of the 80 mel bins.
        results = []
        results.append(self.disc1(x[:, :, start: start + 40, 0:40]))
        results.append(self.disc2(x[:, :, start: start + 40, 20:60]))
        results.append(self.disc3(x[:, :, start: start + 40, 40:80]))
        return results


if __name__ == '__main__':
    model = SFDiscriminator()

    x = torch.randn(16, 1, 40, 80)  # (batch, channel, frames, mel bins)
    print(x.shape)

    out = model(x, 0)  # forward() also needs the window offset
    print(len(out), "sub-discriminator outputs")

    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(pytorch_total_params)
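Because `SFDiscriminator.forward` returns a list of score maps (one per frequency band), the LSGAN objectives should be summed across the sub-discriminators. A minimal sketch, assuming mel inputs shaped `(batch, 1, frames, 80)` and a random window offset; `mel_fake` here is a stand-in for the generator's output:

```python
import torch

from core.discriminator import SFDiscriminator

disc = SFDiscriminator()
mel_real = torch.randn(16, 1, 200, 80)
mel_fake = torch.randn(16, 1, 200, 80)  # stand-in for generator output

# Pick a random 40-frame window that fits inside the spectrogram.
start = torch.randint(0, mel_real.size(2) - 40, (1,)).item()

# LSGAN discriminator loss: push real scores to 1, fake scores to 0.
# detach() keeps discriminator gradients out of the generator.
d_loss = sum(((r - 1.0) ** 2).mean() + (f ** 2).mean()
             for r, f in zip(disc(mel_real, start),
                             disc(mel_fake.detach(), start)))

# LSGAN generator loss: push fake scores to 1.
g_loss = sum(((f - 1.0) ** 2).mean() for f in disc(mel_fake, start))
print(d_loss.item(), g_loss.item())
```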