Skip to content

Commit

Permalink
updates
Browse files: browse the repository at this point in the history
  • Loading branch information
Jan Buethe committed Sep 28, 2024
1 parent: 4fe9cde; commit: d9d7a49
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 21 deletions.
22 changes: 16 additions & 6 deletions dnn/torch/osce/models/lavoce.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,13 @@ def __init__(self,
norm_p=2,
avg_pool_k=4,
pulses=False,
power_pulses=False,
pulse_power=9,
innovate1=True,
innovate2=False,
innovate3=False,
ftrans_k=2):
ftrans_k=2,
shape_bias=True):

super().__init__()

Expand All @@ -90,6 +93,8 @@ def __init__(self,
self.preemph = preemph
self.pulses = pulses
self.ftrans_k = ftrans_k
self.power_pulses = power_pulses
self.pulse_power = pulse_power

assert self.FEATURE_FRAME_SIZE % self.FRAME_SIZE == 0
self.upsamp_factor = self.FEATURE_FRAME_SIZE // self.FRAME_SIZE
Expand All @@ -101,7 +106,7 @@ def __init__(self,
self.feature_net = LPCNetFeatureNet(num_features + pitch_embedding_dim, cond_dim, self.upsamp_factor)

# noise shaper
self.noise_shaper = NoiseShaper(cond_dim, self.FRAME_SIZE)
self.noise_shaper = NoiseShaper(cond_dim, self.FRAME_SIZE, bias=shape_bias)

# comb filters
left_pad = self.kernel_size // 2
Expand All @@ -117,9 +122,9 @@ def __init__(self,
self.af1 = LimitedAdaptiveConv1d(1, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

# non-linear transforms
self.tdshape1 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, innovate=innovate1)
self.tdshape2 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, innovate=innovate2)
self.tdshape3 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, innovate=innovate3)
self.tdshape1 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, innovate=innovate1, bias=shape_bias)
self.tdshape2 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, innovate=innovate2, bias=shape_bias)
self.tdshape3 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, innovate=innovate3, bias=shape_bias)

# combinators
self.af2 = LimitedAdaptiveConv1d(2, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
Expand Down Expand Up @@ -151,6 +156,11 @@ def create_phase_signals(self, periods):
pulse_a = torch.relu(chunk_sin - alpha) / (1 - alpha)
pulse_b = torch.relu(-chunk_sin - alpha) / (1 - alpha)

chunk = torch.cat((pulse_a, pulse_b), dim = 1)
elif self.power_pulses:
chunk_sin = torch.sin(f * progression + phase0).view(batch_size, 1, self.FRAME_SIZE)
pulse_a = torch.relu(chunk_sin) ** self.pulse_power
pulse_b = torch.relu(-chunk_sin) ** self.pulse_power
chunk = torch.cat((pulse_a, pulse_b), dim = 1)
else:
chunk_sin = torch.sin(f * progression + phase0).view(batch_size, 1, self.FRAME_SIZE)
Expand All @@ -176,7 +186,7 @@ def flop_count(self, rate=16000, verbose=False):
af_flops = self.af1.flop_count(rate) + self.af2.flop_count(rate) + self.af3.flop_count(rate) + self.af4.flop_count(rate) + self.af_prescale.flop_count(rate) + self.af_mix.flop_count(rate)
feature_flops = (_conv1d_flop_count(self.post_cf1, frame_rate) + _conv1d_flop_count(self.post_cf2, frame_rate)
+ _conv1d_flop_count(self.post_af1, frame_rate) + _conv1d_flop_count(self.post_af2, frame_rate) + _conv1d_flop_count(self.post_af3, frame_rate))
shape_flops = self.tdshape1.flop_count(rate) + self.tdshape2.flop_count(rate) + self.tdshape3.flop_count(rate)
shape_flops = self.tdshape1.flop_count(rate) + self.tdshape2.flop_count(rate) + self.tdshape3.flop_count(rate) + self.noise_shaper.flop_count(rate)

if verbose:
print(f"feature net: {feature_net_flops / 1e6} MFLOPS")
Expand Down
7 changes: 4 additions & 3 deletions dnn/torch/osce/utils/layers/noise_shaper.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ class NoiseShaper(nn.Module):

def __init__(self,
feature_dim,
frame_size=160
frame_size=160,
bias=True
):
"""
Expand All @@ -58,8 +59,8 @@ def __init__(self,
self.frame_size = frame_size

# feature transform
self.feature_alpha1 = nn.Conv1d(self.feature_dim, frame_size, 2)
self.feature_alpha2 = nn.Conv1d(frame_size, frame_size, 2)
self.feature_alpha1 = nn.Conv1d(self.feature_dim, frame_size, 2, bias=bias)
self.feature_alpha2 = nn.Conv1d(frame_size, frame_size, 2, bias=bias)


def flop_count(self, rate):
Expand Down
21 changes: 11 additions & 10 deletions dnn/torch/osce/utils/layers/td_shaper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ def __init__(self,
innovate=False,
pool_after=False,
softquant=False,
apply_weight_norm=False
apply_weight_norm=False,
bias=True
):
"""
Expand Down Expand Up @@ -51,21 +52,21 @@ def __init__(self,
norm = torch.nn.utils.weight_norm if apply_weight_norm else lambda x, name=None: x

# feature transform
self.feature_alpha1_f = norm(nn.Conv1d(self.feature_dim, frame_size, 2))
self.feature_alpha1_t = norm(nn.Conv1d(self.env_dim, frame_size, 2))
self.feature_alpha2 = norm(nn.Conv1d(frame_size, frame_size, 2))
self.feature_alpha1_f = norm(nn.Conv1d(self.feature_dim, frame_size, 2, bias=bias))
self.feature_alpha1_t = norm(nn.Conv1d(self.env_dim, frame_size, 2, bias=bias))
self.feature_alpha2 = norm(nn.Conv1d(frame_size, frame_size, 2, bias=bias))

if softquant:
self.feature_alpha1_f = soft_quant(self.feature_alpha1_f)

if self.innovate:
self.feature_alpha1b_f = norm(nn.Conv1d(self.feature_dim, frame_size, 2))
self.feature_alpha1b_t = norm(nn.Conv1d(self.env_dim, frame_size, 2))
self.feature_alpha1c_f = norm(nn.Conv1d(self.feature_dim, frame_size, 2))
self.feature_alpha1c_t = norm(nn.Conv1d(self.env_dim, frame_size, 2))
self.feature_alpha1b_f = norm(nn.Conv1d(self.feature_dim, frame_size, 2, bias=bias))
self.feature_alpha1b_t = norm(nn.Conv1d(self.env_dim, frame_size, 2, bias=bias))
self.feature_alpha1c_f = norm(nn.Conv1d(self.feature_dim, frame_size, 2, bias=bias))
self.feature_alpha1c_t = norm(nn.Conv1d(self.env_dim, frame_size, 2, bias=bias))

self.feature_alpha2b = norm(nn.Conv1d(frame_size, frame_size, 2))
self.feature_alpha2c = norm(nn.Conv1d(frame_size, frame_size, 2))
self.feature_alpha2b = norm(nn.Conv1d(frame_size, frame_size, 2, bias=bias))
self.feature_alpha2c = norm(nn.Conv1d(frame_size, frame_size, 2, bias=bias))

if softquant:
self.feature_alpha1b_f = soft_quant(self.feature_alpha1b_f)
Expand Down
4 changes: 2 additions & 2 deletions dnn/torch/osce/utils/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,8 @@
'name': 'lavoce'
},
'training': {
'batch_size': 64,
'epochs': 50,
'batch_size': 128,
'epochs': 100,
'gen_lr_reduction': 1,
'lambda_feat': 1.0,
'lambda_reg': 0.6,
Expand Down

0 comments on commit d9d7a49

Please sign in to comment.