Skip to content

Commit

Permalink
chore: code cleanup by ruff fix
Browse files: browse the repository at this point in the history
  • Loading branch information
magic-akari committed Jun 25, 2023
1 parent 30975cd commit 88be209
Show file tree
Hide file tree
Showing 48 changed files with 101 additions and 175 deletions.
1 change: 1 addition & 0 deletions .ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

6 changes: 6 additions & 0 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"recommendations": [
"charliermarsh.ruff",
"ms-python.python"
]
}
1 change: 0 additions & 1 deletion cluster/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import numpy as np
import torch
from sklearn.cluster import KMeans

Expand Down
1 change: 0 additions & 1 deletion cluster/kmeans.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import math,pdb
import torch,pynvml
from torch.nn.functional import normalize
from time import time
Expand Down
16 changes: 7 additions & 9 deletions cluster/train_cluster.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import time,pdb
import time
import tqdm
from time import time as ttime
import os
from pathlib import Path
import logging
Expand All @@ -12,8 +11,7 @@

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
from time import time as ttime
import pynvml,torch
import torch

def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=False):#gpu_minibatch真拉,虽然库支持但是也不考虑
logger.info(f"Loading features from {in_dir}")
Expand All @@ -29,22 +27,22 @@ def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=
features = features.astype(np.float32)
logger.info(f"Clustering features of shape: {features.shape}")
t = time.time()
if(use_gpu==False):
if(use_gpu is False):
if use_minibatch:
kmeans = MiniBatchKMeans(n_clusters=n_clusters,verbose=verbose, batch_size=4096, max_iter=80).fit(features)
else:
kmeans = KMeans(n_clusters=n_clusters,verbose=verbose).fit(features)
else:
kmeans = KMeansGPU(n_clusters=n_clusters, mode='euclidean', verbose=2 if verbose else 0,max_iter=500,tol=1e-2)#
features=torch.from_numpy(features)#.to(device)
labels = kmeans.fit_predict(features)#
kmeans.fit_predict(features)#

print(time.time()-t, "s")

x = {
"n_features_in_": kmeans.n_features_in_ if use_gpu==False else features.shape[1],
"_n_threads": kmeans._n_threads if use_gpu==False else 4,
"cluster_centers_": kmeans.cluster_centers_ if use_gpu==False else kmeans.centroids.cpu().numpy(),
"n_features_in_": kmeans.n_features_in_ if use_gpu is False else features.shape[1],
"_n_threads": kmeans._n_threads if use_gpu is False else 4,
"cluster_centers_": kmeans.cluster_centers_ if use_gpu is False else kmeans.centroids.cpu().numpy(),
}
print("end")

Expand Down
12 changes: 5 additions & 7 deletions data_utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import time
import os
import random
import numpy as np
import torch
import torch.utils.data

import modules.commons as commons
import utils
from modules.mel_processing import spectrogram_torch, spec_to_mel_torch, spectrogram_torch
from modules.mel_processing import spectrogram_torch, spectrogram_torch
from utils import load_wav_to_torch, load_filepaths_and_text

# import h5py
Expand Down Expand Up @@ -87,7 +85,7 @@ def get_audio(self, filename):
assert abs(audio_norm.shape[1]-lmin * self.hop_length) < 3 * self.hop_length
spec, c, f0, uv = spec[:, :lmin], c[:, :lmin], f0[:lmin], uv[:lmin]
audio_norm = audio_norm[:, :lmin * self.hop_length]
if volume!= None:
if volume is not None:
volume = volume[:lmin]
return c, f0, spec, audio_norm, spk, uv, volume

Expand All @@ -96,7 +94,7 @@ def random_slice(self, c, f0, spec, audio_norm, spk, uv, volume):
# print("skip too short audio:", filename)
# return None

if random.choice([True, False]) and self.vol_aug and volume!=None:
if random.choice([True, False]) and self.vol_aug and volume is not None:
max_amp = float(torch.max(torch.abs(audio_norm))) + 1e-5
max_shift = min(1, np.log10(1/max_amp))
log10_vol_shift = random.uniform(-1, max_shift)
Expand All @@ -114,7 +112,7 @@ def random_slice(self, c, f0, spec, audio_norm, spk, uv, volume):
end = start + 790
spec, c, f0, uv = spec[:, start:end], c[:, start:end], f0[start:end], uv[start:end]
audio_norm = audio_norm[:, start * self.hop_length : end * self.hop_length]
if volume !=None:
if volume is not None:
volume = volume[start:end]
return c, f0, spec, audio_norm, spk, uv,volume

Expand Down Expand Up @@ -178,7 +176,7 @@ def __call__(self, batch):
uv = row[5]
uv_padded[i, :uv.size(0)] = uv
volume = row[6]
if volume != None:
if volume is not None:
volume_padded[i, :volume.size(0)] = volume
else :
volume_padded = None
Expand Down
3 changes: 1 addition & 2 deletions diffusion/data_loaders.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import random
import re
import numpy as np
import librosa
import torch
Expand Down Expand Up @@ -130,7 +129,7 @@ def __init__(
with open(filelists,"r") as f:
self.paths = f.read().splitlines()
for name_ext in tqdm(self.paths, total=len(self.paths)):
name = os.path.splitext(name_ext)[0]
os.path.splitext(name_ext)[0]
path_audio = name_ext
duration = librosa.get_duration(filename = path_audio, sr = self.sample_rate)

Expand Down
7 changes: 4 additions & 3 deletions diffusion/diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from functools import partial
from inspect import isfunction
import torch.nn.functional as F
import librosa.sequence
import numpy as np
import torch
from torch import nn
Expand All @@ -26,8 +25,10 @@ def extract(a, t, x_shape):


def noise_like(shape, device, repeat=False):
repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
noise = lambda: torch.randn(shape, device=device)
def repeat_noise():
return torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
def noise():
return torch.randn(shape, device=device)
return repeat_noise() if repeat else noise()


Expand Down
9 changes: 5 additions & 4 deletions diffusion/diffusion_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from functools import partial
from inspect import isfunction
import torch.nn.functional as F
import librosa.sequence
import numpy as np
from torch.nn import Conv1d
from torch.nn import Mish
Expand All @@ -27,8 +26,10 @@ def extract(a, t):


def noise_like(shape, device, repeat=False):
repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
noise = lambda: torch.randn(shape, device=device)
def repeat_noise():
return torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
def noise():
return torch.randn(shape, device=device)
return repeat_noise() if repeat else noise()


Expand Down Expand Up @@ -577,7 +578,7 @@ def forward(self, condition=None, init_noise=None, pndms=None, k_step=None):
noise_list = torch.zeros((0, 1, 1, self.mel_bins, n_frames), device=device)

ot = step_range[0]
ot_1 = torch.full((1,), ot, device=device, dtype=torch.long)
torch.full((1,), ot, device=device, dtype=torch.long)

for t in step_range:
t_1 = torch.full((1,), t, device=device, dtype=torch.long)
Expand Down
19 changes: 11 additions & 8 deletions diffusion/dpm_solver_pytorch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import torch
import torch.nn.functional as F
import math


class NoiseScheduleVP:
Expand Down Expand Up @@ -559,7 +557,7 @@ def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=Fal
x_t: A pytorch tensor. The approximated solution at time `t`.
"""
ns = self.noise_schedule
dims = x.dim()
x.dim()
lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t)
h = lambda_t - lambda_s
log_alpha_s, log_alpha_t = ns.marginal_log_mean_coeff(s), ns.marginal_log_mean_coeff(t)
Expand Down Expand Up @@ -984,20 +982,25 @@ def dpm_solver_adaptive(self, x, order, t_T, t_0, h_init=0.05, atol=0.0078, rtol
nfe = 0
if order == 2:
r1 = 0.5
lower_update = lambda x, s, t: self.dpm_solver_first_update(x, s, t, return_intermediate=True)
higher_update = lambda x, s, t, **kwargs: self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, solver_type=solver_type, **kwargs)
def lower_update(x, s, t):
return self.dpm_solver_first_update(x, s, t, return_intermediate=True)
def higher_update(x, s, t, **kwargs):
return self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, solver_type=solver_type, **kwargs)
elif order == 3:
r1, r2 = 1. / 3., 2. / 3.
lower_update = lambda x, s, t: self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, return_intermediate=True, solver_type=solver_type)
higher_update = lambda x, s, t, **kwargs: self.singlestep_dpm_solver_third_update(x, s, t, r1=r1, r2=r2, solver_type=solver_type, **kwargs)
def lower_update(x, s, t):
return self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, return_intermediate=True, solver_type=solver_type)
def higher_update(x, s, t, **kwargs):
return self.singlestep_dpm_solver_third_update(x, s, t, r1=r1, r2=r2, solver_type=solver_type, **kwargs)
else:
raise ValueError("For adaptive step size solver, order must be 2 or 3, got {}".format(order))
while torch.abs((s - t_0)).mean() > t_err:
t = ns.inverse_lambda(lambda_s + h)
x_lower, lower_noise_kwargs = lower_update(x, s, t)
x_higher = higher_update(x, s, t, **lower_noise_kwargs)
delta = torch.max(torch.ones_like(x).to(x) * atol, rtol * torch.max(torch.abs(x_lower), torch.abs(x_prev)))
norm_fn = lambda v: torch.sqrt(torch.square(v.reshape((v.shape[0], -1))).mean(dim=-1, keepdim=True))
def norm_fn(v):
return torch.sqrt(torch.square(v.reshape((v.shape[0], -1))).mean(dim=-1, keepdim=True))
E = norm_fn((x_higher - x_lower) / delta).max()
if torch.all(E <= 1.):
x = x_higher
Expand Down
1 change: 0 additions & 1 deletion diffusion/infer_gt_mel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import numpy as np
import torch
import torch.nn.functional as F
from diffusion.unit2mel import load_model_vocoder
Expand Down
2 changes: 0 additions & 2 deletions diffusion/logger/saver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
'''

import os
import json
import time
import yaml
import datetime
import torch
import matplotlib.pyplot as plt
from . import utils
from torch.utils.tensorboard import SummaryWriter

class Saver(object):
Expand Down
3 changes: 1 addition & 2 deletions diffusion/logger/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import yaml
import json
import pickle
import torch

def traverse_dir(
Expand Down Expand Up @@ -121,6 +120,6 @@ def load_model(
ckpt = torch.load(path_pt, map_location=torch.device(device))
global_step = ckpt['global_step']
model.load_state_dict(ckpt['model'], strict=False)
if ckpt.get('optimizer') != None:
if ckpt.get("optimizer") is not None:
optimizer.load_state_dict(ckpt['optimizer'])
return global_step, model, optimizer
10 changes: 4 additions & 6 deletions diffusion/onnx_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
import torch
import torch.nn as nn
import numpy as np
from wavenet import WaveNet
import torch.nn.functional as F
import diffusion

class DotDict(dict):
def __getattr__(*args):
Expand Down Expand Up @@ -147,8 +145,8 @@ def OnnxExport(self, project_name=None, init_noise=None, export_encoder=True, ex
spks.update({i:1.0/float(self.n_spk)})
spk_mix = torch.tensor(spk_mix)
spk_mix = spk_mix.repeat(n_frames, 1)
orgouttt = self.init_spkembed(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
outtt = self.forward(hubert, mel2ph, f0, volume, spk_mix)
self.init_spkembed(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
self.forward(hubert, mel2ph, f0, volume, spk_mix)
if export_encoder:
torch.onnx.export(
self,
Expand Down Expand Up @@ -182,8 +180,8 @@ def ExportOnnx(self, project_name=None):
spk_mix.append(1.0/float(self.n_spk))
spks.update({i:1.0/float(self.n_spk)})
spk_mix = torch.tensor(spk_mix)
orgouttt = self.orgforward(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
outtt = self.forward(hubert, mel2ph, f0, volume, spk_mix)
self.orgforward(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
self.forward(hubert, mel2ph, f0, volume, spk_mix)

torch.onnx.export(
self,
Expand Down
1 change: 0 additions & 1 deletion diffusion/solver.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import time
import numpy as np
import torch
Expand Down
7 changes: 4 additions & 3 deletions diffusion/uni_pc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import torch
import torch.nn.functional as F
import math


Expand Down Expand Up @@ -109,7 +108,8 @@ def marginal_log_mean_coeff(self, t):
elif self.schedule == 'linear':
return -0.25 * t ** 2 * (self.beta_1 - self.beta_0) - 0.5 * t * self.beta_0
elif self.schedule == 'cosine':
log_alpha_fn = lambda s: torch.log(torch.cos((s + self.cosine_s) / (1. + self.cosine_s) * math.pi / 2.))
def log_alpha_fn(s):
return torch.log(torch.cos((s + self.cosine_s) / (1.0 + self.cosine_s) * math.pi / 2.0))
log_alpha_t = log_alpha_fn(t) - self.cosine_log_alpha_0
return log_alpha_t

Expand Down Expand Up @@ -147,7 +147,8 @@ def inverse_lambda(self, lamb):
return t.reshape((-1,))
else:
log_alpha = -0.5 * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb))
t_fn = lambda log_alpha_t: torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) * 2. * (1. + self.cosine_s) / math.pi - self.cosine_s
def t_fn(log_alpha_t):
return torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) * 2.0 * (1.0 + self.cosine_s) / math.pi - self.cosine_s
t = t_fn(log_alpha)
return t

Expand Down
4 changes: 2 additions & 2 deletions diffusion/unit2mel.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,13 @@ def init_spkmix(self, n_spk):
hubert_hidden_size = self.input_channel
n_frames = 10
hubert = torch.randn((1, n_frames, hubert_hidden_size))
mel2ph = torch.arange(end=n_frames).unsqueeze(0).long()
torch.arange(end=n_frames).unsqueeze(0).long()
f0 = torch.randn((1, n_frames))
volume = torch.randn((1, n_frames))
spks = {}
for i in range(n_spk):
spks.update({i:1.0/float(self.n_spk)})
orgouttt = self.init_spkembed(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
self.init_spkembed(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)

def forward(self, units, f0, volume, spk_id = None, spk_mix_dict = None, aug_shift = None,
gt_spec=None, infer=True, infer_speedup=10, method='dpm-solver', k_step=300, use_tqdm=True):
Expand Down
5 changes: 2 additions & 3 deletions inference/infer_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import pickle

from diffusion.unit2mel import load_model_vocoder
import yaml

logging.getLogger('matplotlib').setLevel(logging.WARNING)

Expand Down Expand Up @@ -153,7 +152,7 @@ def __init__(self, net_g_path, config_path,
self.hop_size = self.diffusion_args.data.block_size
self.spk2id = self.diffusion_args.spk
self.speech_encoder = self.diffusion_args.data.encoder
self.unit_interpolate_mode = self.diffusion_args.data.unit_interpolate_mode if self.diffusion_args.data.unit_interpolate_mode!=None else 'left'
self.unit_interpolate_mode = self.diffusion_args.data.unit_interpolate_mode if self.diffusion_args.data.unit_interpolate_mode is not None else 'left'
if spk_mix_enable:
self.diffusion_model.init_spkmix(len(self.spk2id))
else:
Expand Down Expand Up @@ -290,7 +289,7 @@ def infer(self, speaker, tran, raw_path,
audio = torch.FloatTensor(wav).to(self.dev)
audio_mel = None
if self.only_diffusion or self.shallow_diffusion:
vol = self.volume_extractor.extract(audio[None,:])[None,:,None].to(self.dev) if vol==None else vol[:,:,None]
vol = self.volume_extractor.extract(audio[None,:])[None,:,None].to(self.dev) if vol is None else vol[:,:,None]
if self.shallow_diffusion and second_encoding:
audio16k = librosa.resample(audio.detach().cpu().numpy(), orig_sr=self.target_sample, target_sr=16000)
audio16k = torch.from_numpy(audio16k).to(self.dev)
Expand Down
Loading

0 comments on commit 88be209

Please sign in to comment.