Skip to content

Commit

Permalink
Refactor for ICASSP
Browse files Browse the repository at this point in the history
  • Loading branch information
SungFeng-Huang committed Oct 5, 2021
1 parent 701b4ec commit 8494da5
Show file tree
Hide file tree
Showing 80 changed files with 834 additions and 229 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ preprocessed_data/LibriTTS/
preprocessed_data/VCTK/
evaluation/speechmetrics/
evaluation/Pytorch_MBNet/
evaluation/images/._*
evaluation/images/**/._*
evaluation/images/**/*.png
*.npy
*.csv
*.swp
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,11 @@ You can download pretrained models [here](https://drive.google.com/drive/folders

## Results

Speaker verification:
- same speaker v.s. different speaker
- EER ![image](evaluation/images/eer.png)
- DET ![image](evaluation/images/det.png)
- real v.s. synthesized
- Cosine similarity ![image](evaluation/images/errorbar_plot.png)
- ROC ![image](evaluation/images/roc.png)
| Corpus | LibriTTS | VCTK |
| --- | --- | --- |
| Speaker Similarity | ![](evaluation/images/LibriTTS/errorbar_plot_encoder.png) | ![](evaluation/images/VCTK/errorbar_plot_encoder.png) |
| Speaker Verification | ![](evaluation/images/LibriTTS/eer_encoder.png) | ![](evaluation/images/VCTK/eer_encoder.png) |
| Synthesized Speech Detection | ![](evaluation/images/LibriTTS/roc_encoder.png) | ![](evaluation/images/VCTK/roc_encoder.png) |


<!--## Contributing-->
Expand Down
8 changes: 8 additions & 0 deletions evaluation/centroid_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def load_dvector(self):
self.dvector_list_dict[mode] = np.load(f'npy/{self.corpus}/{mode}_dvector.npy', allow_pickle=True)
for mode in self.mode_list:
for step in self.step_list:
if mode in ['scratch_encoder', 'encoder', 'dvec'] and step != 0:
continue
self.dvector_list_dict[f'{mode}_step{step}'] = np.load(f'npy/{self.corpus}/{mode}_step{step}_dvector.npy', allow_pickle=True)

# get the cosine similarity between the centroid and the dvectors of sample utterances
Expand All @@ -54,6 +56,8 @@ def get_centroid_similarity(self):
self.dvector_list_dict_tensor['recon'] = torch.from_numpy(self.dvector_list_dict['recon'])
for mode in self.mode_list:
for step in self.step_list:
if mode in ['scratch_encoder', 'encoder', 'dvec'] and step != 0:
continue
self.dvector_list_dict_tensor[f'{mode}_step{step}'] = torch.from_numpy(
self.dvector_list_dict[f'{mode}_step{step}']
)
Expand All @@ -74,13 +78,17 @@ def get_centroid_similarity(self):
for mode in self.mode_list:
print(f'processing the similarity of mode: {mode}')
for step in self.step_list:
if mode in ['scratch_encoder', 'encoder', 'dvec'] and step != 0:
continue
print(f' step{step}')
self.similarity_list_dict[f'{mode}_step{step}'] = cos(
self.dvector_list_dict_tensor['centroid'],
self.dvector_list_dict_tensor[f'{mode}_step{step}']
).detach().cpu().numpy()

def save_centroid_similarity(self):
    """Print per-key summary statistics, then persist the similarity dict.

    For every entry of ``self.similarity_list_dict`` the key is printed
    together with the mean and standard deviation of its values. The whole
    dict is then written with ``np.save`` (pickling enabled) under
    ``npy/<corpus>/``.
    """
    for key, values in self.similarity_list_dict.items():
        print(key, np.mean(values), np.std(values))
    np.save(
        f'npy/{self.corpus}/centroid_similarity_dict.npy',
        self.similarity_list_dict,
        allow_pickle=True,
    )

def load_centroid_similarity(self):
Expand Down
173 changes: 135 additions & 38 deletions evaluation/compute_mos.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
from tqdm import trange, tqdm
from tqdm.contrib import tenumerate

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import numpy as np
import scipy
import torch
Expand All @@ -17,6 +22,33 @@
import config


class MBNetDataset(Dataset):
    """Dataset that maps a list of wav paths to magnitude spectrograms.

    Every item is the transposed magnitude STFT (``n_fft=512``) of one file
    loaded at 16 kHz.
    """

    def __init__(self, filelist):
        # Keep the path list and cache its size for __len__.
        self.wav_name = filelist
        self.length = len(self.wav_name)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        """Load file ``idx`` and return its transposed magnitude STFT."""
        audio, _ = librosa.load(self.wav_name[idx], sr=16000)
        magnitude = np.abs(librosa.stft(audio, n_fft=512))
        # NOTE(review): after the transpose axis 0 is presumably time
        # (frames) and axis 1 frequency -- confirm against librosa.stft.
        return magnitude.T

    def collate_fn(self, wavs):
        """Stack variable-length items into one batch tensor.

        Items shorter (along axis 0) than the longest item in ``wavs`` are
        extended by repeating themselves cyclically: as many whole copies as
        fit, followed by a leading slice that fills the remainder.

        Returns:
            A tensor built with ``torch.stack`` over the padded items, so
            axis 0 indexes the items of ``wavs``.
        """
        longest = max(wavs, key=lambda item: item.shape[0]).shape[0]
        batch = []
        for spec in wavs:
            # Whole repetitions that fit, plus the rows still missing.
            n_full, n_extra = divmod(longest, spec.shape[0])
            pieces = [spec] * n_full
            pieces.append(spec[:n_extra, :])
            batch.append(torch.Tensor(np.concatenate(pieces, axis=0)))
        return torch.stack(batch, dim=0)


class NeuralMOS:
def __init__(self, args):
self.corpus = config.corpus
Expand All @@ -42,12 +74,23 @@ def setup_filelist(self,):
file_list['recon'].append(candidate[0])

for mode in self.mode_list:
# if mode in ['scratch_encoder', 'encoder', 'dvec'] and step > 0:
if mode in ['scratch_encoder', 'encoder', 'dvec']:
continue
mode_dir = os.path.join(self.data_dir_dict[mode], 'audio/Testing')
for step in self.step_list:
file_list[f'{mode}_step{step}'] = []
for _id in range(self.n_speaker * self.n_sample):
candidate = glob.glob(f"{mode_dir}/test_{_id:03}/*FTstep_{step}.synth.wav")
assert len(candidate) == 1
try:
candidate = glob.glob(f"{mode_dir}/test_{_id:03}/*FTstep_{step}.synth.wav")
if self.corpus == "LibriTTS":
candidate = [name for name in candidate if name.split('/')[-1][0].isdigit()]
assert len(candidate) == 1, mode_dir + ' / ' + ' - '.join(candidates) + f" / test_{_id:03} / {step}"
except:
candidate = glob.glob(f"{mode_dir}/*/test_{_id:03}/*FTstep_{step}.synth.wav")
if self.corpus == "LibriTTS":
candidate = [name for name in candidate if name.split('/')[-1][0].isdigit()]
assert len(candidate) == 1, mode_dir + ' / ' + ' - '.join(candidates) + f" / test_{_id:03} / {step}"
file_list[f'{mode}_step{step}'].append(candidate[0])

return file_list
Expand Down Expand Up @@ -149,7 +192,8 @@ def add_up(self, net='mosnet'):
print(mode, mean, ci)
fo.write(f"{mode}, {mean}, {ci}\n")

def plot(self, mode_name):

def bar_plot(self, mode_name):
if mode_name == 'base_emb':
xtitle = 'Baseline (emb table)'
elif mode_name == 'base_emb1':
Expand All @@ -158,10 +202,6 @@ def plot(self, mode_name):
xtitle = 'Meta-TTS (emb table)'
elif mode_name == 'meta_emb1':
xtitle = 'Meta-TTS (share emb)'
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
models = ['mosnet', 'mbnet', 'wav2vec2', 'tera', 'cpc']
modes = ['real', 'recon'] + [f'{mode_name}{j}_step{i}' for j in ['_vad','_va','_d',''] for i in [0,5,10,20,50,100]]
xticks = ['Real', 'Reconstructed'] + [f'{j}, step {i}' for j in ['Emb, VA, D','Emb, VA','Emb, D','Emb'] for i in [0,5,10,20,50,100]]
Expand Down Expand Up @@ -196,43 +236,100 @@ def plot(self, mode_name):
plt.savefig(f'images/{self.corpus}/MOS_{mode_name}.png', format='png', bbox_extra_artists=(leg, ), bbox_inches='tight')
# plt.show()

class MBNetDataset(Dataset):
def __init__(self, filelist):
self.wav_name = filelist
self.length = len(self.wav_name)

def __len__(self):
return self.length
def plot(self, mode_name):
title_map = {
'base_emb': 'Baseline (emb table)',
'base_emb1': 'Baseline (share emb)',
'meta_emb': 'Meta-TTS (emb table)',
'meta_emb1': 'Meta-TTS (share emb)',
}
xtitle = title_map[mode_name]

def __getitem__(self, idx):
wav, _ = librosa.load(self.wav_name[idx], sr=16000)
wav = np.abs(librosa.stft(wav, n_fft=512)).T
return wav
palette = sns.color_palette(n_colors=8)
palette_color = ['blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'grey', 'olive', 'cyan']
fig, ax = plt.subplots(figsize=(4.8, 4.2))

models = ['mosnet', 'mbnet', 'wav2vec2', 'tera', 'cpc']

# Horizontal lines with bands
dfs = []
for mode, xtick in zip(['real', 'recon'], ['Real', 'Reconstructed']):
for model in tqdm(models, desc='mos_type', leave=False):
filename = f'csv/{self.corpus}/{model}_{mode}.csv'
df = pd.read_csv(filename)
if model in ['mosnet','mbnet']:
df = df.rename(columns={' mos': "MOS"})
else:
df = df.rename(columns={'score': "MOS"})
df['MOS_type'] = model
df[xtitle] = xtick
dfs.append(df)
dfs = pd.concat(dfs, ignore_index=True)
dfs = dfs.groupby([xtitle, "test_id"]).mean().groupby(xtitle).agg(self.get_mean_confidence_interval)
print(dfs)
for (xtick, row), color in zip(dfs.iterrows(), ['purple', 'grey']):
mean, ci = row["MOS"]
rgb_color = palette[palette_color.index(color)]
ax.axhspan(mean-ci, mean+ci, facecolor=rgb_color, alpha=0.15)
ax.axhline(mean, linestyle='--', alpha=0.5, color=rgb_color, label=xtick)
del dfs

# Curves
dfs = []
modes = [f'{mode_name}{j}_step{i}' for j in ['_vad','_va','_d',''] for i in [0,5,10,20,50,100]]
xticks = [f'{j}, step {i}' for j in ['Emb, VA, D','Emb, VA','Emb, D','Emb'] for i in [0,5,10,20,50,100]]
for i, mode in tenumerate(modes, desc='mode', leave=False):
for model in tqdm(models, desc='mos_type', leave=False):
filename = f'csv/{self.corpus}/{model}_{mode}.csv'
df = pd.read_csv(filename)
if model in ['mosnet','mbnet']:
df = df.rename(columns={' mos': "MOS"})
else:
df = df.rename(columns={'score': "MOS"})
df['MOS_type'] = model
df[xtitle] = xticks[i].rsplit(',', 1)[0]
df['Adaptation Steps'] = int(mode.rsplit('_', 1)[1][4:])
dfs.append(df)
dfs = pd.concat(dfs, ignore_index=True)
print(dfs.groupby([xtitle, "test_id"]).mean().groupby(xtitle).agg(self.get_mean_confidence_interval))
ax = sns.lineplot(x='Adaptation Steps', y='MOS', hue=xtitle, data=dfs, ax=ax, err_style='bars')
del dfs

h, l = ax.get_legend_handles_labels()
print(l)
# Seaborn usually emits the hue label as the first legend entry with an empty
# artist (handle); in that case the first handle/label should be removed.
# Here, for an unknown reason, the hue label is already treated as the legend
# title, so the first handle/label does not need to be removed.
ax.legend(handles=h, labels=l, ncol=2, title=xtitle, title_fontsize='large')
plt.ylim((2.6,4.2))
plt.tight_layout()

savefile = f"images/{self.corpus}/MOS_{mode_name}.png"
plt.savefig(savefile, format='png')
print(savefile)
plt.close()
from PIL import Image
im = Image.open(savefile)
im.show()

def collate_fn(self, wavs):
max_len = max(wavs, key = lambda x: x.shape[0]).shape[0]
output_wavs = []
for i, wav in enumerate(wavs):
wav_len = wav.shape[0]
dup_times = max_len//wav_len
remain = max_len - wav_len*dup_times
to_dup = [wav for t in range(dup_times)]
to_dup.append(wav[:remain, :])
output_wavs.append(torch.Tensor(np.concatenate(to_dup, axis = 0)))
output_wavs = torch.stack(output_wavs, dim = 0)
return output_wavs

if __name__ == '__main__':
parser = ArgumentParser()
parser.add_argument('--net', type=str, choices=['mosnet', 'mbnet'], default=False)
parser.add_argument('--net', type=str, choices=['mosnet', 'mbnet', 'wav2vec2', 'tera', 'cpc'], default=False)
parser.add_argument('--plot', type=str, default=False)
args = parser.parse_args()
main = NeuralMOS(args)
if args.net == 'mosnet':
main.compute_mosnet()
main.add_up('mosnet')
if args.net == 'mbnet':
main.compute_mbnet()
main.add_up('mbnet')
if args.plot:
main.plot(args.plot)
if args.net:
if args.net == 'mosnet':
main.compute_mosnet()
elif args.net == 'mbnet':
main.compute_mbnet()
main.add_up(args.net)
# if args.plot:
# main.plot(args.plot)
# for suffix in ['', '_base_emb', '_base_emb1', '_meta_emb', '_meta_emb1']:
for plot in ['base_emb', 'base_emb1', 'meta_emb', 'meta_emb1']:
main.plot(plot)
plt.show()
18 changes: 12 additions & 6 deletions evaluation/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
data_dir_dict['meta_emb1_va'] = f'{root_dir}/output/result/{corpus}/76d5bf2e9c044908bf7122d350488cff/meta_emb1_va'
data_dir_dict['meta_emb1_d'] = f'{root_dir}/output/result/{corpus}/c0e9e6a6f5984cb28fa05522b830cfec/meta_emb1_d'
data_dir_dict['meta_emb1'] = f'{root_dir}/output/result/{corpus}/eaca69ba824b45bfb8d6e1f663bc6c51/meta_emb1'
data_dir_dict['scratch_encoder'] = f'{root_dir}/output/result/{corpus}/064fdd9ccfa94ca190d0dcccead456ce'
data_dir_dict['encoder'] = f'{root_dir}/output/result/{corpus}/b40400015bac4dfd8a8aaffec7d3db9f'
data_dir_dict['dvec'] = f'{root_dir}/output/result/{corpus}/fdf55e6b33434922b758d034e839f000'
n_sample = 16
mode_list = [
'base_emb_vad',
Expand All @@ -47,7 +50,10 @@
'meta_emb1_vad',
'meta_emb1_va',
'meta_emb1_d',
'meta_emb1'
'meta_emb1',
'scratch_encoder',
'encoder',
'dvec',
]
step_list = [0, 5, 10, 20, 50, 100]

Expand All @@ -72,20 +78,20 @@
plot_type = 'errorbar' # ['errorbar', 'box_ver', 'box_hor']
sim_plot_mode_list = [
'recon', 'recon_random',
'base_emb_vad',
'meta_emb_vad',
'base_emb1_vad',
'base_emb_vad',
'meta_emb1_vad',
'meta_emb_vad',
]
# length of color_list should be same as len(sim_plot_mode_list)
sim_plot_color_list = ['purple', 'grey', 'red', 'blue', 'orange', 'green']
sim_plot_color_list = ['purple', 'grey', 'orange', 'red', 'green', 'blue']
# length of legend_list should be same as len(sim_plot_mode_list)
sim_plot_legend_list = [
'Same spk', 'Different spk',
'Baseline (emb table)',
'Meta-TTS (emb table)',
'Baseline (share emb)',
'Baseline (emb table)',
'Meta-TTS (share emb)',
'Meta-TTS (emb table)',
]


Expand Down
Binary file removed evaluation/images/LibriTTS/MOS_base_emb.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/MOS_base_emb1.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/MOS_meta_emb.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/MOS_meta_emb1.png
Binary file not shown.
Binary file added evaluation/images/LibriTTS/auc_encoder.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed evaluation/images/LibriTTS/det.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/det_base_emb.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/det_base_emb1.png
Binary file not shown.
Binary file added evaluation/images/LibriTTS/det_encoder.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed evaluation/images/LibriTTS/det_meta_emb.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/det_meta_emb1.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/eer.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/eer_base_emb.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/eer_base_emb1.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/eer_emb_table.png
Binary file not shown.
Binary file added evaluation/images/LibriTTS/eer_encoder.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed evaluation/images/LibriTTS/eer_meta_emb.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/eer_meta_emb1.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/eer_share_emb.png
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/errorbar_plot.png
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file removed evaluation/images/LibriTTS/roc.png
Diff not rendered.
Binary file removed evaluation/images/LibriTTS/roc_base_emb.png
Diff not rendered.
Binary file removed evaluation/images/LibriTTS/roc_base_emb1.png
Diff not rendered.
Binary file added evaluation/images/LibriTTS/roc_encoder.png
Binary file removed evaluation/images/LibriTTS/roc_meta_emb.png
Diff not rendered.
Binary file removed evaluation/images/LibriTTS/roc_meta_emb1.png
Diff not rendered.
Binary file removed evaluation/images/LibriTTS/tsne.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/MOS_base_emb.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/MOS_base_emb1.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/MOS_meta_emb.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/MOS_meta_emb1.png
Diff not rendered.
Binary file added evaluation/images/VCTK/auc_encoder.png
Binary file removed evaluation/images/VCTK/det.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/det_base_emb.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/det_base_emb1.png
Diff not rendered.
Binary file added evaluation/images/VCTK/det_encoder.png
Binary file removed evaluation/images/VCTK/det_meta_emb.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/det_meta_emb1.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/eer.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/eer_base_emb.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/eer_base_emb1.png
Diff not rendered.
Binary file added evaluation/images/VCTK/eer_encoder.png
Binary file removed evaluation/images/VCTK/eer_meta_emb.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/eer_meta_emb1.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/errorbar_plot.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/errorbar_plot_base_emb.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/errorbar_plot_base_emb1.png
Diff not rendered.
Binary file added evaluation/images/VCTK/errorbar_plot_encoder.png
Binary file removed evaluation/images/VCTK/errorbar_plot_meta_emb.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/errorbar_plot_meta_emb1.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/roc.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/roc_base_emb.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/roc_base_emb1.png
Diff not rendered.
Binary file added evaluation/images/VCTK/roc_encoder.png
Binary file removed evaluation/images/VCTK/roc_meta_emb.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/roc_meta_emb1.png
Diff not rendered.
Binary file removed evaluation/images/VCTK/tsne.png
Diff not rendered.
Binary file removed evaluation/images/cos_sim.png
Diff not rendered.
Binary file removed evaluation/images/cos_sim_hor.png
Diff not rendered.
Binary file removed evaluation/images/det.png
Diff not rendered.
Binary file removed evaluation/images/eer.png
Diff not rendered.
Binary file removed evaluation/images/errorbar_plot.png
Diff not rendered.
Binary file removed evaluation/images/roc.png
Diff not rendered.
2 changes: 2 additions & 0 deletions evaluation/pair_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ def load_dvector(self):
self.dvector_list_dict[mode] = np.load(f'npy/{self.corpus}/{mode}_dvector.npy', allow_pickle=True)
for mode in tqdm(self.mode_list, desc='mode'):
for step in tqdm(self.step_list, leave=False):
if mode in ['scratch_encoder', 'encoder', 'dvec'] and step != 0:
continue
self.dvector_list_dict[f'{mode}_step{step}'] = np.load(
f'npy/{self.corpus}/{mode}_step{step}_dvector.npy', allow_pickle=True
)
Expand Down
Loading

0 comments on commit 8494da5

Please sign in to comment.