Commit
separate identification and verification; fix bugs
shaojinding committed Aug 31, 2020
1 parent 7051c3e commit 86c2412
Showing 20 changed files with 473 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -4,4 +4,4 @@ models/__pycache__/**
__pycache__/**
config/__pycache__/**
data_objects/__pycache__/**
-logs_scratch/**
+logs_scratch/**
4 changes: 1 addition & 3 deletions config/default.py
@@ -31,12 +31,11 @@
# DATASET related params
_C.DATASET = CN()
_C.DATASET.DATA_DIR = ''
-_C.DATASET.DATASET = ''
+_C.DATASET.SUB_DIR = ''
_C.DATASET.TEST_DATA_DIR = ''
-_C.DATASET.TEST_DATASET = ''
_C.DATASET.NUM_WORKERS = 0
_C.DATASET.PARTIAL_N_FRAMES = 32
_C.DATASET.FEATURE_DIM = 40


# train
@@ -60,7 +59,6 @@
_C.TRAIN.END_EPOCH = 140


-
def update_config(cfg, args):
    cfg.defrost()
    cfg.merge_from_file(args.cfg)
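The new SUB_DIR option selects which preprocessed feature split a run reads. It is threaded through DeepSpeakerDataset below and resolves to DATA_DIR/feature/<SUB_DIR>, so identification and verification runs can share a single DATA_DIR.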
4 changes: 2 additions & 2 deletions data_objects/DeepSpeakerDataset.py
@@ -17,10 +17,10 @@ def find_classes(speakers):

class DeepSpeakerDataset(data.Dataset):

-    def __init__(self, data_dir, partial_n_frames, partition=None, is_test=False):
+    def __init__(self, data_dir, sub_dir, partial_n_frames, partition=None, is_test=False):
        super(DeepSpeakerDataset, self).__init__()
        self.data_dir = data_dir
-        self.root = data_dir.joinpath('feature')
+        self.root = data_dir.joinpath('feature', sub_dir)
        self.partition = partition
        self.partial_n_frames = partial_n_frames
        self.is_test = is_test
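A minimal usage sketch of the new signature, mirroring the call sites in train_baseline_identification.py further down (the path is a placeholder):

from pathlib import Path
from data_objects.DeepSpeakerDataset import DeepSpeakerDataset

data_dir = Path('/path/to/VoxCeleb1')
# 'merged' for identification runs, 'dev' for verification training
train_set = DeepSpeakerDataset(data_dir, 'merged', 300, 'train')
# features are now read from /path/to/VoxCeleb1/feature/merged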
7 changes: 4 additions & 3 deletions data_objects/VoxcelebTestset.py
@@ -9,7 +9,7 @@ def get_test_paths(pairs_path, db_dir):
    def convert_folder_name(path):
        basename = os.path.splitext(path)[0]
        items = basename.split('/')
-        speaker_dir = 'wav_{}'.format(items[0])
+        speaker_dir = items[0]
        fname = '{}_{}.npy'.format(items[1], items[2])
        p = os.path.join(speaker_dir, fname)
        return p
@@ -38,12 +38,13 @@ def convert_folder_name(path):

    return path_list

+
class VoxcelebTestset(data.Dataset):
    def __init__(self, data_dir, partial_n_frames):
        super(VoxcelebTestset, self).__init__()
        self.data_dir = data_dir
-        self.root = data_dir.joinpath('feature')
-        self.test_pair_txt_fpath = data_dir.joinpath('veri_test.txt')
+        self.root = data_dir.joinpath('feature', 'test')
+        self.test_pair_txt_fpath = data_dir.joinpath('veri_test2.txt')
        self.test_pairs = get_test_paths(self.test_pair_txt_fpath, self.root)
        self.partial_n_frames = partial_n_frames
        mean = np.load(self.data_dir.joinpath('mean.npy'))
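For context, each line of the VoxCeleb pair file is 'label path1 path2'; convert_folder_name above maps such a wav path to the matching feature file. A worked example with hypothetical pair values:

# line in the pair file:  1 id10270/x6uYqmx31kE/00001.wav id10270/8jEAjG6SegY/00008.wav
# items       = ['id10270', 'x6uYqmx31kE', '00001']
# speaker_dir = 'id10270'   (previously 'wav_id10270')
# fname       = 'x6uYqmx31kE_00001.npy'
# p           = 'id10270/x6uYqmx31kE_00001.npy'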
4 changes: 1 addition & 3 deletions data_objects/params_data.py
@@ -8,9 +8,7 @@
## Audio
sampling_rate = 16000
# Number of spectrogram frames in a partial utterance
-partials_n_frames = 160 # 1600 ms
-# Number of spectrogram frames at inference
-inference_n_frames = 80 # 800 ms
+partials_n_frames = 300 # 3000 ms


## Audio volume normalization
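At the 10 ms frame step implied by the existing comments (160 frames = 1600 ms), this lengthens each partial utterance from 1.6 s to 300 × 10 ms = 3 s, and the separate, shorter inference window is dropped so training and inference use the same length.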
6 changes: 3 additions & 3 deletions data_objects/preprocess.py
@@ -65,7 +65,7 @@ def _preprocess_speaker_dirs(speaker_dirs, dataset_name, datasets_root, out_dir,
    # Function to preprocess utterances for one speaker
    def preprocess_speaker(speaker_dir: Path):
        # Give a name to the speaker that includes its dataset
-        speaker_name = "_".join(speaker_dir.relative_to(datasets_root).parts)
+        speaker_name = speaker_dir.parts[-1]

        # Create an output directory with that name, as well as a txt file containing a
        # reference to each source file.
@@ -121,7 +121,7 @@ def preprocess_speaker(speaker_dir: Path):
print("Done preprocessing %s.\n" % dataset_name)


def preprocess_voxceleb1(dataset_root: Path, out_dir: Path, skip_existing=False):
def preprocess_voxceleb1(dataset_root: Path, parition: str, out_dir: Path, skip_existing=False):
# Initialize the preprocessing
dataset_name = "VoxCeleb1"
dataset_root, logger = _init_preprocess_dataset(dataset_name, dataset_root, out_dir)
@@ -140,7 +140,7 @@ def preprocess_voxceleb1(dataset_root: Path, out_dir: Path, skip_existing=False)
          (len(keep_speaker_ids), len(nationalities)))

    # Get the speaker directories for anglophone speakers only
-    speaker_dirs = dataset_root.joinpath("wav").glob("*")
+    speaker_dirs = dataset_root.joinpath(partition).glob("*")
    speaker_dirs = [speaker_dir for speaker_dir in speaker_dirs]

    print("VoxCeleb1: found %d anglophone speakers on the disk." %
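With speaker_name reduced to the directory's base name and the partition passed through, the preprocessed features should land in a layout roughly like this (a sketch; the speaker IDs are examples):

/path/to/VoxCeleb1/feature/
    dev/
        id10001/
            <video>_<utterance>.npy
    test/
        id10270/
            <video>_<utterance>.npy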
21 changes: 16 additions & 5 deletions data_preprocess.py
@@ -3,6 +3,7 @@
from data_objects.partition_voxceleb import partition_voxceleb
from pathlib import Path
import argparse
+import subprocess

if __name__ == "__main__":
    class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
@@ -19,15 +20,25 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
    args = parser.parse_args()

    # Process the arguments
-    out_dir = args.dataset_root.joinpath("feature")
+    dev_out_dir = args.dataset_root.joinpath("feature", "dev")
+    test_out_dir = args.dataset_root.joinpath("feature", "test")
+    merged_out_dir = args.dataset_root.joinpath("feature", "merged")
    assert args.dataset_root.exists()
    assert args.dataset_root.joinpath('iden_split.txt').exists()
    assert args.dataset_root.joinpath('veri_test.txt').exists()
    assert args.dataset_root.joinpath('vox1_meta.csv').exists()
-    out_dir.mkdir(exist_ok=True, parents=True)
+    dev_out_dir.mkdir(exist_ok=True, parents=True)
+    test_out_dir.mkdir(exist_ok=True, parents=True)
+    merged_out_dir.mkdir(exist_ok=True, parents=True)

    # Preprocess the datasets
-    preprocess_voxceleb1(args.dataset_root, out_dir, args.skip_existing)
-    compute_mean_std(out_dir, args.dataset_root.joinpath('mean.npy'), args.dataset_root.joinpath('std.npy'))
-    partition_voxceleb(out_dir, args.dataset_root.joinpath('iden_split.txt'))
+    preprocess_voxceleb1(args.dataset_root, 'dev', dev_out_dir, args.skip_existing)
+    preprocess_voxceleb1(args.dataset_root, 'test', test_out_dir, args.skip_existing)
+    for path in dev_out_dir.iterdir():
+        subprocess.call(['cp', '-r', path.as_posix(), merged_out_dir.as_posix()])
+    for path in test_out_dir.iterdir():
+        subprocess.call(['cp', '-r', path.as_posix(), merged_out_dir.as_posix()])
+    compute_mean_std(merged_out_dir, args.dataset_root.joinpath('mean.npy'),
+                     args.dataset_root.joinpath('std.npy'))
+    partition_voxceleb(merged_out_dir, args.dataset_root.joinpath('iden_split.txt'))
    print("Done")
10 changes: 0 additions & 10 deletions evaluate_verification.py
@@ -1,10 +1,3 @@
-# -*- coding: utf-8 -*-
-# @Date : 2019-08-09
-# @Author : Xinyu Gong (xy_gong@tamu.edu)
-# @Link : None
-# @Version : 0.0
-
-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -41,7 +34,6 @@ def parse_args():
    parser.add_argument('--load_path',
                        help="The path to resumed dir",
                        default=None)
-
    parser.add_argument('--text_arch',
                        help="The path to arch",
                        default=None)
@@ -78,8 +70,6 @@ def main():
        model.drop_path_prob = 0.0
    else:
        model = eval('resnet.{}(num_classes={})'.format(cfg.MODEL.NAME, cfg.MODEL.NUM_CLASSES))
-        # model = Network(cfg.MODEL.NAME, cfg.MODEL.EMBEDDING_DIM, cfg.MODEL.NUM_CLASSES)
-        # model = DeepSpeakerModel(cfg.MODEL.EMBEDDING_DIM, cfg.MODEL.NUM_CLASSES)
    model = model.cuda()

    # resume && make log dir and logger
28 changes: 28 additions & 0 deletions exps/baseline/resnet18_iden.yaml
@@ -0,0 +1,28 @@
PRINT_FREQ: 200
VAL_FREQ: 10

CUDNN:
  BENCHMARK: true
  DETERMINISTIC: false
  ENABLED: true

DATASET:
  DATA_DIR: '/path/to/VoxCeleb1'
  SUB_DIR: 'merged'
  NUM_WORKERS: 0
  PARTIAL_N_FRAMES: 300

TRAIN:
  BATCH_SIZE: 256
  LR: 0.01
  LR_MIN: 0.001
  BETA1: 0.9
  BETA2: 0.999

  BEGIN_EPOCH: 0
  END_EPOCH: 301

MODEL:
  NAME: 'resnet18'
  NUM_CLASSES: 1251
  INIT_CHANNELS: 64
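Presumably this config is launched like the other baselines; a hypothetical invocation, assuming the --cfg flag read by update_config:

python train_baseline_identification.py --cfg exps/baseline/resnet18_iden.yaml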
exps/baseline/resnet18.yaml → exps/baseline/resnet18_veri.yaml
@@ -7,15 +7,15 @@ CUDNN:
  ENABLED: true

DATASET:
-  DATA_DIR: '/path/to/VoxCeleb'
+  DATA_DIR: '/path/to/VoxCeleb1'
+  SUB_DIR: 'dev'
  NUM_WORKERS: 0
  PARTIAL_N_FRAMES: 300

TRAIN:
  BATCH_SIZE: 256
  LR: 0.01
  LR_MIN: 0.001
-  WD: 0.0003
  BETA1: 0.9
  BETA2: 0.999

28 changes: 28 additions & 0 deletions exps/baseline/resnet34_iden.yaml
@@ -0,0 +1,28 @@
PRINT_FREQ: 200
VAL_FREQ: 10

CUDNN:
  BENCHMARK: true
  DETERMINISTIC: false
  ENABLED: true

DATASET:
  DATA_DIR: '/path/to/VoxCeleb1'
  SUB_DIR: 'merged'
  NUM_WORKERS: 0
  PARTIAL_N_FRAMES: 300

TRAIN:
  BATCH_SIZE: 128
  LR: 0.01
  LR_MIN: 0.001
  BETA1: 0.9
  BETA2: 0.999

  BEGIN_EPOCH: 0
  END_EPOCH: 301

MODEL:
  NAME: 'resnet34'
  NUM_CLASSES: 1251
  INIT_CHANNELS: 64
exps/baseline/resnet34.yaml → exps/baseline/resnet34_veri.yaml
@@ -7,15 +7,15 @@ CUDNN:
  ENABLED: true

DATASET:
-  DATA_DIR: '/path/to/VoxCeleb'
+  DATA_DIR: '/path/to/VoxCeleb1'
+  SUB_DIR: 'dev'
  NUM_WORKERS: 0
  PARTIAL_N_FRAMES: 300

TRAIN:
-  BATCH_SIZE: 256
+  BATCH_SIZE: 128
  LR: 0.01
  LR_MIN: 0.001
-  WD: 0.0003
  BETA1: 0.9
  BETA2: 0.999

7 changes: 3 additions & 4 deletions exps/scratch/scratch.yaml → exps/scratch/scratch_iden.yaml
@@ -7,16 +7,15 @@ CUDNN:
  ENABLED: true

DATASET:
-  DATA_DIR: '/path/to/VoxCeleb'
-  DATASET: 'train'
+  DATA_DIR: '/path/to/VoxCeleb1'
+  SUB_DIR: 'merged'
  NUM_WORKERS: 0
  PARTIAL_N_FRAMES: 300

TRAIN:
-  BATCH_SIZE: 48
+  BATCH_SIZE: 96
  LR: 0.01
  LR_MIN: 0.001
-  WD: 0.0003
  BETA1: 0.9
  BETA2: 0.999

29 changes: 29 additions & 0 deletions exps/scratch/scratch_veri.yaml
@@ -0,0 +1,29 @@
PRINT_FREQ: 200
VAL_FREQ: 10

CUDNN:
  BENCHMARK: true
  DETERMINISTIC: false
  ENABLED: true

DATASET:
  DATA_DIR: '/path/to/VoxCeleb1'
  SUB_DIR: 'dev'
  NUM_WORKERS: 0
  PARTIAL_N_FRAMES: 300

TRAIN:
  BATCH_SIZE: 48
  LR: 0.01
  LR_MIN: 0.001
  BETA1: 0.9
  BETA2: 0.999

  BEGIN_EPOCH: 0
  END_EPOCH: 301

MODEL:
  NAME: 'model'
  NUM_CLASSES: 1211
  LAYERS: 8
  INIT_CHANNELS: 64
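Taken together, the configs encode the separation named in the commit title: the *_iden.yaml files train on SUB_DIR 'merged' with NUM_CLASSES 1251 (all VoxCeleb1 speakers, since the identification protocol splits utterances rather than speakers), while the *_veri.yaml files train on 'dev' with NUM_CLASSES 1211, holding out the 40 verification-test speakers referenced by the pair list.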
4 changes: 2 additions & 2 deletions functions.py
@@ -205,8 +205,8 @@ def validate_identification(cfg, model, test_loader, criterion):
            target = target.cuda(non_blocking=True)

            # compute output
-            outputs = model(input)
-            output = torch.mean(outputs, dim=0, keepdim=True)
+            output = model(input)
+            output = torch.mean(output, dim=0, keepdim=True)
            output = model.forward_classifier(output)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            top1.update(acc1[0], input.size(0))
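Besides removing the easily misread outputs/output pair, this block shows the identification evaluation scheme: each test utterance arrives as a stack of partial windows, the per-window embeddings are mean-pooled, and only the pooled vector is classified. A shape-level sketch with hypothetical sizes:

output = model(input)                             # (n_windows, embedding_dim)
output = torch.mean(output, dim=0, keepdim=True)  # (1, embedding_dim)
output = model.forward_classifier(output)         # (1, num_classes) speaker logits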
3 changes: 1 addition & 2 deletions search.py
@@ -84,8 +84,7 @@ def main():
    # Optimizer
    optimizer = optim.Adam(
        weight_params,
-        lr=cfg.TRAIN.LR,
-        weight_decay=cfg.TRAIN.WD,
+        lr=cfg.TRAIN.LR
    )

    # resume && make log dir and logger
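This matches the yaml changes above: with WD removed from the configs, the Adam optimizers here and in train_baseline_identification.py run with PyTorch's default weight_decay=0.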
7 changes: 3 additions & 4 deletions train_baseline.py → train_baseline_identification.py
@@ -63,8 +63,7 @@ def main():
    model = model.cuda()
    optimizer = optim.Adam(
        model.net_parameters() if hasattr(model, 'net_parameters') else model.parameters(),
-        lr=cfg.TRAIN.LR,
-        weight_decay=cfg.TRAIN.WD,
+        lr=cfg.TRAIN.LR
    )

    # Loss
@@ -100,9 +99,9 @@

    # dataloader
    train_dataset = DeepSpeakerDataset(
-        Path(cfg.DATASET.DATA_DIR), cfg.DATASET.PARTIAL_N_FRAMES, 'train')
+        Path(cfg.DATASET.DATA_DIR), cfg.DATASET.SUB_DIR, cfg.DATASET.PARTIAL_N_FRAMES, 'train')
    test_dataset_identification = DeepSpeakerDataset(
-        Path(cfg.DATASET.DATA_DIR), cfg.DATASET.PARTIAL_N_FRAMES, 'test', is_test=True)
+        Path(cfg.DATASET.DATA_DIR), cfg.DATASET.SUB_DIR, cfg.DATASET.PARTIAL_N_FRAMES, 'test', is_test=True)
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=cfg.TRAIN.BATCH_SIZE,