Skip to content

Commit 63c5d0e

Browse files
Merge pull request #143 from hyperion-ml/persephone-refactor
Persephone refactor
2 parents 6833401 + 0a1d2b2 commit 63c5d0e

21 files changed

+1262
-82
lines changed

egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.1.yaml

Lines changed: 0 additions & 78 deletions
This file was deleted.
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
#!/bin/bash
2+
# Copyright
3+
# 2020 Johns Hopkins University (Author: Jesus Villalba)
4+
# Apache 2.0.
5+
#
. ./cmd.sh
. ./path.sh
set -e

# Default settings. parse_options.sh is sourced right after them, presumably
# so that each one can be overridden from the command line (Kaldi-style
# "--name value") -- confirm against parse_options.sh.
stage=2
nnet_stage=1
config_file=default_config.sh
use_gpu=false
do_tsne=false
split_dev=false
hf_chunk_length=120 #seconds
xvec_chunk_length=120 #seconds
. parse_options.sh || exit 1;
. "$config_file"

# Pick the job-submission command for x-vector extraction. The chunk-length
# options are only added to xvec_args on the GPU path; on the CPU path
# xvec_args is left as it was.
if [ "$use_gpu" == "true" ]; then
  xvec_args="--use-gpu true --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length"
  xvec_cmd="$cuda_eval_cmd --mem 6G"
else
  xvec_cmd="$train_cmd --mem 12G"
fi

# Clamp the requested network stage to nnet_stages (expected to come from
# the sourced config file).
if [ -n "$nnet_stages" ] && [ "$nnet_stages" -lt "$nnet_stage" ]; then
  nnet_stage=$nnet_stages
fi

# Reject anything that is not a non-negative integer before it is used to
# build a variable name below.
case "$nnet_stage" in
  '' | *[!0-9]*)
    echo "$0: nnet_stage must be a non-negative integer, got '$nnet_stage'" >&2
    exit 1
    ;;
esac
nnet_stage=$((10#$nnet_stage)) # "01" -> "1", matching the old -eq comparison

# Resolve nnet_s<N> / nnet_s<N>_name by indirect expansion instead of a fixed
# 1..6 if/elif chain, and fail loudly when the config does not define them.
# Previously an unmatched stage left nnet and nnet_name empty, so xvector_dir
# silently became "exp/xvectors/".
nnet_var="nnet_s${nnet_stage}"
nnet_name_var="nnet_s${nnet_stage}_name"
nnet=${!nnet_var}
nnet_name=${!nnet_name_var}
if [ -z "$nnet" ] || [ -z "$nnet_name" ]; then
  echo "$0: $nnet_var and/or $nnet_name_var not defined (config: $config_file, nnet_stage=$nnet_stage)" >&2
  exit 1
fi

# Root directory under which every stage below reads/writes x-vectors.
xvector_dir=exp/xvectors/$nnet_name
52+
53+
# if [ $stage -le 1 ]; then
54+
# # Extract xvectors for training
55+
# for name in lre17_proc_audio_no_sil \
56+
# voxlingua107_codecs_proc_audio_no_sil \
57+
# babel_sre_proc_audio_no_sil \
58+
# cv_codecs_proc_audio_no_sil \
59+
# others_afr_proc_audio_no_sil
60+
# do
61+
# steps_xvec/extract_wav2vec2xvectors.sh \
62+
# --cmd "$xvec_cmd" --nj 100 ${xvec_args} \
63+
# --use-bin-vad false \
64+
# --random-utt-length true --min-utt-length 3 --max-utt-length 30 \
65+
# $nnet data/${name} \
66+
# $xvector_dir/${name}
67+
# done
68+
# fi
69+
if [ $stage -le 2 ]; then
  # Run the extractor on the dev set with augmentation enabled
  # (--num-augs 10, conf/reverb_noise_aug.yaml) and random utterance lengths
  # between 3 and 30. X-vectors go to $xvector_dir/<set>_aug; data/<set>_aug
  # is passed as the last argument (stage 6 later reads
  # data/lre22_dev_aug/augm2clean from it).
  aug_sets=(lre22_dev)
  for name in "${aug_sets[@]}"; do
    # ${xvec_args} is intentionally unquoted: it holds several options.
    aug_opts=(
      --cmd "$xvec_cmd" --nj 100 ${xvec_args}
      --use-bin-vad true --num-augs 10 --aug-config conf/reverb_noise_aug.yaml
      --random-utt-length true --min-utt-length 3 --max-utt-length 30
    )
    steps_xvec/extract_wav2vec2xvectors.sh "${aug_opts[@]}" \
      $nnet data/${name} \
      $xvector_dir/${name}_aug \
      data/${name}_aug
  done
fi
83+
84+
if [ $stage -le 3 ]; then
  # Extract x-vectors for the (non-augmented) dev and eval sets.
  max_jobs=100
  for name in lre22_dev lre22_eval; do
    # Never request more jobs than there are lines in spk2utt.
    num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}')
    nj=$(( $num_spk < $max_jobs ? $num_spk : $max_jobs ))

    # ${xvec_args} is intentionally unquoted: it holds several options.
    plain_opts=(--cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args})
    steps_xvec/extract_wav2vec2xvectors.sh "${plain_opts[@]}" \
      $nnet data/$name \
      $xvector_dir/$name
  done
fi
97+
98+
if [ $stage -le 4 ]; then
  # run_tsne <plot-script> <subdir> [extra options...]
  # Submits <plot-script> through $train_cmd for the current $name, logging to
  # and writing into $xvector_dir/$name/<subdir>. Extra options are appended
  # after the common ones.
  run_tsne() {
    local tsne_script=$1 tsne_subdir=$2
    shift 2
    $train_cmd \
      $xvector_dir/$name/$tsne_subdir/tsne.log \
      hyp_utils/conda_env.sh \
      $tsne_script \
      --train-list data/$name/utt2lang \
      --train-v-file scp:$xvector_dir/$name/xvector.scp \
      --output-dir $xvector_dir/$name/$tsne_subdir \
      --pca-var-r 0.975 \
      --lnorm \
      --prob-plot 1. \
      --tsne.metric cosine \
      --tsne.early-exaggeration 12 --tsne.perplexity 30 \
      "$@"
  }

  for name in lre22_dev; do
    # Nothing to do unless plots or a dev split were requested. The split
    # needs the per-class output (segments.csv), so it also triggers the plots.
    if [ "$do_tsne" != "true" ] && [ "$split_dev" != "true" ]; then
      continue
    fi

    run_tsne plot_embedding_tsne.py tsne
    run_tsne plot_embedding_tsne_per_class.py tsne_per_class \
      --do-ahc --cluster-tsne --ahc-thr -5

    if [ "$split_dev" == "true" ]; then
      hyp_utils/conda_env.sh \
        local/split_dev.py \
        --segs-file $xvector_dir/$name/tsne_per_class/segments.csv \
        --output-dir ./resources/dev_splits \
        --num-folds 2

      # Remove the split data dirs so that stage 5 regenerates them.
      # NOTE(review): data/lre22_dev_aug_p{1,2} and data/lre22_dev_aug_clean*
      # are not removed here, so stages 6-7 would keep using older splits if
      # those directories already exist -- confirm this is intended.
      rm -rf data/lre22_dev_p{1,2}
    fi
  done
fi
144+
# Stage 5: split the dev set into two parts following fold_0 of the dev
# splits: p1 <- train_segments.csv, p2 <- test_segments.csv. Skipped when
# data/lre22_dev_p1 already exists.
if [ $stage -le 5 ] && [ ! -d data/lre22_dev_p1 ]; then
  fold_dir=./resources/dev_splits/fold_0

  # Write the first CSV column (minus the "id" header) of each fold file to
  # p1.lst / p2.lst. Each entry is "<part>:<csv prefix>".
  for part_spec in p1:train p2:test; do
    awk -F "," '$1 != "id" { print $1 }' \
      $fold_dir/${part_spec#*:}_segments.csv \
      > ${part_spec%%:*}.lst
  done

  for p in p1 p2; do
    utils/subset_data_dir.sh --utt-list $p.lst \
      data/lre22_dev data/lre22_dev_$p
  done
fi
162+
# list_aug_utts_in_fold <segments.csv> <augm2clean>
#
# Prints the first field of every <augm2clean> line whose second field
# appears in the first (comma-separated) column of <segments.csv>. The "id"
# header row and empty ids are ignored.
# Returns non-zero if <segments.csv> cannot be read.
#
# Fixes over the previous inline awk:
#  * a bare "getline" in BEGIN consumed the first record of augm2clean (the
#    main input) rather than the CSV header, so the first augmented utterance
#    was always dropped;
#  * "while (getline < file)" never terminates when the file is unreadable
#    because getline returns -1; the loop now tests "> 0".
list_aug_utts_in_fold() {
  local fold_csv=$1 augm2clean=$2
  awk -v fsegs="$fold_csv" '
    BEGIN {
      # Read into a variable so $0/NF of the main input are left untouched.
      while ((rc = (getline line < fsegs)) > 0) {
        split(line, cols, ",")
        if (cols[1] != "" && cols[1] != "id") {
          segs[cols[1]] = 1
        }
      }
      if (rc < 0) {
        print "error: cannot read " fsegs > "/dev/stderr"
        exit 1
      }
      close(fsegs)
    }
    ($2 in segs) { print $1 }' "$augm2clean"
}

if [ $stage -le 6 ]; then
  # Split the augmented dev set the same way as the clean one (fold_0:
  # train -> p1, test -> p2). Only runs when the augmented data dir exists
  # and has not been split yet.
  if [ -d data/lre22_dev_aug ] && [ ! -d data/lre22_dev_aug_p1 ]; then
    fold_dir=./resources/dev_splits/fold_0
    list_aug_utts_in_fold $fold_dir/train_segments.csv \
      data/lre22_dev_aug/augm2clean > p1.lst
    list_aug_utts_in_fold $fold_dir/test_segments.csv \
      data/lre22_dev_aug/augm2clean > p2.lst

    for p in p1 p2; do
      utils/subset_data_dir.sh \
        --utt-list $p.lst \
        data/lre22_dev_aug data/lre22_dev_aug_$p
    done
  fi
fi
197+
# Stage 7: build the "aug_clean" sets, i.e. clean dev + augmented dev.
# Only runs when augmented x-vectors have been extracted.
if [ $stage -le 7 ] && [ -f $xvector_dir/lre22_dev_aug/xvector.scp ]; then
  # Concatenate the clean and augmented x-vector lists.
  merged_dir=$xvector_dir/lre22_dev_aug_clean
  mkdir -p $merged_dir
  cat $xvector_dir/lre22_dev/xvector.scp \
    $xvector_dir/lre22_dev_aug/xvector.scp \
    > $merged_dir/xvector.scp

  # Combine the data dirs for the full set ("") and for each split part,
  # skipping the ones that already exist.
  for p in "" _p1 _p2; do
    if [ -d data/lre22_dev_aug_clean$p ]; then
      continue
    fi
    utils/combine_data.sh \
      data/lre22_dev_aug_clean$p \
      data/lre22_dev$p \
      data/lre22_dev_aug$p
  done
fi

exit

egs/voxceleb/v1.2/cmd.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ if [ "$(hostname -d)" == "cm.gemini" ];then
1414
#export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G"
1515
export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G"
1616
export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G"
17-
#export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G"
1817
export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G"
18+
#export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G"
1919
export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G"
2020
# export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G"
2121
else

0 commit comments

Comments
 (0)