Skip to content

Commit 594e098

Browse files
Merge pull request #131 from hyperion-ml/persephone-asr-refactor
Persephone ASR refactor
2 parents 2cf461d + 16b1eae commit 594e098

File tree

499 files changed

+15221
-4582
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

499 files changed

+15221
-4582
lines changed

egs/librispeech/v1/conf/infer.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
beam_width: 5
2+
decoding_method: time_sync_beam_search
3+
#decoding_method: greedy
4+
#decoding_method: align_length_sync_beam_search
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
data:
2+
train:
3+
dataset:
4+
wav_scale: 1
5+
aug_cfgs:
6+
- conf/reverb_noise_aug.yaml
7+
return_segment_info:
8+
- text
9+
sampler:
10+
sampler_type: bucketing_seg_sampler
11+
max_batch_length: 70.
12+
min_batch_size: 1
13+
drop_last: false
14+
data_loader:
15+
num_workers: 4
16+
val:
17+
dataset:
18+
aug_cfgs:
19+
- conf/reverb_noise_aug.yaml
20+
wav_scale: 1
21+
return_segment_info:
22+
- text
23+
sampler:
24+
sampler_type: bucketing_seg_sampler
25+
max_batch_length: 70.
26+
min_batch_size: 1
27+
drop_last: true
28+
data_loader:
29+
num_workers: 4
30+
model:
31+
hf_feats:
32+
pretrained_model_path: facebook/wav2vec2-base-960h
33+
transducer:
34+
decoder:
35+
rnnt_loss: k2_pruned
36+
predictor:
37+
embed_dim: 1024
38+
num_layers: 2
39+
hid_feats: 512
40+
embed_dropout_rate: 0.4
41+
rnn_dropout_rate: 0.4
42+
rnn_type: lstm
43+
joiner:
44+
hid_feats: 512
45+
feat_fusion_method: weighted-avg
46+
feat_fusion_start: 2
47+
trainer:
48+
optim:
49+
opt_type: sgd
50+
lr: 0.003
51+
momentum: 0.9
52+
weight_decay: 4e-4
53+
lrsched:
54+
lrsch_type: exp_lr
55+
decay_rate: 0.5
56+
decay_steps: 4200
57+
hold_steps: 1500
58+
min_lr: 4e-5
59+
warmup_steps: 1500
60+
update_lr_on_opt_step: true
61+
grad_clip: 100
62+
use_amp: true
63+
log_interval: 1000
64+
epochs: 120
65+
# eff_batch_size: 1024
66+
eff_batch_size: 128
67+
train_mode: hf-feats-frozen-nograd
68+
69+
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
data:
2+
train:
3+
dataset:
4+
wav_scale: 1
5+
aug_cfgs:
6+
- conf/reverb_noise_aug.yaml
7+
return_segment_info:
8+
- text
9+
sampler:
10+
sampler_type: bucketing_seg_sampler
11+
max_batch_length: 70.
12+
min_batch_size: 1
13+
drop_last: false
14+
data_loader:
15+
num_workers: 4
16+
val:
17+
dataset:
18+
aug_cfgs:
19+
- conf/reverb_noise_aug.yaml
20+
wav_scale: 1
21+
return_segment_info:
22+
- text
23+
sampler:
24+
sampler_type: bucketing_seg_sampler
25+
max_batch_length: 70.
26+
min_batch_size: 1
27+
drop_last: true
28+
data_loader:
29+
num_workers: 4
30+
model:
31+
hf_feats:
32+
pretrained_model_path: facebook/wav2vec2-base-960h
33+
transducer:
34+
decoder:
35+
rnnt_loss: k2_pruned
36+
predictor:
37+
embed_dim: 1024
38+
num_layers: 2
39+
hid_feats: 512
40+
embed_dropout_rate: 0.4
41+
rnn_dropout_rate: 0.4
42+
rnn_type: lstm
43+
joiner:
44+
hid_feats: 512
45+
feat_fusion_method: weighted-avg
46+
feat_fusion_start: 2
47+
trainer:
48+
optim:
49+
opt_type: sgd
50+
lr: 0.005
51+
momentum: 0.9
52+
weight_decay: 4e-4
53+
lrsched:
54+
lrsch_type: exp_lr
55+
decay_rate: 0.5
56+
decay_steps: 4200
57+
hold_steps: 1500
58+
min_lr: 4e-5
59+
warmup_steps: 1500
60+
update_lr_on_opt_step: true
61+
grad_clip: 100
62+
use_amp: true
63+
log_interval: 1000
64+
epochs: 120
65+
# eff_batch_size: 1024
66+
eff_batch_size: 128
67+
train_mode: hf-feats-frozen-nograd
68+
69+
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
data:
2+
train:
3+
dataset:
4+
wav_scale: 1
5+
aug_cfgs:
6+
- conf/reverb_noise_aug.yaml
7+
return_segment_info:
8+
- text
9+
sampler:
10+
sampler_type: bucketing_seg_sampler
11+
max_batch_length: 70.
12+
min_batch_size: 1
13+
drop_last: false
14+
data_loader:
15+
num_workers: 4
16+
val:
17+
dataset:
18+
aug_cfgs:
19+
- conf/reverb_noise_aug.yaml
20+
wav_scale: 1
21+
return_segment_info:
22+
- text
23+
sampler:
24+
sampler_type: bucketing_seg_sampler
25+
max_batch_length: 70.
26+
min_batch_size: 1
27+
drop_last: true
28+
data_loader:
29+
num_workers: 4
30+
model:
31+
hf_feats:
32+
pretrained_model_path: facebook/wav2vec2-base-960h
33+
transducer:
34+
decoder:
35+
rnnt_loss: k2_pruned
36+
simple_loss_scale: 0.2
37+
predictor:
38+
embed_dim: 1024
39+
num_layers: 2
40+
hid_feats: 512
41+
embed_dropout_rate: 0.4
42+
rnn_dropout_rate: 0.4
43+
rnn_type: lstm
44+
joiner:
45+
hid_feats: 512
46+
feat_fusion_method: weighted-avg
47+
feat_fusion_start: 2
48+
trainer:
49+
optim:
50+
opt_type: sgd
51+
lr: 0.005
52+
momentum: 0.9
53+
weight_decay: 4e-4
54+
lrsched:
55+
lrsch_type: exp_lr
56+
decay_rate: 0.5
57+
decay_steps: 4200
58+
hold_steps: 1500
59+
min_lr: 4e-5
60+
warmup_steps: 1500
61+
update_lr_on_opt_step: true
62+
grad_clip: 100
63+
use_amp: true
64+
log_interval: 1000
65+
epochs: 120
66+
# eff_batch_size: 1024
67+
eff_batch_size: 128
68+
train_mode: hf-feats-frozen-nograd
69+
70+
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
data:
2+
train:
3+
dataset:
4+
wav_scale: 1
5+
aug_cfgs:
6+
- conf/reverb_noise_aug.yaml
7+
return_segment_info:
8+
- text
9+
sampler:
10+
sampler_type: bucketing_seg_sampler
11+
max_batch_length: 70.
12+
min_batch_size: 1
13+
drop_last: false
14+
data_loader:
15+
num_workers: 4
16+
val:
17+
dataset:
18+
aug_cfgs:
19+
- conf/reverb_noise_aug.yaml
20+
wav_scale: 1
21+
return_segment_info:
22+
- text
23+
sampler:
24+
sampler_type: bucketing_seg_sampler
25+
max_batch_length: 70.
26+
min_batch_size: 1
27+
drop_last: true
28+
data_loader:
29+
num_workers: 4
30+
model:
31+
hf_feats:
32+
pretrained_model_path: facebook/wav2vec2-base-960h
33+
transducer:
34+
decoder:
35+
rnnt_loss: k2
36+
predictor:
37+
embed_dim: 1024
38+
num_layers: 2
39+
hid_feats: 512
40+
embed_dropout_rate: 0.4
41+
rnn_dropout_rate: 0.4
42+
rnn_type: lstm
43+
joiner:
44+
hid_feats: 512
45+
feat_fusion_method: weighted-avg
46+
feat_fusion_start: 2
47+
trainer:
48+
optim:
49+
opt_type: sgd
50+
lr: 0.003
51+
momentum: 0.9
52+
weight_decay: 4e-4
53+
lrsched:
54+
lrsch_type: exp_lr
55+
decay_rate: 0.5
56+
decay_steps: 4200
57+
hold_steps: 1500
58+
min_lr: 4e-5
59+
warmup_steps: 1500
60+
update_lr_on_opt_step: true
61+
grad_clip: 100
62+
use_amp: true
63+
log_interval: 1000
64+
epochs: 120
65+
# eff_batch_size: 1024
66+
eff_batch_size: 128
67+
train_mode: hf-feats-frozen-nograd
68+
69+
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
data:
2+
train:
3+
dataset:
4+
wav_scale: 1
5+
aug_cfgs:
6+
- conf/reverb_noise_aug.yaml
7+
return_segment_info:
8+
- text
9+
sampler:
10+
sampler_type: 'bucketing_seg_sampler'
11+
max_batch_length: 75.
12+
min_batch_size: 1
13+
drop_last: false
14+
data_loader:
15+
num_workers: 4
16+
val:
17+
dataset:
18+
aug_cfgs:
19+
- conf/reverb_noise_aug.yaml
20+
wav_scale: 1
21+
return_segment_info:
22+
- text
23+
sampler:
24+
sampler_type: 'bucketing_seg_sampler'
25+
max_batch_length: 75.
26+
min_batch_size: 1
27+
drop_last: true
28+
data_loader:
29+
num_workers: 4
30+
model: wav2vec2base_transducer_do0.4.yaml
31+
trainer:
32+
optim:
33+
opt_type: sgd
34+
lr: 0.003
35+
momentum: 0.9
36+
weight_decay: 4e-4
37+
lrsched:
38+
lrsch_type: exp_lr
39+
decay_rate: 0.5
40+
decay_steps: 42000
41+
hold_steps: 15000
42+
min_lr: 4e-5
43+
warmup_steps: 15000
44+
update_lr_on_opt_step: true
45+
grad_clip: 100
46+
use_amp: true
47+
log_interval: 1000
48+
epochs: 1200
49+
# eff_batch_size: 1024
50+
eff_batch_size: 128
51+
train_mode: hf-feats-frozen-nograd
52+
53+

0 commit comments

Comments
 (0)