Skip to content

Commit

Permalink
[examples] fix aishell/paraformer and make it runnable (#2293)
Browse files Browse the repository at this point in the history
* [examples] fix aishell/paraformer and make it runnable

* [examples] fix aishell/paraformer and make it runnable

* [examples] fix aishell/paraformer and make it runnable

* [examples] try to pass unit test

* [examples] try to pass unit test

* [examples] try to pass ut
  • Loading branch information
xingchensong authored Jan 11, 2024
1 parent 86605c3 commit c0f4194
Show file tree
Hide file tree
Showing 11 changed files with 321,476 additions and 39 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ jobs:
id: get_pr_tip
- name: Ensure no tabs
run: |
(! git grep -I -l $'\t' -- . ':(exclude)*.svg' ':(exclude)**Makefile' ':(exclude)**/contrib/**' ':(exclude)third_party' ':(exclude).gitattributes' ':(exclude).gitmodules' || (echo "The above files have tabs; please convert them to spaces"; false))
(! git grep -I -l $'\t' -- . ':(exclude)*.txt' ':(exclude)*.svg' ':(exclude)**Makefile' ':(exclude)**/contrib/**' ':(exclude)third_party' ':(exclude).gitattributes' ':(exclude).gitmodules' || (echo "The above files have tabs; please convert them to spaces"; false))
- name: Ensure no trailing whitespace
run: |
(! git grep -I -n $' $' -- . ':(exclude)third_party' ':(exclude).gitattributes' ':(exclude).gitmodules' || (echo "The above files have trailing whitespace; please remove them"; false))
(! git grep -I -n $' $' -- . ':(exclude)*.txt' ':(exclude)third_party' ':(exclude).gitattributes' ':(exclude).gitmodules' || (echo "The above files have trailing whitespace; please remove them"; false))
flake8-py3:
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ repos:
rev: v4.5.0
hooks:
- id: trailing-whitespace
exclude: 'test/resources/.*'
- repo: https://github.com/pre-commit/mirrors-yapf
rev: 'v0.32.0'
hooks:
Expand Down
6 changes: 2 additions & 4 deletions examples/aishell/paraformer/conf/train_paraformer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ encoder_conf:
pos_enc_layer_type: abs_pos_paraformer
positional_dropout_rate: 0.1
sanm_shfit: 0
gradient_checkpointing: true

decoder: sanm_decoder
decoder_conf:
Expand All @@ -27,6 +28,7 @@ decoder_conf:
sanm_shfit: 0
self_attention_dropout_rate: 0.1
src_attention_dropout_rate: 0.1
gradient_checkpointing: true

tokenizer: paraformer
tokenizer_conf:
Expand All @@ -52,8 +54,6 @@ model_conf:
ctc_weight: 0.3
length_normalized_loss: false
lsm_weight: 0.1
predictor_bias: 1
predictor_weight: 1.0
sampling_ratio: 0.75

predictor: paraformer_predictor
Expand All @@ -68,8 +68,6 @@ predictor_conf:
tail_threshold: 0.45
threshold: 1.0
upsample_times: 3
upsample_type: cnn_blstm
use_cif1_cnn: false

dataset: asr
dataset_conf:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ encoder_conf:
positional_dropout_rate: 0.1
sanm_shfit: 0
use_dynamic_chunk: true
gradient_checkpointing: true

decoder: sanm_decoder
decoder_conf:
Expand All @@ -28,6 +29,7 @@ decoder_conf:
sanm_shfit: 0
self_attention_dropout_rate: 0.1
src_attention_dropout_rate: 0.1
gradient_checkpointing: true

tokenizer: paraformer
tokenizer_conf:
Expand All @@ -53,8 +55,6 @@ model_conf:
ctc_weight: 0.3
length_normalized_loss: false
lsm_weight: 0.1
predictor_bias: 1
predictor_weight: 1.0
sampling_ratio: 0.75

predictor: paraformer_predictor
Expand All @@ -69,8 +69,6 @@ predictor_conf:
tail_threshold: 0.45
threshold: 1.0
upsample_times: 3
upsample_type: cnn_blstm
use_cif1_cnn: false

dataset: asr
dataset_conf:
Expand Down
9 changes: 8 additions & 1 deletion examples/aishell/paraformer/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ decode_modes="ctc_greedy_search ctc_prefix_beam_search paraformer_greedy_search"

train_engine=torch_ddp

# deepspeed_save_states is either "model+optimizer" or "model_only":
# "model+optimizer" is more time-efficient but consumes more space; "model_only" is the opposite.
deepspeed_config=../whisper/conf/ds_stage1.json
deepspeed_save_states="model+optimizer"

. tools/parse_options.sh || exit 1;

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
Expand Down Expand Up @@ -97,7 +102,9 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
--ddp.dist_backend $dist_backend \
--num_workers ${num_workers} \
--prefetch ${prefetch} \
--pin_memory
--pin_memory \
--deepspeed_config ${deepspeed_config} \
--deepspeed.save_states ${deepspeed_save_states}
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
Expand Down
Loading

0 comments on commit c0f4194

Please sign in to comment.