
update wave2vec #110

Merged · 38 commits merged on Jun 7, 2023

Commits (38)
7bc933c
add wav2vec2
upvenly May 16, 2023
53c0314
add wav2vec2
upvenly May 16, 2023
91e0825
support-different-image
upvenly May 18, 2023
4916362
update waveglow
upvenly Jun 1, 2023
0089955
update waveglow
upvenly Jun 1, 2023
887cfbb
update waveglow
upvenly Jun 1, 2023
dfbcb23
update waveglow
upvenly Jun 1, 2023
a680dc8
update waveglow
upvenly Jun 1, 2023
265ebac
update waveglow
upvenly Jun 1, 2023
44e35d1
update waveglow
upvenly Jun 1, 2023
885380c
update
upvenly Jun 1, 2023
f1f7898
update
upvenly Jun 1, 2023
9e8dc0c
update
upvenly Jun 1, 2023
f00658d
update
upvenly Jun 1, 2023
d53ace4
update
upvenly Jun 1, 2023
e80b2ce
update
upvenly Jun 1, 2023
d6d43c4
update
upvenly Jun 2, 2023
b9f10df
update
upvenly Jun 2, 2023
e999c7e
update according to review
upvenly Jun 5, 2023
569b24e
merge main
upvenly Jun 5, 2023
7773318
merge main
upvenly Jun 5, 2023
25ccffb
merge main
upvenly Jun 5, 2023
3c1d58b
update according to review
upvenly Jun 5, 2023
8f6e635
update according to review
upvenly Jun 5, 2023
a767546
update according to review
upvenly Jun 5, 2023
1eba6f0
update according to review
upvenly Jun 5, 2023
cfa7050
update according to review
upvenly Jun 5, 2023
d45f122
update according to review
upvenly Jun 5, 2023
e46af7b
add file
upvenly Jun 6, 2023
2ff46e8
Merge branch 'main' of github.com:FlagOpen/FlagPerf into wwl/support-…
upvenly Jun 6, 2023
2f939ad
add extern
upvenly Jun 6, 2023
b5e007c
add extern
upvenly Jun 6, 2023
4d51139
Merge branch 'main' of github.com:FlagOpen/FlagPerf into wwl/support-…
upvenly Jun 6, 2023
51fbcb9
update for adapter
upvenly Jun 6, 2023
41babcd
update wave2vec
upvenly Jun 7, 2023
dba2843
Merge branch 'main' of github.com:FlagOpen/FlagPerf into wwl/update_w…
upvenly Jun 7, 2023
dc24610
update wave2vec
upvenly Jun 7, 2023
8f6a942
update wave2vec
upvenly Jun 7, 2023
Files changed
@@ -8,8 +8,13 @@ https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/SpeechRecogni


### Dataset download URL (global proxy)
http://www.openslr.org/resources/12
Data source: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechRecognition/wav2vec2#quick-start-guide

Run:

DATASET_DIR=[PATH] bash training/benchmarks/wav2vec2/pytorch/scripts/download_data.sh

DATASET_DIR=[PATH] bash training/benchmarks/wav2vec2/pytorch/scripts/generate_filelists.sh

### Framework and chip support
| | Pytorch |
2 changes: 1 addition & 1 deletion training/benchmarks/wav2vec2/pytorch/config/_base.py
@@ -20,7 +20,7 @@
save_frequency = 1

# IO
output_dir = "results/pretrain_base"
output_dir = "result/pretrain_base"
data_dir = None
train_subset = "train-full-960"
valid_subset = "dev-other"
@@ -14,7 +14,7 @@
encoder_attention_heads = 12
feature_grad_mult = 0.1
ema = 0.0
optimizer = "adam"
optimizer = "fused_adam"
clip_norm = 25
weight_decay = 0.01
lr_policy = "poly"
17 changes: 0 additions & 17 deletions training/benchmarks/wav2vec2/pytorch/optimizer/__init__.py
@@ -1,17 +0,0 @@

from common.fairseq.optim.fused_adam import get_fused_adam_class
from common.utils import print_once

def create_optimizer(model, args):

kw = {'lr': args.lr, 'weight_decay': args.weight_decay}
if args.optimizer == 'adam' and not (args.fp16 or args.bf16):
print_once('WARNING: Using FusedAdam instead of Adam')
kw.update({'betas': args.adam_betas, 'eps': args.adam_eps})
fused_adam_cls = get_fused_adam_class()
print(fused_adam_cls, "fused_adam_cls")
optimizer = fused_adam_cls(model.parameters(), **kw)
else:
raise ValueError(f'Invalid optimizer "{args.optimizer}"')

return optimizer
22 changes: 22 additions & 0 deletions training/benchmarks/wav2vec2/pytorch/scripts/download_data.sh
@@ -0,0 +1,22 @@
#!/usr/bin/env bash

# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

: ${DATASET_DIR:=/datasets/}
: ${SUBSETS:="train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other"}

python3 utils/download_librispeech.py $DATASET_DIR --subsets $SUBSETS
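The script only sets its inputs through shell default assignments (`: ${DATASET_DIR:=...}`, `: ${SUBSETS:=...}`), so both can be overridden from the environment. A minimal sketch of driving it from Python for a smoke test; the `/datasets` path and the choice of `dev-clean` as a single small subset are placeholders, and the working directory must be one from which the script's relative `utils/` path resolves:

```python
# Illustrative only: download a single small LibriSpeech subset by overriding
# the script's SUBSETS default via the environment.
import os
import subprocess

env = dict(os.environ, DATASET_DIR="/datasets", SUBSETS="dev-clean")
subprocess.run(
    ["bash", "training/benchmarks/wav2vec2/pytorch/scripts/download_data.sh"],
    env=env,
    check=True,  # surface a non-zero exit, mirroring the script's `set -e`
)
```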
51 changes: 51 additions & 0 deletions training/benchmarks/wav2vec2/pytorch/scripts/generate_filelists.sh
@@ -0,0 +1,51 @@
#!/usr/bin/env bash

# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -eu

: ${DATASET_DIR:=/datasets/LibriSpeech}
: ${FILELISTS_DIR:=$DATASET_DIR}
: ${EXT:=flac} # or wav

mkdir -p $DATASET_DIR
mkdir -p $FILELISTS_DIR

for SUBSET in train-clean-100 train-clean-360 train-other-500 \
dev-clean dev-other test-clean test-other \
; do
TSV=$FILELISTS_DIR/$SUBSET.tsv

if [ ! -d $DATASET_DIR/$SUBSET ]; then
echo "ERROR: $DATASET_DIR/$SUBSET does not exist; skipping."
continue
fi

python3 utils/generate_filelist.py --extension $EXT $DATASET_DIR/$SUBSET $TSV
python3 utils/libri_labels.py $TSV --output-dir $FILELISTS_DIR --output-name $SUBSET
done

# Combine
python3 utils/combine_filelists.py $FILELISTS_DIR/train-{clean-100,clean-360,other-500}.tsv > $FILELISTS_DIR/train-full-960.tsv

cat $FILELISTS_DIR/train-clean-100.wrd > $FILELISTS_DIR/train-full-960.wrd
cat $FILELISTS_DIR/train-clean-360.wrd >> $FILELISTS_DIR/train-full-960.wrd
cat $FILELISTS_DIR/train-other-500.wrd >> $FILELISTS_DIR/train-full-960.wrd

cat $FILELISTS_DIR/train-clean-100.ltr > $FILELISTS_DIR/train-full-960.ltr
cat $FILELISTS_DIR/train-clean-360.ltr >> $FILELISTS_DIR/train-full-960.ltr
cat $FILELISTS_DIR/train-other-500.ltr >> $FILELISTS_DIR/train-full-960.ltr

python3 utils/generate_dictionary.py $FILELISTS_DIR/train-full-960.ltr $FILELISTS_DIR/dict.ltr.txt
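The combine/concatenate steps at the end are what produce the `train-full-960` split named by `train_subset` in `_base.py`, next to the per-subset lists (including `dev-other`, the configured `valid_subset`) and the letter dictionary. A small sanity check, assuming the script's default `FILELISTS_DIR` of `/datasets/LibriSpeech`; adjust the path to wherever the filelists were written:

```python
# Illustrative sanity check (not part of this PR): confirm the filelist outputs
# that the benchmark config refers to actually exist after running the script.
from pathlib import Path

filelists_dir = Path("/datasets/LibriSpeech")  # generate_filelists.sh default
expected = [
    "train-full-960.tsv", "train-full-960.wrd", "train-full-960.ltr",
    "dev-other.tsv", "dev-other.wrd", "dev-other.ltr",
    "dict.ltr.txt",
]
missing = [name for name in expected if not (filelists_dir / name).is_file()]
print("filelists complete" if not missing else f"missing: {missing}")
```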
7 changes: 3 additions & 4 deletions training/benchmarks/wav2vec2/pytorch/train/trainer.py
@@ -22,7 +22,6 @@
from model import create_model
from train.evaluator import Evaluator
from train.training_state import TrainingState
from optimizer import create_optimizer
from loss.criterion import Wav2vecCriterion


@@ -50,7 +49,7 @@ def __init__(self, driver: Driver, adapter, evaluator: Evaluator,
def init(self):
self.model = create_model(self.config)
self.model = self.init_model(self.model, self.device)
self.optimizer = create_optimizer(self.model, self.config)
self.optimizer = self.adapter.create_optimizer(self.model, self.config)
self.optim = self.optimizer

Metrics = W2v2Metrics
@@ -153,7 +152,7 @@ def train_one_epoch(self, config, epoch, step, train_dataloader, sampler):
grads_mult_factor = world_size / self.metrics.partials[
'sample_size']

if self.config.optimizer == 'adam' and not (self.config.fp16
if self.config.optimizer == 'fused_adam' and not (self.config.fp16
or self.config.bf16):
# adam and non-amp optimizer - can use 'scale' kwarg for step
# and defer grad multiplication
@@ -168,7 +167,7 @@ def train_one_epoch(self, config, epoch, step, train_dataloader, sampler):
# calculate grad norm, maybe clip
grad_norm = self.optim.clip_grad_norm(self.config.clip_norm)

if self.config.optimizer == 'adam' and not (self.config.fp16
if self.config.optimizer == 'fused_adam' and not (self.config.fp16
or self.config.bf16):
self.scaler.step(self.optim,
scale=1. / grads_mult_factor)
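The renamed check is more than cosmetic: only when the configured optimizer is `fused_adam` and neither fp16 nor bf16 is active does the trainer skip scaling the gradients itself and instead hand `scale=1. / grads_mult_factor` to the step call. A rough sketch of the two paths with a stand-in `step_fn`; the fallback-branch behavior is my assumption (it is collapsed in the diff), and the real scaler/FusedAdam interfaces live under `common/fairseq`:

```python
# Rough illustration (not the PR's code) of the two gradient-normalization
# paths selected by the optimizer-name check in train_one_epoch.
def apply_update(optimizer_name, use_amp, grads, grads_mult_factor, step_fn):
    if optimizer_name == "fused_adam" and not use_amp:
        # Deferred path from the diff above: hand the factor to step() via its
        # scale argument instead of touching the gradients here.
        step_fn(scale=1.0 / grads_mult_factor)
    else:
        # Assumed fallback (collapsed in the diff): scale the gradients in
        # place, then take a plain optimizer step.
        for g in grads:
            g.mul_(grads_mult_factor)
        step_fn()
```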
21 changes: 18 additions & 3 deletions training/benchmarks/wav2vec2/pytorch/train/trainer_adapter.py
@@ -1,10 +1,11 @@
import torch.distributed as dist
import config

from torch import nn, Tensor
from driver.dist_pytorch import main_proc_print
from torch import nn
from torch.nn.parallel import DistributedDataParallel as DDP


from common.fairseq.optim.fused_adam import get_fused_adam_class
from driver.dist_pytorch import main_proc_print


def convert_model(model: nn.Module) -> nn.Module:
@@ -27,3 +28,17 @@ def model_to_ddp(model: nn.Module) -> nn.Module:
from common.fairseq.dist import ModuleProxyWrapper
model = ModuleProxyWrapper(model)
return model


def create_optimizer(model, args):

kw = {'lr': args.lr, 'weight_decay': args.weight_decay}
if args.optimizer == 'fused_adam' and not (args.fp16 or args.bf16):
kw.update({'betas': args.adam_betas, 'eps': args.adam_eps})
fused_adam_cls = get_fused_adam_class()
print(fused_adam_cls, "fused_adam_cls")
optimizer = fused_adam_cls(model.parameters(), **kw)
else:
raise ValueError(f'Invalid optimizer "{args.optimizer}"')

return optimizer
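Moving optimizer construction behind the adapter is what makes it overridable per backend: the trainer now calls `self.adapter.create_optimizer(...)` instead of importing the deleted `optimizer` package. A hypothetical vendor-side override for a stack without the FusedAdam kernel, falling back to plain `torch.optim.Adam`; the fallback and its caveat are illustrative, not part of this PR:

```python
# Hypothetical vendor override of create_optimizer (illustrative only).
import torch


def create_optimizer(model, args):
    kw = {
        "lr": args.lr,
        "weight_decay": args.weight_decay,
        "betas": args.adam_betas,
        "eps": args.adam_eps,
    }
    if args.optimizer != "fused_adam":
        raise ValueError(f'Invalid optimizer "{args.optimizer}"')
    # Plain Adam has no 'scale' argument on step(), so a backend doing this
    # would rely on the trainer's non-deferred gradient path (trainer.py above).
    return torch.optim.Adam(model.parameters(), **kw)
```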
11 changes: 9 additions & 2 deletions training/nvidia/wav2vec2-pytorch/README.md
@@ -1,7 +1,14 @@

### Dataset preparation

https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2
### Dataset download URL (global proxy)
Data source: https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechRecognition/wav2vec2#quick-start-guide

Run:

DATASET_DIR=[PATH] bash training/benchmarks/wav2vec2/pytorch/scripts/download_data.sh

DATASET_DIR=[PATH] bash training/benchmarks/wav2vec2/pytorch/scripts/generate_filelists.sh


### Run results
| Training resource | Config file | Runtime (s) | Target accuracy | Converged accuracy (mAP) | Performance (ntokens/s) |