Merge pull request #320 from Maggione/main

mmspeech merge
OFA-Sys · Dec 15, 2022 · 83457cf · 83457cf
2 parents d4fb41b + 345a611
commit 83457cf
Show file tree

Hide file tree

Showing 16 changed files with 1,056 additions and 133 deletions.
diff --git a/README_mmspeech.md b/README_mmspeech.md
@@ -49,6 +49,15 @@ Experiments on AISHELL-1 show that our proposed method achieves state-of-the-art
 
 
 ## Quick start
+### Installation
+
+Note that the fairseq version has been updated for mmspeech, so install the dependencies from this repository:
+
+```bash
+git clone https://github.com/OFA-Sys/OFA
+cd OFA
+pip install -r requirements.txt
+```
+
 ### Data preparation
 
 Input files for all tasks include three columns: "speech_id, wav_path, text", delimited by a "\t". 

diff --git a/fairseq/fairseq/models/wav2vec/utils.py b/fairseq/fairseq/models/wav2vec/utils.py
@@ -0,0 +1,21 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import torch.nn.functional as F
+
+
def pad_to_multiple(x, multiple, dim=-1, value=0):
    """Right-pad ``x`` along ``dim`` so that its size is a multiple of ``multiple``.

    Args:
        x: tensor to pad, or ``None``.
        multiple: integer that the size of ``dim`` is rounded up to.
        dim: dimension to pad. Both negative and non-negative indices are
            accepted.
        value: fill value used for the appended positions.

    Returns:
        A ``(padded, remainder)`` tuple where ``remainder`` is the number of
        positions appended at the end of ``dim``. Returns ``(None, 0)`` when
        ``x`` is ``None`` and ``(x, 0)`` (the same object, no copy) when the
        size is already a multiple of ``multiple``.
    """
    # Inspired from https://github.com/lucidrains/local-attention/blob/master/local_attention/local_attention.py#L41
    if x is None:
        return None, 0
    tsz = x.size(dim)
    # Integer arithmetic: number of positions missing to reach the next
    # multiple, without the float rounding of ``tsz / multiple``.
    remainder = -tsz % multiple
    if remainder == 0:
        return x, 0
    # F.pad takes (left, right) pairs starting from the LAST dimension, so the
    # number of trailing dimensions to skip is derived from a negative index.
    # A non-negative ``dim`` must be converted first; otherwise the padding
    # would silently land on the last dimension instead of ``dim``.
    if dim >= 0:
        dim -= x.dim()
    pad_offset = (0, 0) * (-1 - dim)

    return F.pad(x, (*pad_offset, 0, remainder), value=value), remainder