Merge remote-tracking branch 'dan/master' into add-fast-beam-search-nbest
ezerhouni committed Jul 25, 2022
2 parents d902087 + 7f67801 commit 9d4d1c8
Showing 17 changed files with 52 additions and 36 deletions.
6 changes: 5 additions & 1 deletion .flake8
@@ -3,10 +3,14 @@ show-source=true
statistics=true
max-line-length = 80

per-file-ignores =
# F821 undefined name 'StreamingServer'
# E203 whitespace before ':'
./sherpa/bin/*/beam_search.py: F821, E203

exclude =
.git,
./cmake,
./triton,
./sherpa/python/sherpa/__init__.py,
./sherpa/python/sherpa/decode.py,
./sherpa/python/bin
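For reference, the two suppressed codes come from patterns like the following sketch (the identifiers are illustrative, not the actual sherpa code):

```python
# F821 ("undefined name"): beam_search.py annotates parameters with a
# class defined in streaming_server.py; importing it there would be
# circular, so the name stays undefined in this module. pyflakes
# parses even the string annotation and still reports F821.
def process(server: "StreamingServer") -> str:
    return "processed"

# E203 ("whitespace before ':'"): black formats slices whose lower
# bound is a complex expression with a space before the colon, which
# flake8's default rules flag even though the code is valid.
params = {"context_size": 2}
hyp = [0, 0, 17, 23]
print(process(object()), hyp[params["context_size"] :])  # processed [17, 23]
```

Ignoring the two codes per file is less invasive than disabling them repo-wide.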
2 changes: 1 addition & 1 deletion .github/workflows/python_style_check.yml
@@ -61,4 +61,4 @@ jobs:
shell: bash
working-directory: ${{github.workspace}}
run: |
-black --check --exclude triton --diff .
+black --check --diff .
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -1,5 +1,6 @@
include LICENSE
include README.md
include CMakeLists.txt
exclude pyproject.toml
recursive-include sherpa *.*
recursive-include cmake *.*
6 changes: 4 additions & 2 deletions README.md
@@ -12,6 +12,8 @@
Try `sherpa` from within your browser without installing anything:
<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>

See <https://k2-fsa.github.io/sherpa/huggingface/> for more details.

## Introduction

An ASR server framework in **Python**, supporting both streaming
@@ -113,7 +115,7 @@ Here, before running the web client, you need to map your server ports to your l
ssh -R 6006:localhost:6006 -R 6008:localhost:6008 your_local_username@your_local_ip
```
**Note**:
(1) You only need to do this if the asr server is running on a machine different from the client.
(2) The command is run in the terminal on the server machine.
#### Start the client

@@ -332,7 +334,7 @@ If you have a GPU with a larger RAM (e.g., 32 GB), you can get an even **lower**

### Contributing

-Contributions to ```sherpa``` are very welcomed. There are many possible ways to make contributions
+Contributions to `sherpa` are very welcomed. There are many possible ways to make contributions
and two of them are:
- To write documentation
- To write code:
15 changes: 15 additions & 0 deletions docs/source/huggingface/index.rst
@@ -61,3 +61,18 @@ one of them and then click ``Submit for recognition``.
.. image:: ./pic/hugging-face-sherpa-2.png
:alt: screenshot of `<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_
:target: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition

YouTube Video
-------------

We provide the following YouTube video demonstrating how to use
`<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_.

.. note::

To get the latest news of `next-gen Kaldi <https://github.com/k2-fsa>`_, please subscribe
the following YouTube channel by `Nadira Povey <https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_:

`<https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_

.. youtube:: ElN3r9dkKE4
@@ -19,7 +19,7 @@ Usage
.. code-block::
cd /path/to/sherpa
-./sherpa/bin/streaming_conformer_rnnt/streaming_server.py --help
+./sherpa/bin/streaming_pruned_transducer_statelessX/streaming_server.py --help
shows the usage message.

Expand Down Expand Up @@ -51,7 +51,7 @@ The following shows you how to start the server with the above pretrained model.
git lfs install
git clone https://huggingface.co/luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless5_streaming
-./sherpa/bin/streaming_conformer_rnnt/streaming_server.py \
+./sherpa/bin/streaming_pruned_transducer_statelessX/streaming_server.py \
--port 6006 \
--max-batch-size 50 \
--max-wait-ms 5 \
@@ -19,7 +19,7 @@ Usage
.. code-block::
cd /path/to/sherpa
-./sherpa/bin/streaming_conformer_rnnt/streaming_server.py --help
+./sherpa/bin/streaming_pruned_transducer_statelessX/streaming_server.py --help
shows the usage message.

Expand Down Expand Up @@ -51,7 +51,7 @@ The following shows you how to start the server with the above pretrained model.
git lfs install
git clone https://huggingface.co/pkufool/icefall_librispeech_streaming_pruned_transducer_stateless4_20220625
-./sherpa/bin/streaming_conformer_rnnt/streaming_server.py \
+./sherpa/bin/streaming_pruned_transducer_statelessX/streaming_server.py \
--port 6006 \
--max-batch-size 50 \
--max-wait-ms 5 \
14 changes: 14 additions & 0 deletions pyproject.toml
@@ -0,0 +1,14 @@
[tool.isort]
profile = "black"

[tool.black]
line-length = 80
exclude = '''
/(
\.git
| \.github
| cmake
| triton
| build
)/
'''
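The `exclude` value is a Python regular expression; black compiles multi-line patterns like this one in verbose mode, so the layout whitespace is ignored. A sketch of how the pattern matches paths (the example paths are made up):

```python
import re

# The same pattern black reads from pyproject.toml, compiled in
# verbose mode so whitespace and line breaks are not significant.
exclude = r"""
/(
    \.git
  | \.github
  | cmake
  | triton
  | build
)/
"""
pattern = re.compile(exclude, re.VERBOSE)

print(bool(pattern.search("/repo/triton/model.py")))       # True
print(bool(pattern.search("/repo/.github/workflows/x.yml")))  # True
print(bool(pattern.search("/repo/sherpa/bin/decode.py")))  # False
```

This mirrors the `.flake8` and CI excludes above, so all three tools skip the same vendored directories.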
6 changes: 4 additions & 2 deletions sherpa/bin/conv_emformer_transducer_stateless2/beam_search.py
@@ -102,7 +102,8 @@ def process(
)

num_processed_frames = torch.tensor(
-processed_frames_list, device=device
+processed_frames_list,
+device=device,
)

(
@@ -250,7 +251,8 @@ def process(
)

num_processed_frames = torch.tensor(
-processed_frames_list, device=device
+processed_frames_list,
+device=device,
)

(
@@ -418,9 +418,7 @@ async def handle_connection_impl(

while len(stream.features) > self.chunk_length_pad:
await self.compute_and_decode(stream)
-await socket.send(
-f"{self.beam_search.get_texts(stream)}"
-) # noqa
+await socket.send(f"{self.beam_search.get_texts(stream)}")

stream.input_finished()
while len(stream.features) > self.chunk_length_pad:
@@ -519,8 +517,6 @@ def main():
"""

if __name__ == "__main__":
-# fmt:off
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" # noqa
-# fmt:on
logging.basicConfig(format=formatter, level=logging.INFO)
main()
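The `# fmt:off`/`# fmt:on` guards are no longer needed: black does not split string literals, so the over-long formatter line only requires the `# noqa` for flake8. As a standalone sketch of what this logging configuration produces (the message text is illustrative):

```python
import logging

formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"  # noqa
logging.basicConfig(format=formatter, level=logging.INFO)
logging.info("server started")
# emits a line along the lines of:
# 2022-07-25 12:00:00,000 INFO [streaming_server.py:7] server started
```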
10 changes: 3 additions & 7 deletions sherpa/bin/pruned_stateless_emformer_rnnt2/beam_search.py
@@ -109,7 +109,7 @@ def process(
encoder_out,
encoder_out_lens,
next_states,
-) = model.encoder_streaming_forward( # noqa
+) = model.encoder_streaming_forward(
features=features,
features_length=features_length,
states=states,
@@ -249,11 +249,7 @@ def process(
dtype=torch.int64,
)

-(
-encoder_out,
-_,
-next_states,
-) = model.encoder_streaming_forward( # noqa
+(encoder_out, _, next_states,) = model.encoder_streaming_forward(
features=features,
features_length=features_length,
states=states,
@@ -284,7 +280,7 @@ def get_texts(self, stream: Stream):
stream:
Stream to be processed.
"""
-hyp = stream.hyp[self.beam_search_params["context_size"] :] # noqa
+hyp = stream.hyp[self.beam_search_params["context_size"] :]
return self.sp.decode(hyp)


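For context, `hyp` begins with `context_size` priming tokens (blanks) that seed the transducer's stateless decoder; slicing them off before `sp.decode` keeps them out of the transcript. A toy sketch with made-up token IDs:

```python
beam_search_params = {"context_size": 2}
blank_id = 0

# A hypothesis is initialized with `context_size` blank tokens that
# prime the decoder network; only the tokens after them are output.
hyp = [blank_id] * beam_search_params["context_size"] + [17, 23, 42]
tokens = hyp[beam_search_params["context_size"] :]  # noqa: E203
print(tokens)  # [17, 23, 42]
```

The space before the colon is black's formatting of complex slice bounds, which is exactly why E203 is ignored for these files in `.flake8`.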
@@ -135,8 +135,6 @@ async def main():


if __name__ == "__main__":
-# fmt: off
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" # noqa
-# fmt: on
logging.basicConfig(format=formatter, level=logging.INFO)
asyncio.run(main())
@@ -527,8 +527,6 @@ def main():
"""

if __name__ == "__main__":
-# fmt:off
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" # noqa
-# fmt:on
logging.basicConfig(format=formatter, level=logging.INFO)
main()
4 changes: 0 additions & 4 deletions sherpa/bin/pruned_transducer_statelessX/offline_asr.py
@@ -192,12 +192,10 @@ def read_sound_files(
ans = []
for f in filenames:
wave, sample_rate = torchaudio.load(f)
-# fmt: off
assert sample_rate == expected_sample_rate, (
f"expected sample rate: {expected_sample_rate}. "
f"Given: {sample_rate}"
)
-# fmt: on
# We use only the first channel
ans.append(wave[0])
return ans
@@ -421,9 +419,7 @@ def main():

if __name__ == "__main__":
torch.manual_seed(20220609)
-# fmt: off
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" # noqa
-# fmt: on
logging.basicConfig(format=formatter, level=logging.INFO)

main()
2 changes: 0 additions & 2 deletions sherpa/bin/pruned_transducer_statelessX/offline_client.py
@@ -126,8 +126,6 @@ async def main():


if __name__ == "__main__":
-# fmt: off
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" # noqa
-# fmt: on
logging.basicConfig(format=formatter, level=logging.INFO)
asyncio.run(main())
2 changes: 0 additions & 2 deletions sherpa/bin/pruned_transducer_statelessX/offline_server.py
@@ -632,9 +632,7 @@ def main():

if __name__ == "__main__":
torch.manual_seed(20220519)
-# fmt:off
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" # noqa
-# fmt:on
logging.basicConfig(format=formatter, level=logging.INFO)

main()
@@ -568,8 +568,6 @@ def main():
"""

if __name__ == "__main__":
-# fmt:off
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" # noqa
-# fmt:on
logging.basicConfig(format=formatter, level=logging.INFO)
main()
