From 06f07da2156d0867c28b0493236dad6cdca38a50 Mon Sep 17 00:00:00 2001 From: jsl-models <74001263+jsl-models@users.noreply.github.com> Date: Thu, 24 Aug 2023 14:57:46 +0700 Subject: [PATCH] 2023-08-22-asr_whisper_tiny_opt_xx (#13931) * Add model 2023-08-22-asr_whisper_tiny_opt_xx * Add model 2023-08-22-asr_whisper_tiny_xx * Update 2023-08-22-asr_whisper_tiny_opt_xx.md * Update 2023-08-22-asr_whisper_tiny_xx.md --------- Co-authored-by: DevinTDHa Co-authored-by: Maziyar Panahi --- .../2023-08-22-asr_whisper_tiny_opt_xx.md | 109 ++++++++++++++++++ .../2023-08-22-asr_whisper_tiny_xx.md | 109 ++++++++++++++++++ 2 files changed, 218 insertions(+) create mode 100644 docs/_posts/DevinTDHa/2023-08-22-asr_whisper_tiny_opt_xx.md create mode 100644 docs/_posts/DevinTDHa/2023-08-22-asr_whisper_tiny_xx.md diff --git a/docs/_posts/DevinTDHa/2023-08-22-asr_whisper_tiny_opt_xx.md b/docs/_posts/DevinTDHa/2023-08-22-asr_whisper_tiny_opt_xx.md new file mode 100644 index 00000000000000..ba5dbb88aa4174 --- /dev/null +++ b/docs/_posts/DevinTDHa/2023-08-22-asr_whisper_tiny_opt_xx.md @@ -0,0 +1,109 @@ +--- +layout: model +title: Official whisper-tiny Optimized +author: John Snow Labs +name: asr_whisper_tiny_opt +date: 2023-08-22 +tags: [whisper, en, audio, open_source, asr, onnx, xx] +task: Automatic Speech Recognition +language: xx +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Official pretrained Whisper model, adapted from HuggingFace Transformers and curated to provide scalability and production-readiness using Spark NLP. 
+ +This is a multilingual model and supports the following languages: + +Afrikaans, Arabic, Armenian, Azerbaijani, Belarusian, Bosnian, Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, Galician, German, Greek, Hebrew, Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Kannada, Kazakh, Korean, Latvian, Lithuanian, Macedonian, Malay, Marathi, Maori, Nepali, Norwegian, Persian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swahili, Swedish, Tagalog, Tamil, Thai, Turkish, Ukrainian, Urdu, Vietnamese, and Welsh. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/asr_whisper_tiny_opt_xx_5.1.0_3.0_1692721787993.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/asr_whisper_tiny_opt_xx_5.1.0_3.0_1692721787993.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +import sparknlp +from sparknlp.base import * +from sparknlp.annotator import * +from pyspark.ml import Pipeline + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("asr_whisper_tiny_opt", "xx") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +processedAudioFloats = spark.createDataFrame([[rawFloats]]).toDF("audio_content") +result = pipeline.fit(processedAudioFloats).transform(processedAudioFloats) +result.select("text.result").show(truncate = False) +``` +```scala +import spark.implicits._ +import com.johnsnowlabs.nlp.base._ +import com.johnsnowlabs.nlp.annotators._ +import com.johnsnowlabs.nlp.annotators.audio.WhisperForCTC +import org.apache.spark.ml.Pipeline + +val audioAssembler: AudioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText: WhisperForCTC = WhisperForCTC + .pretrained("asr_whisper_tiny_opt", "xx") + .setInputCols("audio_assembler") + .setOutputCol("text") + +val pipeline: Pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) + +val bufferedSource = + scala.io.Source.fromFile("src/test/resources/audio/txt/librispeech_asr_0.txt") + +val rawFloats = bufferedSource + .getLines() + .map(_.split(",").head.trim.toFloat) + .toArray +bufferedSource.close + +val processedAudioFloats = Seq(rawFloats).toDF("audio_content") + +val result = pipeline.fit(processedAudioFloats).transform(processedAudioFloats) +result.select("text.result").show(truncate = false) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|asr_whisper_tiny_opt| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|xx| +|Size:|242.7 MB| diff --git a/docs/_posts/DevinTDHa/2023-08-22-asr_whisper_tiny_xx.md b/docs/_posts/DevinTDHa/2023-08-22-asr_whisper_tiny_xx.md new file mode 100644 index 00000000000000..19f629e4c4d7d6 --- /dev/null +++ b/docs/_posts/DevinTDHa/2023-08-22-asr_whisper_tiny_xx.md @@ -0,0 +1,109 @@ +--- +layout: model +title: Official whisper-tiny +author: John Snow Labs +name: asr_whisper_tiny +date: 2023-08-22 +tags: [whisper, en, audio, open_source, asr, xx, tensorflow] +task: Automatic Speech Recognition +language: xx +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Official pretrained Whisper model, adapted from HuggingFace Transformers and curated to provide scalability and production-readiness using Spark NLP. + +This is a multilingual model and supports the following languages: + +Afrikaans, Arabic, Armenian, Azerbaijani, Belarusian, Bosnian, Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, Galician, German, Greek, Hebrew, Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Kannada, Kazakh, Korean, Latvian, Lithuanian, Macedonian, Malay, Marathi, Maori, Nepali, Norwegian, Persian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swahili, Swedish, Tagalog, Tamil, Thai, Turkish, Ukrainian, Urdu, Vietnamese, and Welsh. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/asr_whisper_tiny_xx_5.1.0_3.0_1692723111563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/asr_whisper_tiny_xx_5.1.0_3.0_1692723111563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +import sparknlp +from sparknlp.base import * +from sparknlp.annotator import * +from pyspark.ml import Pipeline + +audioAssembler = AudioAssembler() \ + .setInputCol("audio_content") \ + .setOutputCol("audio_assembler") + +speechToText = WhisperForCTC.pretrained("asr_whisper_tiny", "xx") \ + .setInputCols(["audio_assembler"]) \ + .setOutputCol("text") + +pipeline = Pipeline().setStages([audioAssembler, speechToText]) +processedAudioFloats = spark.createDataFrame([[rawFloats]]).toDF("audio_content") +result = pipeline.fit(processedAudioFloats).transform(processedAudioFloats) +result.select("text.result").show(truncate = False) +``` +```scala +import spark.implicits._ +import com.johnsnowlabs.nlp.base._ +import com.johnsnowlabs.nlp.annotators._ +import com.johnsnowlabs.nlp.annotators.audio.WhisperForCTC +import org.apache.spark.ml.Pipeline + +val audioAssembler: AudioAssembler = new AudioAssembler() + .setInputCol("audio_content") + .setOutputCol("audio_assembler") + +val speechToText: WhisperForCTC = WhisperForCTC + .pretrained("asr_whisper_tiny", "xx") + .setInputCols("audio_assembler") + .setOutputCol("text") + +val pipeline: Pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText)) + +val bufferedSource = + scala.io.Source.fromFile("src/test/resources/audio/txt/librispeech_asr_0.txt") + +val rawFloats = bufferedSource + .getLines() + .map(_.split(",").head.trim.toFloat) + .toArray +bufferedSource.close + +val processedAudioFloats = Seq(rawFloats).toDF("audio_content") + +val result = pipeline.fit(processedAudioFloats).transform(processedAudioFloats) +result.select("text.result").show(truncate = false) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|asr_whisper_tiny| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|xx| +|Size:|156.6 MB|