-
Notifications
You must be signed in to change notification settings - Fork 463
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add spoken language identification for node-addon-api (#872)
- Loading branch information
1 parent
031134b
commit 939fdd9
Showing
13 changed files
with
445 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
40 changes: 40 additions & 0 deletions
40
nodejs-addon-examples/test_spoken_language_identification.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang) | ||
|
||
const sherpa_onnx = require('sherpa-onnx-node'); | ||
|
||
/**
 * Builds the spoken-language identifier used by this example.
 *
 * The whisper tiny int8 encoder/decoder are expected under
 * ./sherpa-onnx-whisper-tiny relative to the working directory.
 *
 * @returns {object} a new sherpa_onnx.SpokenLanguageIdentification instance.
 */
function createSpokenLanguageID() {
  const whisper = {
    encoder: './sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx',
    decoder: './sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx',
  };

  const options = {
    whisper,
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  };

  const identifier = new sherpa_onnx.SpokenLanguageIdentification(options);
  return identifier;
}
|
||
// One identifier instance is shared by all test files; a fresh stream is
// created per file inside the loop below.
const slid = createSpokenLanguageID();

const testWaves = [
  './spoken-language-identification-test-wavs/ar-arabic.wav',
  './spoken-language-identification-test-wavs/de-german.wav',
  './spoken-language-identification-test-wavs/en-english.wav',
  './spoken-language-identification-test-wavs/fr-french.wav',
  './spoken-language-identification-test-wavs/pt-portuguese.wav',
  './spoken-language-identification-test-wavs/es-spanish.wav',
  './spoken-language-identification-test-wavs/zh-chinese.wav',
];

// Turns a language code such as 'de' into its English name ('German').
const display = new Intl.DisplayNames(['en'], {type: 'language'});

for (const f of testWaves) {
  const stream = slid.createStream();

  const wave = sherpa_onnx.readWave(f);
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

  const lang = slid.compute(stream);
  // Print "<file name> <language code> <English language name>". Taking the
  // last path segment keeps the output correct regardless of directory depth.
  console.log(f.split('/').pop(), lang, display.of(lang));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
114 changes: 114 additions & 0 deletions
114
nodejs-addon-examples/test_vad_spoken_language_identification_microphone.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang) | ||
|
||
const portAudio = require('naudiodon2'); | ||
// console.log(portAudio.getDevices()); | ||
|
||
const sherpa_onnx = require('sherpa-onnx-node'); | ||
|
||
/**
 * Builds the voice activity detector used to cut the microphone signal into
 * speech segments.
 *
 * Please download silero_vad.onnx from
 * https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 *
 * @returns {object} a new sherpa_onnx.Vad created with a 60 second buffer.
 */
function createVad() {
  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    windowSize: 512,
  };

  const options = {
    sileroVad,
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  // Second constructor argument of Vad: buffer size, expressed in seconds.
  const vadBufferSeconds = 60;

  return new sherpa_onnx.Vad(options, vadBufferSeconds);
}
|
||
// Please download test files from | ||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
/**
 * Creates the spoken-language identifier backed by the whisper tiny int8
 * encoder and decoder located in ./sherpa-onnx-whisper-tiny.
 *
 * @returns {object} a new sherpa_onnx.SpokenLanguageIdentification instance.
 */
function createSpokenLanguageID() {
  const whisperModel = {
    encoder: './sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx',
    decoder: './sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx',
  };

  const settings = {
    whisper: whisperModel,
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  };

  return new sherpa_onnx.SpokenLanguageIdentification(settings);
}
|
||
const slid = createSpokenLanguageID();
const vad = createVad();

// Turns a language code such as 'de' into its English name ('German').
const display = new Intl.DisplayNames(['en'], {type: 'language'});

// Microphone samples are staged in this circular buffer and handed to the VAD
// one window (vad.config.sileroVad.windowSize samples) at a time.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate,
  }
});

/**
 * Reinterprets the bytes of a Node.js Buffer as float32 samples.
 *
 * A Buffer may be a view into a larger ArrayBuffer, so only the range
 * [byteOffset, byteOffset + byteLength) belongs to the chunk. Trailing bytes
 * that do not form a complete float32 are ignored.
 *
 * @param {Buffer} chunk raw audio bytes delivered by the 'data' event.
 * @returns {Float32Array} the samples contained in the chunk.
 */
function toFloat32Array(chunk) {
  const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
  const sampleCount = Math.floor(chunk.byteLength / bytesPerSample);
  if (chunk.byteOffset % bytesPerSample === 0) {
    return new Float32Array(chunk.buffer, chunk.byteOffset, sampleCount);
  }

  // A typed-array view must start on a multiple of its element size; copy the
  // bytes into a fresh ArrayBuffer when the chunk is not aligned.
  const begin = chunk.byteOffset;
  return new Float32Array(
      chunk.buffer.slice(begin, begin + sampleCount * bytesPerSample));
}

/**
 * Builds the name of the wave file a speech segment is saved to:
 * "<index>-<language>-<HH-MM-SS>.wav".
 *
 * The time is written with '-' instead of ':' because ':' is not allowed in
 * file names on Windows.
 *
 * @param {number} segmentIndex running number of the segment.
 * @param {string} languageName English name of the detected language.
 * @returns {string} the file name.
 */
function segmentFilename(segmentIndex, languageName) {
  const timestamp = new Date()
                        .toLocaleTimeString('en-US', {hour12: false})
                        .split(' ')[0]
                        .replace(/:/g, '-');
  return `${segmentIndex}-${languageName}-${timestamp}.wav`;
}

// `printed` ensures "Detected speech" is logged once per speech run;
// `index` numbers the saved segments.
let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(toFloat32Array(data));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
    if (vad.isDetected() && !printed) {
      console.log(`${index}: Detected speech`);
      printed = true;
    }

    if (!vad.isDetected()) {
      printed = false;
    }

    // Drain every finished speech segment: identify its language, save it to
    // a wave file and report the result.
    while (!vad.isEmpty()) {
      const segment = vad.front();
      vad.pop();

      const stream = slid.createStream();
      stream.acceptWaveform(
          {samples: segment.samples, sampleRate: vad.config.sampleRate});
      const lang = slid.compute(stream);
      const fullLang = display.of(lang);

      const filename = segmentFilename(index, fullLang);
      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});
      const duration = segment.samples.length / vad.config.sampleRate;
      console.log(`${index} End of speech. Duration: ${
          duration} seconds.\n Detected language: ${fullLang}`);
      console.log(`Saved to ${filename}`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
});

ai.start();
console.log('Started! Please speak');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,4 +37,5 @@ class OfflineRecognizer { | |
|
||
module.exports = { | ||
OfflineRecognizer, | ||
OfflineStream, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
30 changes: 30 additions & 0 deletions
30
scripts/node-addon-api/lib/spoken-language-identification.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
const addon = require('./addon.js'); | ||
const non_streaming_asr = require('./non-streaming-asr.js'); | ||
|
||
/**
 * JavaScript wrapper around the native spoken-language identification
 * functions exposed by the addon.
 */
class SpokenLanguageIdentification {
  /**
   * @param {object} config configuration passed unchanged to
   *     addon.createSpokenLanguageIdentification(); also kept as this.config.
   */
  constructor(config) {
    const nativeHandle = addon.createSpokenLanguageIdentification(config);
    this.handle = nativeHandle;
    this.config = config;
  }

  /**
   * Creates a stream bound to this identifier.
   *
   * @returns {object} a non_streaming_asr.OfflineStream wrapping the native
   *     stream handle.
   */
  createStream() {
    const nativeStream =
        addon.createSpokenLanguageIdentificationOfflineStream(this.handle);
    return new non_streaming_asr.OfflineStream(nativeStream);
  }

  /**
   * Identifies the language spoken in the audio fed to the given stream.
   *
   * Returns a string containing the language code (2 characters),
   * e.g., en, de, fr, es, zh
   *   en -> English
   *   de -> German
   *   fr -> French
   *   es -> Spanish
   *   zh -> Chinese
   *
   * @param {object} stream a stream obtained from createStream().
   * @returns {string} the language code.
   */
  compute(stream) {
    const {handle: streamHandle} = stream;
    return addon.spokenLanguageIdentificationCompute(this.handle, streamHandle);
  }
}
|
||
module.exports = { | ||
SpokenLanguageIdentification, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.