Skip to content

Commit

Permalink
Add TTS for node-addon-api (#871)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored May 13, 2024
1 parent 740d7ae commit 031134b
Show file tree
Hide file tree
Showing 21 changed files with 691 additions and 10 deletions.
36 changes: 36 additions & 0 deletions .github/scripts/test-nodejs-addon-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ d=nodejs-addon-examples
echo "dir: $d"
cd $d

echo "----------streaming asr----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
Expand All @@ -31,6 +33,8 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test_asr_streaming_paraformer.js
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

echo "----------non-streaming asr----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
Expand Down Expand Up @@ -58,3 +62,35 @@ rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

node ./test_asr_non_streaming_paraformer.js
rm -rf sherpa-onnx-paraformer-zh-2023-03-28

echo "----------tts----------"

# Every TTS model is handled the same way: download the archive, unpack it,
# delete the archive, run the matching node example, then delete the
# unpacked model directory again.
tts_models_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models

# Each entry is "<model directory>:<node test script>".
for entry in \
  "vits-piper-en_GB-cori-medium:test_tts_non_streaming_vits_piper_en.js" \
  "vits-coqui-de-css10:test_tts_non_streaming_vits_coqui_de.js" \
  "sherpa-onnx-vits-zh-ll:test_tts_non_streaming_vits_zh_ll.js" \
  "vits-icefall-zh-aishell3:test_tts_non_streaming_vits_zh_aishell3.js"; do
  model=${entry%%:*}
  script=${entry#*:}

  curl -SL -O $tts_models_url/$model.tar.bz2
  tar xvf $model.tar.bz2
  rm $model.tar.bz2

  node ./$script
  rm -rf $model
done

ls -lh
2 changes: 1 addition & 1 deletion .github/workflows/npm-addon-linux-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ jobs:
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
..
make -j
make -j2
make install
cd ..
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,4 @@ sherpa-onnx-ced-*
node_modules
package-lock.json
sherpa-onnx-nemo-*
sherpa-onnx-vits-*
40 changes: 40 additions & 0 deletions nodejs-addon-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,43 @@ node ./test_asr_non_streaming_paraformer.js
npm install naudiodon2
node ./test_vad_asr_non_streaming_paraformer_microphone.js
```

## Text-to-speech with piper VITS models (TTS)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
tar xvf vits-piper-en_GB-cori-medium.tar.bz2
rm vits-piper-en_GB-cori-medium.tar.bz2

node ./test_tts_non_streaming_vits_piper_en.js
```

## Text-to-speech with Coqui-ai/TTS models (TTS)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
tar xvf vits-coqui-de-css10.tar.bz2
rm vits-coqui-de-css10.tar.bz2

node ./test_tts_non_streaming_vits_coqui_de.js
```

## Text-to-speech with vits Chinese models (1/2)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
rm sherpa-onnx-vits-zh-ll.tar.bz2

node ./test_tts_non_streaming_vits_zh_ll.js
```

## Text-to-speech with vits Chinese models (2/2)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3.tar.bz2

node ./test_tts_non_streaming_vits_zh_aishell3.js
```
43 changes: 43 additions & 0 deletions nodejs-addon-examples/test_tts_non_streaming_vits_coqui_de.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

/**
 * Builds the non-streaming TTS engine for the model stored in
 * ./vits-coqui-de-css10.
 *
 * Please download model files from
 * https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
 *
 * @returns {object} The object created by `new sherpa_onnx.OfflineTts(config)`.
 */
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './vits-coqui-de-css10/model.onnx',
        tokens: './vits-coqui-de-css10/tokens.txt',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    // NOTE(review): this key was previously spelled `maxNumStences`, which
    // looks like a typo; the addon is expected to read `maxNumSentences` --
    // confirm against the native binding.
    maxNumSentences: 1,
  };
  return new sherpa_onnx.OfflineTts(config);
}

// Synthesize one German sentence, print timing statistics, and save the
// generated audio to a wave file.
const tts = createOfflineTts();

const text = 'Alles hat ein Ende, nur die Wurst hat zwei.';

const start = performance.now();
const audio = tts.generate({text: text, sid: 0, speed: 1.0});
const stop = performance.now();

// performance.now() reports milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds.
const duration = audio.samples.length / audio.sampleRate;
// Real-time factor: time spent generating divided by the audio duration.
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-coqui-de.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
46 changes: 46 additions & 0 deletions nodejs-addon-examples/test_tts_non_streaming_vits_piper_en.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

/**
 * Builds the non-streaming TTS engine for the model stored in
 * ./vits-piper-en_GB-cori-medium.
 *
 * Please download model files from
 * https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
 *
 * @returns {object} The object created by `new sherpa_onnx.OfflineTts(config)`.
 */
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx',
        tokens: './vits-piper-en_GB-cori-medium/tokens.txt',
        dataDir: './vits-piper-en_GB-cori-medium/espeak-ng-data',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    // NOTE(review): this key was previously spelled `maxNumStences`, which
    // looks like a typo; the addon is expected to read `maxNumSentences` --
    // confirm against the native binding.
    maxNumSentences: 1,
  };
  return new sherpa_onnx.OfflineTts(config);
}

// Synthesize one English passage, print timing statistics, and save the
// generated audio to a wave file.
const tts = createOfflineTts();

const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const start = performance.now();
const audio = tts.generate({text: text, sid: 0, speed: 1.0});
const stop = performance.now();

// performance.now() reports milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds.
const duration = audio.samples.length / audio.sampleRate;
// Real-time factor: time spent generating divided by the audio duration.
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-piper-en.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
48 changes: 48 additions & 0 deletions nodejs-addon-examples/test_tts_non_streaming_vits_zh_aishell3.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

/**
 * Builds the non-streaming TTS engine for the model stored in
 * ./vits-icefall-zh-aishell3, including its rule FST and FAR files.
 *
 * Please download model files from
 * https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
 *
 * @returns {object} The object created by `new sherpa_onnx.OfflineTts(config)`.
 */
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './vits-icefall-zh-aishell3/model.onnx',
        tokens: './vits-icefall-zh-aishell3/tokens.txt',
        lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    // NOTE(review): this key was previously spelled `maxNumStences`, which
    // looks like a typo; the addon is expected to read `maxNumSentences` --
    // confirm against the native binding.
    maxNumSentences: 1,
    // Comma-separated list of rule FST files.
    ruleFsts:
        './vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/number.fst,./vits-icefall-zh-aishell3/new_heteronym.fst',
    ruleFars: './vits-icefall-zh-aishell3/rule.far',
  };
  return new sherpa_onnx.OfflineTts(config);
}

// Synthesize one Chinese passage with speaker ID 88, print timing
// statistics, and save the generated audio to a wave file.
const tts = createOfflineTts();

const text =
    '他在长沙出生,长白山长大,去过长江,现在他是一个银行的行长,主管行政工作。有困难,请拨110,或者13020240513。今天是2024年5月13号, 他上个月的工资是12345块钱。';

const start = performance.now();
const audio = tts.generate({text: text, sid: 88, speed: 1.0});
const stop = performance.now();

// performance.now() reports milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds.
const duration = audio.samples.length / audio.sampleRate;
// Real-time factor: time spent generating divided by the audio duration.
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zh-aishell3.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
48 changes: 48 additions & 0 deletions nodejs-addon-examples/test_tts_non_streaming_vits_zh_ll.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

/**
 * Builds the non-streaming TTS engine for the model stored in
 * ./sherpa-onnx-vits-zh-ll, including its rule FST files.
 *
 * Please download model files from
 * https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
 *
 * @returns {object} The object created by `new sherpa_onnx.OfflineTts(config)`.
 */
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './sherpa-onnx-vits-zh-ll/model.onnx',
        tokens: './sherpa-onnx-vits-zh-ll/tokens.txt',
        lexicon: './sherpa-onnx-vits-zh-ll/lexicon.txt',
        dictDir: './sherpa-onnx-vits-zh-ll/dict',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    // NOTE(review): this key was previously spelled `maxNumStences`, which
    // looks like a typo; the addon is expected to read `maxNumSentences` --
    // confirm against the native binding.
    maxNumSentences: 1,
    // Comma-separated list of rule FST files.
    ruleFsts:
        './sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/number.fst',
  };
  return new sherpa_onnx.OfflineTts(config);
}

// Synthesize one Chinese passage with speaker ID 2, print timing
// statistics, and save the generated audio to a wave file.
const tts = createOfflineTts();

const text =
    '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月13号,拨打110或者18920240513。123456块钱。';

const start = performance.now();
const audio = tts.generate({text: text, sid: 2, speed: 1.0});
const stop = performance.now();

// performance.now() reports milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds.
const duration = audio.samples.length / audio.sampleRate;
// Real-time factor: time spent generating divided by the audio duration.
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zh-ll.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});

index += 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});

index += 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});

index += 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});

index += 1;
}
Expand Down
2 changes: 1 addition & 1 deletion nodejs-addon-examples/test_vad_microphone.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});
const duration = segment.samples.length / vad.config.sampleRate;
console.log(`${index} End of speech. Duration: ${duration} seconds`);
console.log(`Saved to ${filename}`);
Expand Down
1 change: 1 addition & 0 deletions scripts/node-addon-api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ include_directories(${CMAKE_JS_INC})

set(srcs
src/non-streaming-asr.cc
src/non-streaming-tts.cc
src/sherpa-onnx-node-addon-api.cc
src/streaming-asr.cc
src/vad.cc
Expand Down
4 changes: 2 additions & 2 deletions scripts/node-addon-api/lib/addon.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ for (const p of possible_paths) {
}

if (!found) {
let msg =
`Could not find sherpa-onnx. Tried\n\n ${possible_paths.join('\n ')}\n`
let msg = `Could not find sherpa-onnx-node. Tried\n\n ${
possible_paths.join('\n ')}\n`
if (os.platform() == 'darwin' && process.env.DYLD_LIBRARY_PATH &&
!process.env.DYLD_LIBRARY_PATH.includes(
`node_modules/sherpa-onnx-${platform_arch}`)) {
Expand Down
25 changes: 25 additions & 0 deletions scripts/node-addon-api/lib/non-streaming-tts.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
const addon = require('./addon.js');

/**
 * Non-streaming text-to-speech wrapper around the native addon.
 *
 * Fields set by the constructor:
 *   handle      - value returned by addon.createOfflineTts(config)
 *   config      - the config object passed in (stored by reference)
 *   numSpeakers - value returned by addon.getOfflineTtsNumSpeakers(handle)
 *   sampleRate  - value returned by addon.getOfflineTtsSampleRate(handle)
 */
class OfflineTts {
  /**
   * @param {object} config Configuration forwarded unchanged to
   *     addon.createOfflineTts().
   */
  constructor(config) {
    const nativeHandle = addon.createOfflineTts(config);

    this.handle = nativeHandle;
    this.config = config;

    this.numSpeakers = addon.getOfflineTtsNumSpeakers(nativeHandle);
    this.sampleRate = addon.getOfflineTtsSampleRate(nativeHandle);
  }

  /**
   * Generates audio for the given request.
   *
   * @param {{text: string, sid: number, speed: number}} obj Request object,
   *     e.g. {text: "xxxx", sid: 0, speed: 1.0}, where text is a string,
   *     sid is an int32 and speed is a float.
   * @returns {{samples: Float32Array, sampleRate: number}} Whatever
   *     addon.offlineTtsGenerate() returns for this handle and request.
   */
  generate(obj) {
    const {handle} = this;
    return addon.offlineTtsGenerate(handle, obj);
  }
}

// Names exported by this module: only the OfflineTts class.
module.exports = {
OfflineTts,
}
Loading

0 comments on commit 031134b

Please sign in to comment.