-
Notifications
You must be signed in to change notification settings - Fork 463
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add speaker identification APIs for node-addon-api (#874)
- Loading branch information
1 parent
0895b64
commit 388e6a9
Showing
16 changed files
with
1,034 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
name: npm-addon-linux-aarch64 | ||
|
||
on: | ||
push: | ||
branches: | ||
- node-addon | ||
workflow_dispatch: | ||
|
||
concurrency: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
name: npm-addon-linux-x64 | ||
|
||
on: | ||
push: | ||
branches: | ||
- node-addon | ||
workflow_dispatch: | ||
|
||
concurrency: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
name: npm-addon-macos | ||
|
||
on: | ||
push: | ||
branches: | ||
- node-addon | ||
workflow_dispatch: | ||
|
||
concurrency: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
name: npm-addon-win-x64 | ||
|
||
on: | ||
push: | ||
branches: | ||
- node-addon | ||
workflow_dispatch: | ||
|
||
concurrency: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
// Copyright (c) 2024 Xiaomi Corporation | ||
const sherpa_onnx = require('sherpa-onnx-node'); | ||
const assert = require('node:assert'); | ||
|
||
// Please download models files from | ||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models | ||
/**
 * Build the speaker embedding extractor used throughout this example.
 *
 * The model path is relative to the current working directory, so the model
 * file must be downloaded there before running the script.
 *
 * @returns {object} A new `sherpa_onnx.SpeakerEmbeddingExtractor` instance.
 */
function createSpeakerEmbeddingExtractor() {
  const model = './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';

  return new sherpa_onnx.SpeakerEmbeddingExtractor({
    model,
    numThreads: 1,
    debug: true,
  });
}
|
||
/**
 * Compute the speaker embedding of a single wave file.
 *
 * @param {object} extractor An extractor exposing `createStream()` and
 *     `compute(stream)`.
 * @param {string} filename Path of the wave file to read.
 * @returns {*} Whatever `extractor.compute()` returns for the stream.
 */
function computeEmbedding(extractor, filename) {
  const stream = extractor.createStream();

  // Feed the whole file into the stream in a single call.
  const {sampleRate, samples} = sherpa_onnx.readWave(filename);
  stream.acceptWaveform({sampleRate, samples});

  return extractor.compute(stream);
}
|
||
const extractor = createSpeakerEmbeddingExtractor();
const manager = new sherpa_onnx.SpeakerEmbeddingManager(extractor.dim);

// Please download test files from
// https://github.com/csukuangfj/sr-data
const spk1Files = [
  './sr-data/enroll/fangjun-sr-1.wav',
  './sr-data/enroll/fangjun-sr-2.wav',
  './sr-data/enroll/fangjun-sr-3.wav',
];

// One embedding per enrollment recording of the first speaker.
const spk1Vec = spk1Files.map((f) => computeEmbedding(extractor, f));

const spk2Files = [
  './sr-data/enroll/leijun-sr-1.wav',
  './sr-data/enroll/leijun-sr-2.wav',
];

// One embedding per enrollment recording of the second speaker.
const spk2Vec = spk2Files.map((f) => computeEmbedding(extractor, f));

// Enroll both speakers; each is registered from several embeddings at once.
let ok = manager.addMulti({name: 'fangjun', v: spk1Vec});
assert.equal(ok, true);

ok = manager.addMulti({name: 'leijun', v: spk2Vec});
assert.equal(ok, true);

assert.equal(manager.getNumSpeakers(), 2);

assert.equal(manager.contains('fangjun'), true);
assert.equal(manager.contains('leijun'), true);

console.log('---All speakers---');

console.log(manager.getAllSpeakerNames());
console.log('------------');

const testFiles = [
  './sr-data/test/fangjun-test-sr-1.wav',
  './sr-data/test/leijun-test-sr-1.wav',
  './sr-data/test/liudehua-test-sr-1.wav',
];

const threshold = 0.6;

// Identification: look up the speaker of every test recording.
for (const f of testFiles) {
  const embedding = computeEmbedding(extractor, f);

  let name = manager.search({v: embedding, threshold});
  // An empty result is reported as an unknown speaker. Strict equality keeps
  // other falsy values (0, false, ...) from being mistaken for "no match".
  if (name === '') {
    name = '<Unknown>';
  }
  console.log(`${f}: ${name}`);
}

// Verification: the first test file must match the enrolled 'fangjun'.
ok = manager.verify({
  name: 'fangjun',
  v: computeEmbedding(extractor, testFiles[0]),
  threshold,
});

assert.equal(ok, true);

ok = manager.remove('fangjun');
assert.equal(ok, true);

// After removal the same verification is expected to fail.
ok = manager.verify({
  name: 'fangjun',
  v: computeEmbedding(extractor, testFiles[0]),
  threshold,
});
assert.equal(ok, false);

assert.equal(manager.getNumSpeakers(), 1);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
const addon = require('./addon.js'); | ||
const streaming_asr = require('./streaming-asr.js'); | ||
|
||
/**
 * Thin JavaScript wrapper around the native speaker embedding extractor
 * exposed by the addon.
 */
class SpeakerEmbeddingExtractor {
  /**
   * @param {object} config Configuration object, forwarded unchanged to
   *     `addon.createSpeakerEmbeddingExtractor()`.
   */
  constructor(config) {
    const handle = addon.createSpeakerEmbeddingExtractor(config);

    this.handle = handle;
    this.config = config;
    // Embedding dimension as reported by the native extractor.
    this.dim = addon.speakerEmbeddingExtractorDim(handle);
  }

  /**
   * Create a new stream bound to this extractor.
   *
   * @returns {object} An `OnlineStream` wrapping the native stream handle.
   */
  createStream() {
    const streamHandle =
        addon.speakerEmbeddingExtractorCreateStream(this.handle);
    return new streaming_asr.OnlineStream(streamHandle);
  }

  /**
   * Ask the native extractor whether `stream` is ready.
   * NOTE(review): presumably "ready" means enough audio has been accepted to
   * compute an embedding -- confirm against the native implementation.
   *
   * @param {object} stream A stream created by `createStream()`.
   * @returns {*} The value returned by the addon.
   */
  isReady(stream) {
    const {handle: streamHandle} = stream;
    return addon.speakerEmbeddingExtractorIsReady(this.handle, streamHandle);
  }

  /**
   * Compute the embedding for the audio accepted by `stream`.
   *
   * @param {object} stream A stream created by `createStream()`.
   * @returns {Float32Array} A float32 array holding the embedding.
   */
  compute(stream) {
    const {handle: streamHandle} = stream;
    return addon.speakerEmbeddingExtractorComputeEmbedding(
        this.handle, streamHandle);
  }
}
|
||
/**
 * Concatenate a list of numeric arrays into a single Float32Array.
 *
 * The input is normalised with `Array.from`, so both array-likes and
 * arbitrary iterables (e.g. a Set or a generator) are supported. An iterable
 * without a `length` property used to yield an empty result silently.
 *
 * @param {Iterable<ArrayLike<number>>|ArrayLike<ArrayLike<number>>} arrayList
 *     The arrays to concatenate, in order.
 * @returns {Float32Array} A new array containing all elements of every input
 *     array, in order. Empty when `arrayList` is empty.
 */
function flatten(arrayList) {
  const chunks = Array.from(arrayList);
  const total = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
  const ans = new Float32Array(total);

  let offset = 0;
  for (const chunk of chunks) {
    ans.set(chunk, offset);
    offset += chunk.length;
  }
  return ans;
}
|
||
/**
 * Thin JavaScript wrapper around the native speaker embedding manager
 * exposed by the addon. Every method forwards to the addon using the native
 * handle created in the constructor.
 */
class SpeakerEmbeddingManager {
  /**
   * @param {number} dim Embedding dimension, forwarded to
   *     `addon.createSpeakerEmbeddingManager()`.
   */
  constructor(dim) {
    const handle = addon.createSpeakerEmbeddingManager(dim);

    this.handle = handle;
    this.dim = dim;
  }

  /**
   * Register a speaker from a single embedding.
   *
   * @param {{name: string, v: Float32Array}} obj
   *     obj = {name: "xxx", v: a-float32-array}
   * @returns {*} The value returned by the addon.
   */
  add(obj) {
    const {handle} = this;
    return addon.speakerEmbeddingManagerAdd(handle, obj);
  }

  /**
   * Register a speaker from several embeddings at once.
   *
   * The embeddings are concatenated into one float32 array and passed to the
   * addon together with the number of embeddings.
   *
   * @param {{name: string, v: Float32Array[]}} obj
   *     obj = {name: "xxx", v: [float32_array1, ..., float32_arrayn]}
   * @returns {*} The value returned by the addon.
   */
  addMulti(obj) {
    const {name, v} = obj;
    const flattened = {
      name,
      vv: flatten(v),
      n: v.length,
    };
    return addon.speakerEmbeddingManagerAddListFlattened(
        this.handle, flattened);
  }

  /**
   * Remove a speaker by name.
   *
   * @param {string} name Name of the speaker to remove.
   * @returns {*} The value returned by the addon.
   */
  remove(name) {
    const {handle} = this;
    return addon.speakerEmbeddingManagerRemove(handle, name);
  }

  /**
   * Search for the speaker matching an embedding.
   *
   * @param {{v: Float32Array, threshold: number}} obj
   *     obj = {v: a-float32-array, threshold: a-float}
   * @returns {*} The value returned by the addon.
   */
  search(obj) {
    const {handle} = this;
    return addon.speakerEmbeddingManagerSearch(handle, obj);
  }

  /**
   * Verify an embedding against a named speaker.
   *
   * @param {{name: string, v: Float32Array, threshold: number}} obj
   *     obj = {name: 'xxx', v: a-float32-array, threshold: a-float}
   * @returns {*} The value returned by the addon.
   */
  verify(obj) {
    const {handle} = this;
    return addon.speakerEmbeddingManagerVerify(handle, obj);
  }

  /**
   * Check whether a speaker name is known to the manager.
   *
   * @param {string} name Speaker name to look up.
   * @returns {*} The value returned by the addon.
   */
  contains(name) {
    const {handle} = this;
    return addon.speakerEmbeddingManagerContains(handle, name);
  }

  /**
   * @returns {*} The speaker count reported by the addon.
   */
  getNumSpeakers() {
    const {handle} = this;
    return addon.speakerEmbeddingManagerNumSpeakers(handle);
  }

  /**
   * @returns {*} All speakers as reported by the addon.
   */
  getAllSpeakerNames() {
    const {handle} = this;
    return addon.speakerEmbeddingManagerGetAllSpeakers(handle);
  }
}
|
||
module.exports = { | ||
SpeakerEmbeddingExtractor, | ||
SpeakerEmbeddingManager, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,5 +64,6 @@ class OnlineRecognizer { | |
|
||
module.exports = { | ||
OnlineRecognizer, | ||
OnlineStream, | ||
Display | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.