-
Notifications
You must be signed in to change notification settings - Fork 164
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #167 from sshniro/master
Adding speech to text adapter for Google cloud platform
- Loading branch information
Showing
9 changed files
with
107,221 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
import gcpSttToDraft from './index'; | ||
import gcpSttTedTalkTranscript from './sample/gcpSttPunctuation.sample.json'; | ||
|
||
console.log('Starting');

// Convert the bundled sample transcript, then pretty-print the resulting blocks.
const draftBlocks = gcpSttToDraft(gcpSttTedTalkTranscript);
const prettyJson = JSON.stringify(draftBlocks, null, 2);
console.log(prettyJson);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
/**
 * Converts GCP Speech-to-Text JSON to Draft.js content blocks.
 * See the `sample` folder for example input and output, and `example-usage.js` for usage.
 */
|
||
import generateEntitiesRanges from '../generate-entities-ranges/index.js'; | ||
|
||
// Number of nanoseconds in one second; divisor used to turn `nanos` into fractional seconds.
const NANO_SECOND = 1e9;
|
||
/**
 * Picks the alternative with the highest confidence from a single GCP
 * Speech-to-Text result, e.g.
 * sentences = { alternatives: [{ transcript: 'hello', confidence: 0.9, words: [...] }, ...] }
 *
 * An alternative whose `confidence` is missing or not numeric ranks below every
 * scored alternative, so an unscored entry can never displace a scored one.
 * When two alternatives rank equally, the one that appears first is kept.
 *
 * @param {object} sentences - one result object holding an `alternatives` array
 * @returns {object|undefined} the best alternative, or undefined when there is none
 */
export const getBestAlternativeSentence = sentences => {
  const alternatives = sentences.alternatives || [];

  if (alternatives.length === 0) {
    return undefined;
  }

  // parseFloat keeps numeric strings working; NaN would make every comparison
  // false, so it is mapped to the lowest possible rank instead.
  const rankOf = alternative => {
    const confidence = parseFloat(alternative.confidence);

    return Number.isNaN(confidence) ? -Infinity : confidence;
  };

  return alternatives.reduce((best, candidate) =>
    rankOf(candidate) > rankOf(best) ? candidate : best
  );
};
|
||
/**
 * Removes whitespace from both ends of a piece of transcript text.
 *
 * @param {string} text - text to trim
 * @returns {string} the trimmed text; an empty string when `text` is null or undefined
 */
export const trimLeadingAndTailingWhiteSpace = text => {
  // A missing value would otherwise throw on `.trim()`.
  if (text === undefined || text === null) {
    return '';
  }

  return text.trim();
};
|
||
/**
 * Combines the whole-second and nanosecond parts of a time offset into
 * fractional seconds.
 *
 * GCP does not provide a nanosecond attribute if the word starts at 0 nanosecond.
 * Either part may therefore be absent: a missing or non-numeric part counts
 * as 0, so the result is never NaN.
 *
 * @param startSecond - whole seconds, as a number or numeric string
 * @param nanoSecond - nanoseconds, as a number or numeric string
 * @returns {number} the offset in seconds
 */
const computeTimeInSeconds = (startSecond, nanoSecond) => {
  const toNumberOrZero = value => {
    const parsed = parseFloat(value);

    return Number.isNaN(parsed) ? 0 : parsed;
  };

  return toNumberOrZero(startSecond) + toNumberOrZero(nanoSecond) / NANO_SECOND;
};
|
||
/**
 * Reshapes one GCP word entry into the `{ start, end, text, confidence }`
 * form consumed by the generic generateEntitiesRanges() method.
 *
 * @param {object} currentWord - word entry with `startTime`, `endTime` and `word`
 * @param confidence - confidence value to attach to the word
 * @returns {object} the normalized word
 */
const normalizeWord = (currentWord, confidence) => {
  const { startTime, endTime, word } = currentWord;
  const start = computeTimeInSeconds(startTime.seconds, startTime.nanos);
  const end = computeTimeInSeconds(endTime.seconds, endTime.nanos);

  return { start, end, text: word, confidence };
};
|
||
/**
 * Builds one paragraph per GCP Speech to Text result.
 * Each paragraph carries the trimmed transcript of the result's best
 * alternative and that alternative's words in normalized form; every word
 * is tagged with the confidence of the alternative it came from.
 *
 * @param {array} sentences - array of sentence (result) objects from GCP STT
 * @returns {array} paragraphs shaped as `{ words: [...], text: [transcript] }`
 */
const groupWordsInParagraphs = sentences =>
  sentences.map(sentence => {
    const bestAlternative = getBestAlternativeSentence(sentence);
    const text = [ trimLeadingAndTailingWhiteSpace(bestAlternative.transcript) ];
    const words = bestAlternative.words.map(word =>
      normalizeWord(word, bestAlternative.confidence)
    );

    return { words, text };
  });
|
||
/**
 * Converts a GCP Speech to Text response into an array of DraftJs-style
 * paragraph blocks, one block per entry of `gcpSttJson.results`.
 *
 * @param {object} gcpSttJson - response object exposing a `results` array
 * @returns {array} blocks shaped as `{ text, type, data, entityRanges }`
 */
const gcpSttToDraft = gcpSttJson => {
  // NOTE: speaker segmentation (a `speakerTag` on the words) is not read yet.
  // Paragraphs without a `speaker` value get the `TBC <index>` placeholder label.
  const paragraphs = groupWordsInParagraphs(gcpSttJson.results);

  return paragraphs.map((paragraph, index) => {
    const { words, speaker } = paragraph;
    const speakerLabel = speaker ? `Speaker ${ speaker }` : `TBC ${ index }`;

    return {
      text: paragraph.text.join(' '),
      type: 'paragraph',
      data: {
        speaker: speakerLabel,
        words,
        // the paragraph starts where its first word starts
        start: parseFloat(words[0].start)
      },
      // each word becomes an entity range (offset from the beginning of the
      // paragraph plus length) within the space-joined text
      entityRanges: generateEntitiesRanges(words, 'text') // wordAttributeName
    };
  });
};
|
||
export default gcpSttToDraft; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import gcpSttToDraft, { | ||
getBestAlternativeSentence, | ||
trimLeadingAndTailingWhiteSpace | ||
} from './index'; | ||
import draftTranscriptSample from './sample/googleSttToDraftJs.sample.js'; | ||
import gcpSttTedTalkTranscript from './sample/gcpSttPunctuation.sample.json'; | ||
|
||
describe('gcpSttToDraft', () => {
  let result;

  // Convert inside a hook instead of the describe body: describe bodies run
  // while the test file is being collected, before any test executes.
  beforeAll(() => {
    result = gcpSttToDraft(gcpSttTedTalkTranscript);
  });

  it('Should be defined', () => {
    expect(result).toBeDefined();
  });

  it('Should be equal to expected value', () => {
    expect(result).toEqual(draftTranscriptSample);
  });
});
|
||
describe('leading and tailing white space should be removed from text block', () => {
  const sentence = ' this is a sentence ';
  const expected = 'this is a sentence';

  it('should be equal to expected value', () => {
    // Call the function under test inside the test itself so that a throw is
    // reported as a failure of this test.
    const result = trimLeadingAndTailingWhiteSpace(sentence);

    expect(result).toEqual(expected);
  });
});
|
||
describe('Best alternative sentence should be returned', () => {
  // Two candidates; the first one carries the higher confidence.
  const highConfidence = {
    transcript: 'this is the first sentence',
    confidence: 0.95
  };
  const lowConfidence = {
    transcript: 'this is the first sentence alternative',
    confidence: 0.8
  };
  const sentences = { alternatives: [ highConfidence, lowConfidence ] };

  const expected = {
    transcript: 'this is the first sentence',
    confidence: 0.95
  };

  it('Should be equal to expected value', () => {
    expect(getBestAlternativeSentence(sentences)).toEqual(expected);
  });
});
Oops, something went wrong.