Skip to content

Commit dd875cb

Browse files
Kristin Grace GalvinAce Nassri
authored andcommitted
new sample for infinite streaming command line app (#333)
* new sample for infinite streaming command line app * adding infinite streaming sample to samples folder * added comments and error handling and ran eslint * corrected region tag * fixed reverted merge conflicts and made changes again. * fixed lint errors * removed stream from package.json, it is not necessary * cleaned up comment placements and removed unneeded debugging * made lint updates * made lint updates
1 parent 0dc7d13 commit dd875cb

File tree

2 files changed

+287
-0
lines changed

2 files changed

+287
-0
lines changed

speech/infiniteStreaming.js

Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
/**
2+
* Copyright 2019 Google LLC
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
/**
17+
* This application demonstrates how to perform infinite streaming using the
18+
* streamingRecognize operation with the Google Cloud Speech API.
19+
* Before the streaming time limit is met, the program uses the
20+
* 'result end time' parameter to calculate the last 'isFinal' transcription.
21+
* When the time limit is met, the unfinalized audio from the previous session
22+
* is resent all at once to the API, before continuing the real-time stream
23+
* and resetting the clock, so the process can repeat.
24+
* Incoming audio should not be dropped / lost during reset, and context from
25+
* previous sessions should be maintained as long as the utterance returns an
26+
* isFinal response before 2 * streamingLimit has expired.
27+
* The output text is color-coded:
28+
* red - unfinalized transcript
29+
* green - finalized transcript
30+
* yellow/orange - API request restarted
31+
*/
32+
33+
'use strict';
34+
35+
/**
36+
* Note: Correct microphone settings required: check enclosed link, and make
37+
* sure the following conditions are met:
38+
* 1. SoX must be installed and available in your $PATH- it can be found here:
39+
* http://sox.sourceforge.net/
40+
* 2. Microphone must be working
41+
* 3. Encoding, sampleRateHertz, and # of channels must match header of
42+
* audioInput file you're recording to.
43+
* 4. Get Node-Record-lpcm16 https://www.npmjs.com/package/node-record-lpcm16
44+
* More Info: https://cloud.google.com/speech-to-text/docs/streaming-recognize
45+
* 5. Set streamingLimit in ms. 10000 ms = 10 seconds.
46+
* Maximum streaming limit should be 1/2 of SpeechAPI Streaming Limit.
47+
*/
48+
49+
function infiniteStream(
50+
encoding,
51+
sampleRateHertz,
52+
languageCode,
53+
streamingLimit
54+
) {
55+
// [START speech_transcribe_infinite_streaming]
56+
57+
// const encoding = 'LINEAR16';
58+
// const sampleRateHertz = 16000;
59+
// const languageCode = 'en-US';
60+
// const streamingLimit = 10000; // ms - set to low number for demo purposes
61+
62+
const chalk = require('chalk');
63+
const {Transform} = require('stream');
64+
65+
// Node-Record-lpcm16
66+
const record = require('node-record-lpcm16');
67+
68+
// Imports the Google Cloud client library
69+
// Currently, only v1p1beta1 contains result-end-time
70+
const speech = require('@google-cloud/speech').v1p1beta1;
71+
72+
const client = new speech.SpeechClient();
73+
74+
// Recognition settings, taken verbatim from the infiniteStream arguments
const config = {encoding, sampleRateHertz, languageCode};

// Payload handed to streamingRecognize each time a request is (re)opened
const request = {
  config: config,
  interimResults: true,
};
84+
85+
let recognizeStream = null;
86+
let restartCounter = 0;
87+
let audioInput = [];
88+
let lastAudioInput = [];
89+
let resultEndTime = 0;
90+
let isFinalEndTime = 0;
91+
let finalRequestEndTime = 0;
92+
let newStream = true;
93+
let bridgingOffset = 0;
94+
let lastTranscriptWasFinal = false;
95+
96+
function startStream() {
97+
// Clear current audioInput
98+
audioInput = [];
99+
// Initiate (Reinitiate) a recognize stream
100+
recognizeStream = client
101+
.streamingRecognize(request)
102+
.on('error', err => {
103+
if (err.code === 11) {
104+
// restartStream();
105+
} else {
106+
console.error('API request error ' + err);
107+
}
108+
})
109+
.on('data', speechCallback);
110+
111+
// Restart stream when streamingLimit expires
112+
setTimeout(restartStream, streamingLimit);
113+
}
114+
115+
const speechCallback = stream => {
116+
// Convert API result end time from seconds + nanoseconds to milliseconds
117+
resultEndTime =
118+
stream.results[0].resultEndTime.seconds * 1000 +
119+
Math.round(stream.results[0].resultEndTime.nanos / 1000000);
120+
121+
// Calculate correct time based on offset from audio sent twice
122+
const correctedTime =
123+
resultEndTime - bridgingOffset + streamingLimit * restartCounter;
124+
125+
process.stdout.clearLine();
126+
process.stdout.cursorTo(0);
127+
let stdoutText = '';
128+
if (stream.results[0] && stream.results[0].alternatives[0]) {
129+
stdoutText =
130+
correctedTime + ': ' + stream.results[0].alternatives[0].transcript;
131+
}
132+
133+
if (stream.results[0].isFinal) {
134+
process.stdout.write(chalk.green(`${stdoutText}\n`));
135+
136+
isFinalEndTime = resultEndTime;
137+
lastTranscriptWasFinal = true;
138+
} else {
139+
// Make sure transcript does not exceed console character length
140+
if (stdoutText.length > process.stdout.columns) {
141+
stdoutText =
142+
stdoutText.substring(0, process.stdout.columns - 4) + '...';
143+
}
144+
process.stdout.write(chalk.red(`${stdoutText}`));
145+
146+
lastTranscriptWasFinal = false;
147+
}
148+
};
149+
150+
const audioInputStreamTransform = new Transform({
151+
transform: (chunk, encoding, callback) => {
152+
if (newStream && lastAudioInput.length !== 0) {
153+
// Approximate math to calculate time of chunks
154+
const chunkTime = streamingLimit / lastAudioInput.length;
155+
if (chunkTime !== 0) {
156+
if (bridgingOffset < 0) {
157+
bridgingOffset = 0;
158+
}
159+
if (bridgingOffset > finalRequestEndTime) {
160+
bridgingOffset = finalRequestEndTime;
161+
}
162+
const chunksFromMS = Math.floor(
163+
(finalRequestEndTime - bridgingOffset) / chunkTime
164+
);
165+
bridgingOffset = Math.floor(
166+
(lastAudioInput.length - chunksFromMS) * chunkTime
167+
);
168+
169+
for (let i = chunksFromMS; i < lastAudioInput.length; i++) {
170+
recognizeStream.write(lastAudioInput[i]);
171+
}
172+
}
173+
newStream = false;
174+
}
175+
176+
audioInput.push(chunk);
177+
178+
if (recognizeStream) {
179+
recognizeStream.write(chunk);
180+
}
181+
182+
callback();
183+
},
184+
});
185+
186+
function restartStream() {
187+
if (recognizeStream) {
188+
recognizeStream.removeListener('data', speechCallback);
189+
recognizeStream = null;
190+
}
191+
if (resultEndTime > 0) {
192+
finalRequestEndTime = isFinalEndTime;
193+
}
194+
resultEndTime = 0;
195+
196+
lastAudioInput = [];
197+
lastAudioInput = audioInput;
198+
199+
restartCounter++;
200+
201+
if (!lastTranscriptWasFinal) {
202+
process.stdout.write(`\n`);
203+
}
204+
process.stdout.write(
205+
chalk.yellow(`${streamingLimit * restartCounter}: RESTARTING REQUEST\n`)
206+
);
207+
208+
newStream = true;
209+
210+
startStream();
211+
}
212+
// Start recording and send the microphone input to the Speech API
213+
record
  .start({
    // node-record-lpcm16 reads the rate from `sampleRate`; an option named
    // `sampleRateHertz` is not recognized, which left the recorder on its
    // default rate regardless of the value requested on the command line.
    sampleRate: sampleRateHertz,
    threshold: 0, // Silence threshold
    silence: 1000,
    keepSilence: true,
    recordProgram: 'rec', // Try also "arecord" or "sox"
  })
  .on('error', err => {
    console.error('Audio recording error ' + err);
  })
  // The transform buffers each chunk and forwards it to the recognize stream
  .pipe(audioInputStreamTransform);
225+
226+
console.log('');
227+
console.log('Listening, press Ctrl+C to stop.');
228+
console.log('');
229+
console.log('End (ms) Transcript Results/Status');
230+
console.log('=========================================================');
231+
232+
startStream();
233+
// [END speech_transcribe_infinite_streaming]
234+
}
235+
236+
// Command-line entry point: exposes the `infiniteStream` command and forwards
// the parsed options (or their defaults) to infiniteStream().
require(`yargs`)
  .demand(1)
  .command(
    `infiniteStream`,
    `infinitely streams audio input from microphone to speech API`,
    {},
    opts =>
      infiniteStream(
        opts.encoding,
        opts.sampleRateHertz,
        opts.languageCode,
        opts.streamingLimit
      )
  )
  .options({
    encoding: {
      alias: 'e',
      default: 'LINEAR16',
      global: true,
      requiresArg: true,
      type: 'string',
    },
    sampleRateHertz: {
      alias: 'r',
      default: 16000,
      global: true,
      requiresArg: true,
      type: 'number',
    },
    languageCode: {
      alias: 'l',
      default: 'en-US',
      global: true,
      requiresArg: true,
      type: 'string',
    },
    streamingLimit: {
      alias: 's',
      default: 10000,
      global: true,
      requiresArg: true,
      type: 'number',
    },
  })
  // The example must name the registered command; with .strict() a misspelt
  // command copied from the help text is rejected.
  .example(`node $0 infiniteStream`)
  .wrap(120)
  .recommendCommands()
  .epilogue(`For more information, see https://cloud.google.com/speech/docs`)
  .help()
  .strict().argv;

speech/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
"dependencies": {
1414
"@google-cloud/speech": "^2.3.1",
1515
"@google-cloud/storage": "^2.0.0",
16+
"chalk": "^2.4.2",
1617
"node-record-lpcm16": "^0.3.0",
18+
"sox": "^0.1.0",
1719
"yargs": "^13.0.0"
1820
},
1921
"devDependencies": {

0 commit comments

Comments
 (0)