From 3007162200adfe8c6fbbd6150daded267afb9465 Mon Sep 17 00:00:00 2001 From: Gus Class Date: Wed, 19 Jul 2017 15:19:15 -0400 Subject: [PATCH] Speech upgrade (#427) * Upgrades speech client to semi-gapic * Adds library with streaming fixes --- speech/package.json | 10 ++-- speech/quickstart.js | 20 +++++-- speech/recognize.js | 78 ++++++++++++++++++++------- speech/system-test/quickstart.test.js | 57 ++++---------------- speech/system-test/recognize.test.js | 4 +- 5 files changed, 91 insertions(+), 78 deletions(-) diff --git a/speech/package.json b/speech/package.json index 0443ab89e0e..5c103019656 100644 --- a/speech/package.json +++ b/speech/package.json @@ -18,14 +18,14 @@ "test": "npm run system-test" }, "dependencies": { - "@google-cloud/speech": "0.9.3", - "@google-cloud/storage": "1.1.1", + "@google-cloud/speech": "^0.10.1", + "@google-cloud/storage": "^1.2.0", "node-record-lpcm16": "0.3.0", - "yargs": "8.0.2" + "yargs": "^8.0.2" }, "devDependencies": { - "@google-cloud/nodejs-repo-tools": "1.4.15", - "ava": "0.19.1", + "@google-cloud/nodejs-repo-tools": "^1.4.15", + "ava": "^0.19.1", "proxyquire": "1.8.0", "sinon": "2.3.4" }, diff --git a/speech/quickstart.js b/speech/quickstart.js index d983de90671..f088297a444 100644 --- a/speech/quickstart.js +++ b/speech/quickstart.js @@ -18,9 +18,10 @@ // [START speech_quickstart] // Imports the Google Cloud client library const Speech = require('@google-cloud/speech'); +const fs = require('fs'); // Your Google Cloud Platform project ID -const projectId = 'YOUR_PROJECT_ID'; +const projectId = 'your-project-id'; // Instantiates a client const speechClient = Speech({ @@ -30,17 +31,28 @@ const speechClient = Speech({ // The name of the audio file to transcribe const fileName = './resources/audio.raw'; +// Reads a local audio file and converts it to base64 +const file = fs.readFileSync(fileName); +const audioBytes = file.toString('base64'); + // The audio file's encoding, sample rate in hertz, and BCP-47 language code -const options = { +const audio = { + content: audioBytes +}; +const config = { encoding: 'LINEAR16', sampleRateHertz: 16000, languageCode: 'en-US' }; +const request = { + audio: audio, + config: config +}; // Detects speech in the audio file -speechClient.recognize(fileName, options) +speechClient.recognize(request) .then((results) => { - const transcription = results[0]; + const transcription = results[0].results[0].alternatives[0].transcript; console.log(`Transcription: ${transcription}`); }) .catch((err) => { diff --git a/speech/recognize.js b/speech/recognize.js index 3b6fef5fa21..eca5a11ce0d 100644 --- a/speech/recognize.js +++ b/speech/recognize.js @@ -26,6 +26,7 @@ function syncRecognize (filename, encoding, sampleRateHertz, languageCode) { // [START speech_sync_recognize] // Imports the Google Cloud client library + const fs = require('fs'); const Speech = require('@google-cloud/speech'); // Instantiates a client @@ -43,18 +44,25 @@ function syncRecognize (filename, encoding, sampleRateHertz, languageCode) { // The BCP-47 language code to use, e.g. 'en-US' // const languageCode = 'en-US'; - const request = { + const config = { encoding: encoding, sampleRateHertz: sampleRateHertz, languageCode: languageCode }; + const audio = { + content: fs.readFileSync(filename).toString('base64') + }; + + const request = { + config: config, + audio: audio + }; // Detects speech in the audio file - speech.recognize(filename, request) + speech.recognize(request) .then((results) => { - const transcription = results[0]; - - console.log(`Transcription: ${transcription}`); + const transcription = results[0].results[0].alternatives[0].transcript; + console.log(`Transcription: `, transcription); }) .catch((err) => { console.error('ERROR:', err); @@ -82,18 +90,25 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) { // The BCP-47 language code to use, e.g. 'en-US' // const languageCode = 'en-US'; - const request = { + const config = { encoding: encoding, sampleRateHertz: sampleRateHertz, languageCode: languageCode }; + const audio = { + uri: gcsUri + }; + + const request = { + config: config, + audio: audio + }; // Detects speech in the audio file - speech.recognize(gcsUri, request) + speech.recognize(request) .then((results) => { - const transcription = results[0]; - - console.log(`Transcription: ${transcription}`); + const transcription = results[0].results[0].alternatives[0].transcript; + console.log(`Transcription: `, transcription); }) .catch((err) => { console.error('ERROR:', err); @@ -105,6 +120,7 @@ function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) { // [START speech_async_recognize] // Imports the Google Cloud client library const Speech = require('@google-cloud/speech'); + const fs = require('fs'); // Instantiates a client const speech = Speech(); @@ -121,22 +137,30 @@ function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) { // The BCP-47 language code to use, e.g. 'en-US' // const languageCode = 'en-US'; - const request = { + const config = { encoding: encoding, sampleRateHertz: sampleRateHertz, languageCode: languageCode }; + const audio = { + content: fs.readFileSync(filename).toString('base64') + }; + + const request = { + config: config, + audio: audio + }; // Detects speech in the audio file. This creates a recognition job that you // can wait for now, or get its result later. - speech.startRecognition(filename, request) + speech.longRunningRecognize(request) .then((results) => { const operation = results[0]; // Get a Promise representation of the final result of the job return operation.promise(); }) .then((results) => { - const transcription = results[0]; + const transcription = results[0].results[0].alternatives[0].transcript; console.log(`Transcription: ${transcription}`); }) .catch((err) => { @@ -165,22 +189,31 @@ function asyncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) { // The BCP-47 language code to use, e.g. 'en-US' // const languageCode = 'en-US'; - const request = { + const config = { encoding: encoding, sampleRateHertz: sampleRateHertz, languageCode: languageCode }; + const audio = { + uri: gcsUri + }; + + const request = { + config: config, + audio: audio + }; + // Detects speech in the audio file. This creates a recognition job that you // can wait for now, or get its result later. - speech.startRecognition(gcsUri, request) + speech.longRunningRecognize(request) .then((results) => { const operation = results[0]; // Get a Promise representation of the final result of the job return operation.promise(); }) .then((results) => { - const transcription = results[0]; + const transcription = results[0].results[0].alternatives[0].transcript; console.log(`Transcription: ${transcription}`); }) .catch((err) => { @@ -221,10 +254,11 @@ function streamingRecognize (filename, encoding, sampleRateHertz, languageCode) }; // Stream the audio to the Google Cloud Speech API - const recognizeStream = speech.createRecognizeStream(request) + const recognizeStream = speech.streamingRecognize(request) .on('error', console.error) .on('data', (data) => { - console.log(`Transcription: ${data.results}`); + console.log( + `Transcription: ${data.results[0].alternatives[0].transcript}`); }); // Stream an audio file from disk to the Speech API, e.g. "./resources/audio.raw" @@ -261,9 +295,13 @@ function streamingMicRecognize (encoding, sampleRateHertz, languageCode) { }; // Create a recognize stream - const recognizeStream = speech.createRecognizeStream(request) + const recognizeStream = speech.streamingRecognize(request) .on('error', console.error) - .on('data', (data) => process.stdout.write(data.results)); + .on('data', (data) => + process.stdout.write( + (data.results[0] && data.results[0].alternatives[0]) + ? `Transcription: ${data.results[0].alternatives[0].transcript}\n` + : `\n\nReached transcription time limit, press Ctrl+C\n`)); // Start recording and send the microphone input to the Speech API record diff --git a/speech/system-test/quickstart.test.js b/speech/system-test/quickstart.test.js index b58d1abf42a..6a86782a100 100644 --- a/speech/system-test/quickstart.test.js +++ b/speech/system-test/quickstart.test.js @@ -16,57 +16,20 @@ 'use strict'; const path = require(`path`); -const proxyquire = require(`proxyquire`).noPreserveCache(); -const sinon = require(`sinon`); -const speech = proxyquire(`@google-cloud/speech`, {})(); const test = require(`ava`); +const cmd = `node quickstart.js`; +const cwd = path.join(__dirname, `..`); +const text = `how old is the Brooklyn Bridge`; + const { - checkCredentials, - stubConsole, - restoreConsole + runAsync } = require(`@google-cloud/nodejs-repo-tools`); -const fileName = path.join(__dirname, `../resources/audio.raw`); -const config = { - encoding: `LINEAR16`, - sampleRateHertz: 16000, - languageCode: `en-US` -}; - -test.before(checkCredentials); -test.before(stubConsole); -test.after.always(restoreConsole); - -test.cb(`should detect speech`, (t) => { - const expectedFileName = `./resources/audio.raw`; - const expectedText = `how old is the Brooklyn Bridge`; - - const speechMock = { - recognize: (_fileName, _config) => { - t.is(_fileName, expectedFileName); - t.deepEqual(_config, config); - - return speech.recognize(fileName, config) - .then(([transcription]) => { - t.is(transcription, expectedText); - - setTimeout(() => { - try { - t.is(console.log.callCount, 1); - t.deepEqual(console.log.getCall(0).args, [`Transcription: ${expectedText}`]); - t.end(); - } catch (err) { - t.end(err); - } - }, 200); - - return [transcription]; - }); - } - }; +test.before(async () => { +}); - proxyquire(`../quickstart`, { - '@google-cloud/speech': sinon.stub().returns(speechMock) - }); +test(`should run quickstart`, async (t) => { + const output = await runAsync(`${cmd}`, cwd); + t.true(output.includes(`Transcription: ${text}`)); }); diff --git a/speech/system-test/recognize.test.js b/speech/system-test/recognize.test.js index 73606a2a030..bf56a5fb1e2 100644 --- a/speech/system-test/recognize.test.js +++ b/speech/system-test/recognize.test.js @@ -45,12 +45,12 @@ test.after.always(async () => { test(`should run sync recognize`, async (t) => { const output = await runAsync(`${cmd} sync ${filepath}`, cwd); - t.true(output.includes(`Transcription: ${text}`)); + t.true(output.includes(`Transcription: ${text}`)); }); test(`should run sync recognize on a GCS file`, async (t) => { const output = await runAsync(`${cmd} sync-gcs gs://${bucketName}/${filename}`, cwd); - t.true(output.includes(`Transcription: ${text}`)); + t.true(output.includes(`Transcription: ${text}`)); }); test(`should run async recognize on a local file`, async (t) => {