Skip to content

Commit

Permalink
Speech upgrade (#427)
Browse files Browse the repository at this point in the history
* Upgrades speech client to semi-gapic
* Adds library with streaming fixes
  • Loading branch information
gguuss authored Jul 19, 2017
1 parent 4a4bcc8 commit 3007162
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 78 deletions.
10 changes: 5 additions & 5 deletions speech/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@
"test": "npm run system-test"
},
"dependencies": {
"@google-cloud/speech": "0.9.3",
"@google-cloud/storage": "1.1.1",
"@google-cloud/speech": "^0.10.1",
"@google-cloud/storage": "^1.2.0",
"node-record-lpcm16": "0.3.0",
"yargs": "8.0.2"
"yargs": "^8.0.2"
},
"devDependencies": {
"@google-cloud/nodejs-repo-tools": "1.4.15",
"ava": "0.19.1",
"@google-cloud/nodejs-repo-tools": "^1.4.15",
"ava": "^0.19.1",
"proxyquire": "1.8.0",
"sinon": "2.3.4"
},
Expand Down
20 changes: 16 additions & 4 deletions speech/quickstart.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
// [START speech_quickstart]
// Imports the Google Cloud client library
const Speech = require('@google-cloud/speech');
const fs = require('fs');

// Your Google Cloud Platform project ID
const projectId = 'YOUR_PROJECT_ID';
const projectId = 'your-project-id';

// Instantiates a client
const speechClient = Speech({
Expand All @@ -30,17 +31,28 @@ const speechClient = Speech({
// The name of the audio file to transcribe
const fileName = './resources/audio.raw';

// Reads a local audio file and converts it to base64
const file = fs.readFileSync(fileName);
const audioBytes = file.toString('base64');

// The audio file's encoding, sample rate in hertz, and BCP-47 language code
const options = {
const audio = {
content: audioBytes
};
const config = {
encoding: 'LINEAR16',
sampleRateHertz: 16000,
languageCode: 'en-US'
};
const request = {
audio: audio,
config: config
};

// Detects speech in the audio file
speechClient.recognize(fileName, options)
speechClient.recognize(request)
.then((results) => {
const transcription = results[0];
const transcription = results[0].results[0].alternatives[0].transcript;
console.log(`Transcription: ${transcription}`);
})
.catch((err) => {
Expand Down
78 changes: 58 additions & 20 deletions speech/recognize.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
function syncRecognize (filename, encoding, sampleRateHertz, languageCode) {
// [START speech_sync_recognize]
// Imports the Google Cloud client library
const fs = require('fs');
const Speech = require('@google-cloud/speech');

// Instantiates a client
Expand All @@ -43,18 +44,25 @@ function syncRecognize (filename, encoding, sampleRateHertz, languageCode) {
// The BCP-47 language code to use, e.g. 'en-US'
// const languageCode = 'en-US';

const request = {
const config = {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode
};
const audio = {
content: fs.readFileSync(filename).toString('base64')
};

const request = {
config: config,
audio: audio
};

// Detects speech in the audio file
speech.recognize(filename, request)
speech.recognize(request)
.then((results) => {
const transcription = results[0];

console.log(`Transcription: ${transcription}`);
const transcription = results[0].results[0].alternatives[0].transcript;
console.log(`Transcription: `, transcription);
})
.catch((err) => {
console.error('ERROR:', err);
Expand Down Expand Up @@ -82,18 +90,25 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
// The BCP-47 language code to use, e.g. 'en-US'
// const languageCode = 'en-US';

const request = {
const config = {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode
};
const audio = {
uri: gcsUri
};

const request = {
config: config,
audio: audio
};

// Detects speech in the audio file
speech.recognize(gcsUri, request)
speech.recognize(request)
.then((results) => {
const transcription = results[0];

console.log(`Transcription: ${transcription}`);
const transcription = results[0].results[0].alternatives[0].transcript;
console.log(`Transcription: `, transcription);
})
.catch((err) => {
console.error('ERROR:', err);
Expand All @@ -105,6 +120,7 @@ function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) {
// [START speech_async_recognize]
// Imports the Google Cloud client library
const Speech = require('@google-cloud/speech');
const fs = require('fs');

// Instantiates a client
const speech = Speech();
Expand All @@ -121,22 +137,30 @@ function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) {
// The BCP-47 language code to use, e.g. 'en-US'
// const languageCode = 'en-US';

const request = {
const config = {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode
};
const audio = {
content: fs.readFileSync(filename).toString('base64')
};

const request = {
config: config,
audio: audio
};

// Detects speech in the audio file. This creates a recognition job that you
// can wait for now, or get its result later.
speech.startRecognition(filename, request)
speech.longRunningRecognize(request)
.then((results) => {
const operation = results[0];
// Get a Promise representation of the final result of the job
return operation.promise();
})
.then((results) => {
const transcription = results[0];
const transcription = results[0].results[0].alternatives[0].transcript;
console.log(`Transcription: ${transcription}`);
})
.catch((err) => {
Expand Down Expand Up @@ -165,22 +189,31 @@ function asyncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
// The BCP-47 language code to use, e.g. 'en-US'
// const languageCode = 'en-US';

const request = {
const config = {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode
};

const audio = {
uri: gcsUri
};

const request = {
config: config,
audio: audio
};

// Detects speech in the audio file. This creates a recognition job that you
// can wait for now, or get its result later.
speech.startRecognition(gcsUri, request)
speech.longRunningRecognize(request)
.then((results) => {
const operation = results[0];
// Get a Promise representation of the final result of the job
return operation.promise();
})
.then((results) => {
const transcription = results[0];
const transcription = results[0].results[0].alternatives[0].transcript;
console.log(`Transcription: ${transcription}`);
})
.catch((err) => {
Expand Down Expand Up @@ -221,10 +254,11 @@ function streamingRecognize (filename, encoding, sampleRateHertz, languageCode)
};

// Stream the audio to the Google Cloud Speech API
const recognizeStream = speech.createRecognizeStream(request)
const recognizeStream = speech.streamingRecognize(request)
.on('error', console.error)
.on('data', (data) => {
console.log(`Transcription: ${data.results}`);
console.log(
`Transcription: ${data.results[0].alternatives[0].transcript}`);
});

// Stream an audio file from disk to the Speech API, e.g. "./resources/audio.raw"
Expand Down Expand Up @@ -261,9 +295,13 @@ function streamingMicRecognize (encoding, sampleRateHertz, languageCode) {
};

// Create a recognize stream
const recognizeStream = speech.createRecognizeStream(request)
const recognizeStream = speech.streamingRecognize(request)
.on('error', console.error)
.on('data', (data) => process.stdout.write(data.results));
.on('data', (data) =>
process.stdout.write(
(data.results[0] && data.results[0].alternatives[0])
? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
: `\n\nReached transcription time limit, press Ctrl+C\n`));

// Start recording and send the microphone input to the Speech API
record
Expand Down
57 changes: 10 additions & 47 deletions speech/system-test/quickstart.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,57 +16,20 @@
'use strict';

const path = require(`path`);
const proxyquire = require(`proxyquire`).noPreserveCache();
const sinon = require(`sinon`);
const speech = proxyquire(`@google-cloud/speech`, {})();
const test = require(`ava`);

const cmd = `node quickstart.js`;
const cwd = path.join(__dirname, `..`);
const text = `how old is the Brooklyn Bridge`;

const {
checkCredentials,
stubConsole,
restoreConsole
runAsync
} = require(`@google-cloud/nodejs-repo-tools`);

const fileName = path.join(__dirname, `../resources/audio.raw`);
const config = {
encoding: `LINEAR16`,
sampleRateHertz: 16000,
languageCode: `en-US`
};

test.before(checkCredentials);
test.before(stubConsole);
test.after.always(restoreConsole);

test.cb(`should detect speech`, (t) => {
const expectedFileName = `./resources/audio.raw`;
const expectedText = `how old is the Brooklyn Bridge`;

const speechMock = {
recognize: (_fileName, _config) => {
t.is(_fileName, expectedFileName);
t.deepEqual(_config, config);

return speech.recognize(fileName, config)
.then(([transcription]) => {
t.is(transcription, expectedText);

setTimeout(() => {
try {
t.is(console.log.callCount, 1);
t.deepEqual(console.log.getCall(0).args, [`Transcription: ${expectedText}`]);
t.end();
} catch (err) {
t.end(err);
}
}, 200);

return [transcription];
});
}
};
test.before(async () => {
});

proxyquire(`../quickstart`, {
'@google-cloud/speech': sinon.stub().returns(speechMock)
});
test(`should run quickstart`, async (t) => {
const output = await runAsync(`${cmd}`, cwd);
t.true(output.includes(`Transcription: ${text}`));
});
4 changes: 2 additions & 2 deletions speech/system-test/recognize.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ test.after.always(async () => {

test(`should run sync recognize`, async (t) => {
const output = await runAsync(`${cmd} sync ${filepath}`, cwd);
t.true(output.includes(`Transcription: ${text}`));
t.true(output.includes(`Transcription:  ${text}`));
});

test(`should run sync recognize on a GCS file`, async (t) => {
const output = await runAsync(`${cmd} sync-gcs gs://${bucketName}/${filename}`, cwd);
t.true(output.includes(`Transcription: ${text}`));
t.true(output.includes(`Transcription:  ${text}`));
});

test(`should run async recognize on a local file`, async (t) => {
Expand Down

0 comments on commit 3007162

Please sign in to comment.