Skip to content

Add ptime cmdline arg #357

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions speech/api/parse_arguments.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ ParseResult ParseArguments(int argc, char* argv[]) {
("bitrate", po::value<int>()->default_value(16000),
"the sample rate in Hz")
//
("ptime", po::value<int>()->default_value(0),
"(optional) packetization time in milliseconds")
//
("language-code", po::value<std::string>()->default_value("en"),
"the language code for the audio")
//
Expand All @@ -47,6 +50,7 @@ ParseResult ParseArguments(int argc, char* argv[]) {

auto const path = vm["path"].as<std::string>();
auto const bitrate = vm["bitrate"].as<int>();
auto const ptime = vm["ptime"].as<int>();
auto const language_code = vm["language-code"].as<std::string>();
// Validate the command-line options.
if (path.empty()) {
Expand All @@ -57,11 +61,19 @@ ParseResult ParseArguments(int argc, char* argv[]) {
"--bitrate option must be a positive number, value=" +
std::to_string(bitrate));
}
if (ptime < 0) {
throw std::runtime_error(
"--ptime option must be a positive number, value=" +
std::to_string(ptime));
}

ParseResult result;
result.path = path;
result.config.set_language_code(language_code);
result.config.set_sample_rate_hertz(bitrate);
result.bitrate = 0;
result.ptime = 0;
result.sample_size = 0;

// Use the audio file extension to configure the encoding.
auto const ext = [&] {
Expand All @@ -75,8 +87,14 @@ ParseResult ParseArguments(int argc, char* argv[]) {

if (ext.empty() || ext == ".raw") {
result.config.set_encoding(RecognitionConfig::LINEAR16);
result.bitrate = bitrate;
result.ptime = ptime;
result.sample_size = 2;
} else if (ext == ".ulaw") {
result.config.set_encoding(RecognitionConfig::MULAW);
result.bitrate = bitrate;
result.ptime = ptime;
result.sample_size = 1;
} else if (ext == ".flac") {
result.config.set_encoding(RecognitionConfig::FLAC);
} else if (ext == ".amr") {
Expand Down
3 changes: 3 additions & 0 deletions speech/api/parse_arguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
// The outcome of parsing the command-line arguments.
//
// The integer members carry default initializers so that a default-constructed
// ParseResult never holds indeterminate values.
struct ParseResult {
  // Recognition settings (language code, sample rate, encoding) built from the
  // command-line options and the audio file extension.
  google::cloud::speech::v1::RecognitionConfig config;
  // Value of the `path` command-line option (the audio file).
  std::string path;
  // Sample rate in Hz taken from `--bitrate`. ParseArguments only fills this
  // in for ".raw" (or extension-less) and ".ulaw" files; otherwise it is 0.
  int bitrate = 0;
  // Packetization time in milliseconds taken from `--ptime`. Zero means "not
  // configured". Only filled in for the same encodings as `bitrate`.
  int ptime = 0;
  // Bytes per audio sample: 2 for LINEAR16, 1 for MULAW, 0 for any other
  // encoding.
  int sample_size = 0;
};

ParseResult ParseArguments(int argc, char* argv[]);
Expand Down
34 changes: 23 additions & 11 deletions speech/api/streaming_transcribe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,34 @@ using RecognizeStream = ::google::cloud::AsyncStreamingReadWriteRpc<
speech::v1::StreamingRecognizeResponse>;

auto constexpr kUsage = R"""(Usage:
streaming_transcribe [--bitrate N] audio.(raw|ulaw|flac|amr|awb)
streaming_transcribe [--bitrate N] [--ptime N] audio.(raw|ulaw|flac|amr|awb)
)""";

// Write the audio in 64k chunks at a time, simulating audio content arriving
// from a microphone.
void MicrophoneThreadMain(RecognizeStream& stream,
std::string const& file_path) {
// Write one audio packet every ptime ms (or, when ptime is 0, a 64k chunk
// every second), simulating audio content arriving from a microphone.
void MicrophoneThreadMain(RecognizeStream& stream, std::string const& file_path,
const int bitrate, const int ptime,
const int sample_size) {
speech::v1::StreamingRecognizeRequest request;
std::ifstream file_stream(file_path, std::ios::binary);
auto constexpr kChunkSize = 64 * 1024;
std::vector<char> chunk(kChunkSize);
// By default, read 64k bytes every 1 second
auto bytes_n = 64 * 1024;
auto wait_ms = 1000;
// If ptime is configured read packet every ptime ms (may only be set for
// "raw" and "ulaw")
if (ptime) {
wait_ms = ptime;
bytes_n = (bitrate / 1000) * sample_size * ptime;
}
std::vector<char> chunk(bytes_n);
while (true) {
// Read another chunk from the file.
file_stream.read(chunk.data(), chunk.size());
auto const bytes_read = file_stream.gcount();
// And write the chunk to the stream.
if (bytes_read > 0) {
request.set_audio_content(chunk.data(), bytes_read);
std::cout << "Sending " << bytes_read / 1024 << "k bytes." << std::endl;
std::cout << "Sending " << bytes_read << " bytes." << std::endl;
if (!stream.Write(request, grpc::WriteOptions()).get()) break;
}
if (!file_stream) {
Expand All @@ -54,7 +63,7 @@ void MicrophoneThreadMain(RecognizeStream& stream,
break;
}
// Wait for the configured interval (wait_ms) before writing the next chunk.
std::this_thread::sleep_for(std::chrono::seconds(1));
std::this_thread::sleep_for(std::chrono::milliseconds(wait_ms));
}
}

Expand All @@ -65,6 +74,9 @@ int main(int argc, char** argv) try {
// Parse command line arguments.
auto args = ParseArguments(argc, argv);
auto const file_path = args.path;
auto const bitrate = args.bitrate;
auto const ptime = args.ptime;
auto const sample_size = args.sample_size;

speech::v1::StreamingRecognizeRequest request;
auto& streaming_config = *request.mutable_streaming_config();
Expand All @@ -81,8 +93,8 @@ int main(int argc, char** argv) try {
}

// Simulate a microphone thread using the file as input.
auto microphone =
std::thread(MicrophoneThreadMain, std::ref(*stream), file_path);
auto microphone = std::thread(MicrophoneThreadMain, std::ref(*stream),
file_path, bitrate, ptime, sample_size);
// Read responses.
auto read = [&stream] { return stream->Read().get(); };
for (auto response = read(); response.has_value(); response = read()) {
Expand Down