Adding support for the SpeechRecognition.maxAlternatives JS API parameter (Speech CL2.5)

primiano@chromium.org · primiano@chromium.org · commit 2b0f67f2c247 · 2012-06-27T17:48:54.000Z
BUG=116954
TEST=none
Review URL: https://chromiumcodereview.appspot.com/10629003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@144487 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/content/browser/speech/google_one_shot_remote_engine.cc b/content/browser/speech/google_one_shot_remote_engine.cc
@@ -37,9 +37,6 @@ const int kWebServiceStatusNoSpeech = 4;
 const int kWebServiceStatusNoMatch = 5;
 const speech::AudioEncoder::Codec kDefaultAudioCodec =
     speech::AudioEncoder::CODEC_FLAC;
-// TODO(satish): Remove this hardcoded value once the page is allowed to
-// set this via an attribute.
-const int kMaxResults = 6;
 
 bool ParseServerResponse(const std::string& response_body,
                          SpeechRecognitionResult* result,
@@ -198,7 +195,7 @@ void GoogleOneShotRemoteEngine::StartRecognition() {
   if (!config_.hardware_info.empty())
     parts.push_back("xhw=" + net::EscapeQueryParamValue(config_.hardware_info,
                                                         true));
-  parts.push_back("maxresults=" + base::IntToString(kMaxResults));
+  parts.push_back("maxresults=" + base::UintToString(config_.max_hypotheses));
   parts.push_back(config_.filter_profanities ? "pfilter=2" : "pfilter=0");
 
   GURL url(std::string(kDefaultSpeechRecognitionUrl) + JoinString(parts, '&'));
diff --git a/content/browser/speech/google_streaming_remote_engine.cc b/content/browser/speech/google_streaming_remote_engine.cc
@@ -35,9 +35,6 @@ using net::URLFetcher;
 
 namespace {
 
-// TODO(primiano): This shouldn't be a const, rather it should be taken from
-// maxNBest property (which is not yet implemented in WebKit).
-const int kMaxResults = 5;
 const char kDownstreamUrl[] = "/down?";
 const char kUpstreamUrl[] = "/up?";
 const int kAudioPacketIntervalMs = 100;
@@ -322,8 +319,6 @@ GoogleStreamingRemoteEngine::ConnectBothStreams(const FSMEventArgs&) {
   std::vector<std::string> downstream_args;
   downstream_args.push_back("sky=" + GetWebserviceKey());
   downstream_args.push_back("pair=" + request_key);
-  downstream_args.push_back("maxresults=" + base::IntToString(kMaxResults));
-
   GURL downstream_url(GetWebserviceBaseURL() + std::string(kDownstreamUrl) +
                       JoinString(downstream_args, '&'));
   // TODO(primiano): /////////// Remove this after debug stage. /////////////
@@ -347,9 +342,12 @@ GoogleStreamingRemoteEngine::ConnectBothStreams(const FSMEventArgs&) {
       "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
   upstream_args.push_back(
       config_.filter_profanities ? "pfilter=2" : "pfilter=0");
-  upstream_args.push_back("maxresults=" + base::IntToString(kMaxResults));
+  if (config_.max_hypotheses > 0U) {
+    upstream_args.push_back("maxresults=" +
+                            base::UintToString(config_.max_hypotheses));
+  }
+  // TODO(primiano): What is this client= parameter? Check with speech team.
   upstream_args.push_back("client=myapp.mycompany.com");
-  // TODO(primiano): Can we remove this feature sending audio HW information?
   if (!config_.hardware_info.empty()) {
     upstream_args.push_back(
         "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
diff --git a/content/browser/speech/input_tag_speech_dispatcher_host.cc b/content/browser/speech/input_tag_speech_dispatcher_host.cc
@@ -16,6 +16,10 @@ using content::SpeechRecognitionManager;
 using content::SpeechRecognitionSessionConfig;
 using content::SpeechRecognitionSessionContext;
 
+namespace {
+const uint32 kMaxHypothesesForSpeechInputTag = 6;
+}
+
 namespace speech {
 SpeechRecognitionManager* InputTagSpeechDispatcherHost::manager_for_tests_;
 
@@ -77,6 +81,7 @@ void InputTagSpeechDispatcherHost::OnStartRecognition(
     config.grammars.push_back(
         content::SpeechRecognitionGrammar(params.grammar));
   }
+  config.max_hypotheses = kMaxHypothesesForSpeechInputTag;
   config.origin_url = params.origin_url;
   config.initial_context = context;
   config.url_request_context_getter = url_request_context_getter_.get();
diff --git a/content/browser/speech/speech_recognition_dispatcher_host.cc b/content/browser/speech/speech_recognition_dispatcher_host.cc
@@ -83,6 +83,7 @@ void SpeechRecognitionDispatcherHost::OnStartRequest(
   config.is_one_shot = params.is_one_shot;
   config.language = params.language;
   config.grammars = params.grammars;
+  config.max_hypotheses = params.max_hypotheses;
   config.origin_url = params.origin_url;
   config.initial_context = context;
   config.url_request_context_getter = context_getter_.get();
diff --git a/content/browser/speech/speech_recognition_engine.cc b/content/browser/speech/speech_recognition_engine.cc
@@ -7,12 +7,14 @@
 namespace {
 const int kDefaultConfigSampleRate = 8000;
 const int kDefaultConfigBitsPerSample = 16;
+const uint32 kDefaultMaxHypotheses = 1;
 }  // namespace
 
 namespace speech {
 
 SpeechRecognitionEngine::Config::Config()
     : filter_profanities(false),
+      max_hypotheses(kDefaultMaxHypotheses),
       audio_sample_rate(kDefaultConfigSampleRate),
       audio_num_bits_per_sample(kDefaultConfigBitsPerSample) {
 }
diff --git a/content/browser/speech/speech_recognition_engine.h b/content/browser/speech/speech_recognition_engine.h
@@ -56,6 +56,7 @@ class SpeechRecognitionEngine {
     std::string language;
     content::SpeechRecognitionGrammarArray grammars;
     bool filter_profanities;
+    uint32 max_hypotheses;
     std::string hardware_info;
     std::string origin_url;
     int audio_sample_rate;
diff --git a/content/browser/speech/speech_recognition_manager_impl.cc b/content/browser/speech/speech_recognition_manager_impl.cc
@@ -98,6 +98,7 @@ int SpeechRecognitionManagerImpl::CreateSession(
   remote_engine_config.audio_num_bits_per_sample =
      SpeechRecognizer::kNumBitsPerAudioSample;
   remote_engine_config.filter_profanities = config.filter_profanities;
+  remote_engine_config.max_hypotheses = config.max_hypotheses;
   remote_engine_config.hardware_info = hardware_info;
   remote_engine_config.origin_url = can_report_metrics ? config.origin_url : "";
 
diff --git a/content/common/speech_recognition_messages.h b/content/common/speech_recognition_messages.h
@@ -117,6 +117,8 @@ IPC_STRUCT_BEGIN(SpeechRecognitionHostMsg_StartRequest_Params)
   IPC_STRUCT_MEMBER(std::string, origin_url)
   // One-shot/continuous recognition mode.
   IPC_STRUCT_MEMBER(bool, is_one_shot)
+  // Maximum number of hypotheses allowed for each result.
+  IPC_STRUCT_MEMBER(uint32, max_hypotheses)
 IPC_STRUCT_END()
 
 
diff --git a/content/public/browser/speech_recognition_session_config.cc b/content/public/browser/speech_recognition_session_config.cc
@@ -4,11 +4,16 @@
 
 #include "content/public/browser/speech_recognition_session_config.h"
 
+namespace {
+const uint32 kDefaultMaxHypotheses = 1;
+}
+
 namespace content {
 
 SpeechRecognitionSessionConfig::SpeechRecognitionSessionConfig()
     : is_one_shot(true),
       filter_profanities(false),
+      max_hypotheses(kDefaultMaxHypotheses),
       event_listener(NULL) {
 }
 
diff --git a/content/public/browser/speech_recognition_session_config.h b/content/public/browser/speech_recognition_session_config.h
@@ -30,6 +30,7 @@ struct CONTENT_EXPORT SpeechRecognitionSessionConfig {
   SpeechRecognitionGrammarArray grammars;
   std::string origin_url;
   bool filter_profanities;
+  uint32 max_hypotheses;
   SpeechRecognitionSessionContext initial_context;
   scoped_refptr<net::URLRequestContextGetter> url_request_context_getter;
   SpeechRecognitionEventListener* event_listener;
diff --git a/content/renderer/speech_recognition_dispatcher.cc b/content/renderer/speech_recognition_dispatcher.cc
@@ -73,6 +73,7 @@ void SpeechRecognitionDispatcher::start(
   }
   msg_params.language = UTF16ToUTF8(params.language());
   msg_params.is_one_shot = !params.continuous();
+  msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());
   msg_params.origin_url = params.origin().toString().utf8();
   msg_params.render_view_id = routing_id();
   msg_params.request_id = GetOrCreateIDForHandle(handle);

Original file line number	Diff line number	Diff line change
`@@ -73,6 +73,7 @@ void SpeechRecognitionDispatcher::start(`
`73`	`73`	`}`
`74`	`74`	`msg_params.language = UTF16ToUTF8(params.language());`
`75`	`75`	`msg_params.is_one_shot = !params.continuous();`
	`76`	`+ msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());`
`76`	`77`	`msg_params.origin_url = params.origin().toString().utf8();`
`77`	`78`	`msg_params.render_view_id = routing_id();`
`78`	`79`	`msg_params.request_id = GetOrCreateIDForHandle(handle);`