From cf4edb43e745fd54ed629857497cb12fc8bf080f Mon Sep 17 00:00:00 2001
From: "wuchengli@chromium.org"
 <wuchengli@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>
Date: Wed, 11 Sep 2013 14:47:28 +0000
Subject: [PATCH] Send PictureReady and NotifyEndOfBitstreamBuffer directly to
 IO thread.

This can reduce decode latency by 4ms per frame.

BUG=170345
TEST=Run http://apprtc.appspot.com on Chromebook Daisy.
     Play local hardware-accelerated videos on Daisy.
     Run video_decode_accelerator_unittest.

Review URL: https://chromiumcodereview.appspot.com/23819040

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@222550 0039d316-1c4b-4281-b951-d872f2087c98
---
 content/common/gpu/gpu_command_buffer_stub.cc |  6 +--
 .../media/exynos_video_decode_accelerator.cc  | 10 +++--
 .../media/exynos_video_decode_accelerator.h   |  3 ++
 .../gpu/media/gpu_video_decode_accelerator.cc | 41 ++++++++++++++++---
 .../gpu/media/gpu_video_decode_accelerator.h  | 14 +++++--
 .../gpu/media/video_decode_accelerator_impl.h |  4 +-
 .../video_decode_accelerator_unittest.cc      | 10 ++++-
 7 files changed, 68 insertions(+), 20 deletions(-)
diff --git a/content/common/gpu/gpu_command_buffer_stub.cc b/content/common/gpu/gpu_command_buffer_stub.cc
index 14ac92fbb4aaea..da3fb0ec65f592 100644
--- a/content/common/gpu/gpu_command_buffer_stub.cc
+++ b/content/common/gpu/gpu_command_buffer_stub.cc
@@ -720,9 +720,9 @@ void GpuCommandBufferStub::OnCreateVideoDecoder(
     IPC::Message* reply_message) {
   TRACE_EVENT0("gpu", "GpuCommandBufferStub::OnCreateVideoDecoder");
   int decoder_route_id = channel_->GenerateRouteID();
-  GpuVideoDecodeAccelerator* decoder =
-      new GpuVideoDecodeAccelerator(decoder_route_id, this);
-  decoder->Initialize(profile, reply_message, channel_->io_message_loop());
+  GpuVideoDecodeAccelerator* decoder = new GpuVideoDecodeAccelerator(
+      decoder_route_id, this, channel_->io_message_loop());
+  decoder->Initialize(profile, reply_message);
   // decoder is registered as a DestructionObserver of this stub and will
   // self-delete during destruction of this stub.
 }
diff --git a/content/common/gpu/media/exynos_video_decode_accelerator.cc b/content/common/gpu/media/exynos_video_decode_accelerator.cc
index e643969d8e6d6c..edce5f3ef8bcb6 100644
--- a/content/common/gpu/media/exynos_video_decode_accelerator.cc
+++ b/content/common/gpu/media/exynos_video_decode_accelerator.cc
@@ -205,6 +205,7 @@ ExynosVideoDecodeAccelerator::ExynosVideoDecodeAccelerator(
     EGLDisplay egl_display,
     EGLContext egl_context,
     Client* client,
+    const base::WeakPtr<Client>& io_client,
     const base::Callback<bool(void)>& make_context_current,
     const scoped_refptr<base::MessageLoopProxy>& io_message_loop_proxy)
     : child_message_loop_proxy_(base::MessageLoopProxy::current()),
@@ -212,6 +213,7 @@ ExynosVideoDecodeAccelerator::ExynosVideoDecodeAccelerator(
       weak_this_(base::AsWeakPtr(this)),
       client_ptr_factory_(client),
       client_(client_ptr_factory_.GetWeakPtr()),
+      io_client_(io_client),
       decoder_thread_("ExynosDecoderThread"),
       decoder_state_(kUninitialized),
       decoder_delay_bitstream_buffer_id_(-1),
@@ -611,7 +613,7 @@ void ExynosVideoDecodeAccelerator::DecodeTask(
                bitstream_buffer.id());
 
   scoped_ptr<BitstreamBufferRef> bitstream_record(new BitstreamBufferRef(
-      client_, child_message_loop_proxy_,
+      io_client_, io_message_loop_proxy_,
       new base::SharedMemory(bitstream_buffer.handle(), true),
       bitstream_buffer.size(), bitstream_buffer.id()));
   if (!bitstream_record->shm->Map(bitstream_buffer.size())) {
@@ -1418,8 +1420,8 @@ void ExynosVideoDecodeAccelerator::DequeueGsc() {
     gsc_output_buffer_queued_count_--;
     DVLOG(3) << "DequeueGsc(): returning input_id=" << dqbuf.timestamp.tv_sec
              << " as picture_id=" << output_record.picture_id;
-    child_message_loop_proxy_->PostTask(FROM_HERE, base::Bind(
-        &Client::PictureReady, client_, media::Picture(
+    io_message_loop_proxy_->PostTask(FROM_HERE, base::Bind(
+        &Client::PictureReady, io_client_, media::Picture(
             output_record.picture_id, dqbuf.timestamp.tv_sec)));
     decoder_frames_at_client_++;
   }
@@ -1629,7 +1631,7 @@ void ExynosVideoDecodeAccelerator::FlushTask() {
 
   // Queue up an empty buffer -- this triggers the flush.
   decoder_input_queue_.push_back(linked_ptr<BitstreamBufferRef>(
-      new BitstreamBufferRef(client_, child_message_loop_proxy_, NULL, 0,
+      new BitstreamBufferRef(io_client_, io_message_loop_proxy_, NULL, 0,
                              kFlushBufferId)));
   decoder_flushing_ = true;
 
diff --git a/content/common/gpu/media/exynos_video_decode_accelerator.h b/content/common/gpu/media/exynos_video_decode_accelerator.h
index 09264b94056e2d..76b1bb156768e2 100644
--- a/content/common/gpu/media/exynos_video_decode_accelerator.h
+++ b/content/common/gpu/media/exynos_video_decode_accelerator.h
@@ -61,6 +61,7 @@ class CONTENT_EXPORT ExynosVideoDecodeAccelerator
       EGLDisplay egl_display,
       EGLContext egl_context,
       Client* client,
+      const base::WeakPtr<Client>& io_client_,
       const base::Callback<bool(void)>& make_context_current,
       const scoped_refptr<base::MessageLoopProxy>& io_message_loop_proxy);
   virtual ~ExynosVideoDecodeAccelerator();
@@ -331,6 +332,8 @@ class CONTENT_EXPORT ExynosVideoDecodeAccelerator
   // child_message_loop_proxy_.
   base::WeakPtrFactory<Client> client_ptr_factory_;
   base::WeakPtr<Client> client_;
+  // Callbacks to |io_client_| must be executed on |io_message_loop_proxy_|.
+  base::WeakPtr<Client> io_client_;
 
   //
   // Decoder state, owned and operated by decoder_thread_.
diff --git a/content/common/gpu/media/gpu_video_decode_accelerator.cc b/content/common/gpu/media/gpu_video_decode_accelerator.cc
index a1062e3c106777..c5de2df00e2004 100644
--- a/content/common/gpu/media/gpu_video_decode_accelerator.cc
+++ b/content/common/gpu/media/gpu_video_decode_accelerator.cc
@@ -61,10 +61,19 @@ class GpuVideoDecodeAccelerator::MessageFilter
   MessageFilter(GpuVideoDecodeAccelerator* owner, int32 host_route_id)
       : owner_(owner), host_route_id_(host_route_id) {}
 
+  virtual void OnChannelError() OVERRIDE { channel_ = NULL; }
+
+  virtual void OnChannelClosing() OVERRIDE { channel_ = NULL; }
+
+  virtual void OnFilterAdded(IPC::Channel* channel) OVERRIDE {
+    channel_ = channel;
+  }
+
   virtual void OnFilterRemoved() OVERRIDE {
     // This will delete |owner_| and |this|.
     owner_->OnFilterRemoved();
   }
+
   virtual bool OnMessageReceived(const IPC::Message& msg) OVERRIDE {
     if (msg.routing_id() != host_route_id_)
       return false;
@@ -77,20 +86,35 @@ class GpuVideoDecodeAccelerator::MessageFilter
     return true;
   }
 
+  bool SendOnIOThread(IPC::Message* message) {
+    DCHECK(!message->is_sync());
+    if (!channel_) {
+      delete message;
+      return false;
+    }
+    return channel_->Send(message);
+  }
+
  protected:
   virtual ~MessageFilter() {}
 
  private:
   GpuVideoDecodeAccelerator* owner_;
   int32 host_route_id_;
+  // The channel to which this filter was added.
+  IPC::Channel* channel_;
 };
 
-GpuVideoDecodeAccelerator::GpuVideoDecodeAccelerator(int32 host_route_id,
-                                                     GpuCommandBufferStub* stub)
+GpuVideoDecodeAccelerator::GpuVideoDecodeAccelerator(
+    int32 host_route_id,
+    GpuCommandBufferStub* stub,
+    const scoped_refptr<base::MessageLoopProxy>& io_message_loop)
     : init_done_msg_(NULL),
       host_route_id_(host_route_id),
       stub_(stub),
-      texture_target_(0) {
+      texture_target_(0),
+      io_message_loop_(io_message_loop),
+      weak_factory_for_io_(this) {
   DCHECK(stub_);
   stub_->AddDestructionObserver(this);
   stub_->channel()->AddRoute(host_route_id_, this);
@@ -177,8 +201,7 @@ void GpuVideoDecodeAccelerator::NotifyError(
 
 void GpuVideoDecodeAccelerator::Initialize(
     const media::VideoCodecProfile profile,
-    IPC::Message* init_done_msg,
-    const scoped_refptr<base::MessageLoopProxy>& io_message_loop) {
+    IPC::Message* init_done_msg) {
   DCHECK(stub_);
   DCHECK(!video_decode_accelerator_.get());
   DCHECK(!init_done_msg_);
@@ -208,8 +231,9 @@ void GpuVideoDecodeAccelerator::Initialize(
       gfx::GLSurfaceEGL::GetHardwareDisplay(),
       stub_->decoder()->GetGLContext()->GetHandle(),
       this,
+      weak_factory_for_io_.GetWeakPtr(),
       make_context_current_,
-      io_message_loop));
+      io_message_loop_));
 #elif defined(OS_CHROMEOS) && defined(ARCH_CPU_X86_FAMILY) && defined(USE_X11)
   gfx::GLContextGLX* glx_context =
       static_cast<gfx::GLContextGLX*>(stub_->decoder()->GetGLContext());
@@ -355,6 +379,8 @@ void GpuVideoDecodeAccelerator::OnDestroy() {
 }
 
 void GpuVideoDecodeAccelerator::OnFilterRemoved() {
+  // We're destroying; cancel all callbacks.
+  weak_factory_for_io_.InvalidateWeakPtrs();
   child_message_loop_->DeleteSoon(FROM_HERE, this);
 }
 
@@ -390,6 +416,9 @@ void GpuVideoDecodeAccelerator::OnWillDestroyStub() { OnDestroy(); }
 
 bool GpuVideoDecodeAccelerator::Send(IPC::Message* message) {
   DCHECK(stub_);
+  if (filter_.get() && io_message_loop_->BelongsToCurrentThread())
+    return filter_->SendOnIOThread(message);
+  DCHECK(child_message_loop_->BelongsToCurrentThread());
   return stub_->channel()->Send(message);
 }
 
diff --git a/content/common/gpu/media/gpu_video_decode_accelerator.h b/content/common/gpu/media/gpu_video_decode_accelerator.h
index 70d9a440437a93..77946bc59a2083 100644
--- a/content/common/gpu/media/gpu_video_decode_accelerator.h
+++ b/content/common/gpu/media/gpu_video_decode_accelerator.h
@@ -31,7 +31,10 @@ class GpuVideoDecodeAccelerator
   // Each of the arguments to the constructor must outlive this object.
   // |stub->decoder()| will be made current around any operation that touches
   // the underlying VDA so that it can make GL calls safely.
-  GpuVideoDecodeAccelerator(int32 host_route_id, GpuCommandBufferStub* stub);
+  GpuVideoDecodeAccelerator(
+      int32 host_route_id,
+      GpuCommandBufferStub* stub,
+      const scoped_refptr<base::MessageLoopProxy>& io_message_loop);
   virtual ~GpuVideoDecodeAccelerator();
 
   // IPC::Listener implementation.
@@ -60,8 +63,7 @@ class GpuVideoDecodeAccelerator
   // The renderer process handle is valid as long as we have a channel between
   // GPU process and the renderer.
   void Initialize(const media::VideoCodecProfile profile,
-                  IPC::Message* init_done_msg,
-                  const scoped_refptr<base::MessageLoopProxy>& io_message_loop);
+                  IPC::Message* init_done_msg);
 
  private:
   class MessageFilter;
@@ -108,6 +110,12 @@ class GpuVideoDecodeAccelerator
   // GPU child message loop.
   scoped_refptr<base::MessageLoopProxy> child_message_loop_;
 
+  // GPU IO message loop.
+  scoped_refptr<base::MessageLoopProxy> io_message_loop_;
+
+  // Weak pointers will be invalidated on IO thread.
+  base::WeakPtrFactory<Client> weak_factory_for_io_;
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(GpuVideoDecodeAccelerator);
 };
 
diff --git a/content/common/gpu/media/video_decode_accelerator_impl.h b/content/common/gpu/media/video_decode_accelerator_impl.h
index 146a8efc9366c8..8b3163ebd8f1d8 100644
--- a/content/common/gpu/media/video_decode_accelerator_impl.h
+++ b/content/common/gpu/media/video_decode_accelerator_impl.h
@@ -16,8 +16,8 @@ class CONTENT_EXPORT VideoDecodeAcceleratorImpl
   VideoDecodeAcceleratorImpl();
   virtual ~VideoDecodeAcceleratorImpl();
 
-  // Returns true if media::VideoDecodeAccelerator::Decode can run on the IO
-  // thread. Otherwise Decode will run on the GPU child thread. The purpose of
+  // Returns true if VDA::Decode and VDA::Client callbacks can run on the IO
+  // thread. Otherwise they will run on the GPU child thread. The purpose of
   // running Decode on the IO thread is to reduce decode latency. Note Decode
   // should return as soon as possible and not block on the IO thread.
   virtual bool CanDecodeOnIOThread();
diff --git a/content/common/gpu/media/video_decode_accelerator_unittest.cc b/content/common/gpu/media/video_decode_accelerator_unittest.cc
index 8cae38a008ea0a..925f28760554c2 100644
--- a/content/common/gpu/media/video_decode_accelerator_unittest.cc
+++ b/content/common/gpu/media/video_decode_accelerator_unittest.cc
@@ -429,7 +429,9 @@ void ThrottlingVDAClient::NotifyError(VideoDecodeAccelerator::Error error) {
 
 // Client that can accept callbacks from a VideoDecodeAccelerator and is used by
 // the TESTs below.
-class GLRenderingVDAClient : public VideoDecodeAccelerator::Client {
+class GLRenderingVDAClient
+    : public VideoDecodeAccelerator::Client,
+      public base::SupportsWeakPtr<GLRenderingVDAClient> {
  public:
   // Doesn't take ownership of |rendering_helper| or |note|, which must outlive
   // |*this|.
@@ -606,8 +608,11 @@ void GLRenderingVDAClient::CreateDecoder() {
   CHECK(!decoder_.get());
 
   VideoDecodeAccelerator::Client* client = this;
-  if (throttling_client_)
+  base::WeakPtr<VideoDecodeAccelerator::Client> weak_client = AsWeakPtr();
+  if (throttling_client_) {
     client = throttling_client_.get();
+    weak_client = throttling_client_->AsWeakPtr();
+  }
 #if defined(OS_WIN)
   decoder_.reset(
       new DXVAVideoDecodeAccelerator(client, base::Bind(&DoNothingReturnTrue)));
@@ -617,6 +622,7 @@ void GLRenderingVDAClient::CreateDecoder() {
       static_cast<EGLDisplay>(rendering_helper_->GetGLDisplay()),
       static_cast<EGLContext>(rendering_helper_->GetGLContext()),
       client,
+      weak_client,
       base::Bind(&DoNothingReturnTrue),
       base::MessageLoopProxy::current()));
 #elif defined(ARCH_CPU_X86_FAMILY)