From cf4edb43e745fd54ed629857497cb12fc8bf080f Mon Sep 17 00:00:00 2001 From: "wuchengli@chromium.org" Date: Wed, 11 Sep 2013 14:47:28 +0000 Subject: [PATCH] Send PictureReady and NotifyEndOfBitstreamBuffer directly to IO thread. This can reduce decode latency by 4ms per frame. BUG=170345 TEST=Run http://apprtc.appspot.com on Chromebook Daisy. Play local hardware-accelerated videos on Daisy. Run video_decode_accelerator_unittest. Review URL: https://chromiumcodereview.appspot.com/23819040 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@222550 0039d316-1c4b-4281-b951-d872f2087c98 --- content/common/gpu/gpu_command_buffer_stub.cc | 6 +-- .../media/exynos_video_decode_accelerator.cc | 10 +++-- .../media/exynos_video_decode_accelerator.h | 3 ++ .../gpu/media/gpu_video_decode_accelerator.cc | 41 ++++++++++++++++--- .../gpu/media/gpu_video_decode_accelerator.h | 14 +++++-- .../gpu/media/video_decode_accelerator_impl.h | 4 +- .../video_decode_accelerator_unittest.cc | 10 ++++- 7 files changed, 68 insertions(+), 20 deletions(-) diff --git a/content/common/gpu/gpu_command_buffer_stub.cc b/content/common/gpu/gpu_command_buffer_stub.cc index 14ac92fbb4aaea..da3fb0ec65f592 100644 --- a/content/common/gpu/gpu_command_buffer_stub.cc +++ b/content/common/gpu/gpu_command_buffer_stub.cc @@ -720,9 +720,9 @@ void GpuCommandBufferStub::OnCreateVideoDecoder( IPC::Message* reply_message) { TRACE_EVENT0("gpu", "GpuCommandBufferStub::OnCreateVideoDecoder"); int decoder_route_id = channel_->GenerateRouteID(); - GpuVideoDecodeAccelerator* decoder = - new GpuVideoDecodeAccelerator(decoder_route_id, this); - decoder->Initialize(profile, reply_message, channel_->io_message_loop()); + GpuVideoDecodeAccelerator* decoder = new GpuVideoDecodeAccelerator( + decoder_route_id, this, channel_->io_message_loop()); + decoder->Initialize(profile, reply_message); // decoder is registered as a DestructionObserver of this stub and will // self-delete during destruction of this stub. } diff --git a/content/common/gpu/media/exynos_video_decode_accelerator.cc b/content/common/gpu/media/exynos_video_decode_accelerator.cc index e643969d8e6d6c..edce5f3ef8bcb6 100644 --- a/content/common/gpu/media/exynos_video_decode_accelerator.cc +++ b/content/common/gpu/media/exynos_video_decode_accelerator.cc @@ -205,6 +205,7 @@ ExynosVideoDecodeAccelerator::ExynosVideoDecodeAccelerator( EGLDisplay egl_display, EGLContext egl_context, Client* client, + const base::WeakPtr& io_client, const base::Callback& make_context_current, const scoped_refptr& io_message_loop_proxy) : child_message_loop_proxy_(base::MessageLoopProxy::current()), @@ -212,6 +213,7 @@ ExynosVideoDecodeAccelerator::ExynosVideoDecodeAccelerator( weak_this_(base::AsWeakPtr(this)), client_ptr_factory_(client), client_(client_ptr_factory_.GetWeakPtr()), + io_client_(io_client), decoder_thread_("ExynosDecoderThread"), decoder_state_(kUninitialized), decoder_delay_bitstream_buffer_id_(-1), @@ -611,7 +613,7 @@ void ExynosVideoDecodeAccelerator::DecodeTask( bitstream_buffer.id()); scoped_ptr bitstream_record(new BitstreamBufferRef( - client_, child_message_loop_proxy_, + io_client_, io_message_loop_proxy_, new base::SharedMemory(bitstream_buffer.handle(), true), bitstream_buffer.size(), bitstream_buffer.id())); if (!bitstream_record->shm->Map(bitstream_buffer.size())) { @@ -1418,8 +1420,8 @@ void ExynosVideoDecodeAccelerator::DequeueGsc() { gsc_output_buffer_queued_count_--; DVLOG(3) << "DequeueGsc(): returning input_id=" << dqbuf.timestamp.tv_sec << " as picture_id=" << output_record.picture_id; - child_message_loop_proxy_->PostTask(FROM_HERE, base::Bind( - &Client::PictureReady, client_, media::Picture( + io_message_loop_proxy_->PostTask(FROM_HERE, base::Bind( + &Client::PictureReady, io_client_, media::Picture( output_record.picture_id, dqbuf.timestamp.tv_sec))); decoder_frames_at_client_++; } @@ -1629,7 +1631,7 @@ void ExynosVideoDecodeAccelerator::FlushTask() { // Queue up an empty buffer -- this triggers the flush. decoder_input_queue_.push_back(linked_ptr( - new BitstreamBufferRef(client_, child_message_loop_proxy_, NULL, 0, + new BitstreamBufferRef(io_client_, io_message_loop_proxy_, NULL, 0, kFlushBufferId))); decoder_flushing_ = true; diff --git a/content/common/gpu/media/exynos_video_decode_accelerator.h b/content/common/gpu/media/exynos_video_decode_accelerator.h index 09264b94056e2d..76b1bb156768e2 100644 --- a/content/common/gpu/media/exynos_video_decode_accelerator.h +++ b/content/common/gpu/media/exynos_video_decode_accelerator.h @@ -61,6 +61,7 @@ class CONTENT_EXPORT ExynosVideoDecodeAccelerator EGLDisplay egl_display, EGLContext egl_context, Client* client, + const base::WeakPtr& io_client_, const base::Callback& make_context_current, const scoped_refptr& io_message_loop_proxy); virtual ~ExynosVideoDecodeAccelerator(); @@ -331,6 +332,8 @@ class CONTENT_EXPORT ExynosVideoDecodeAccelerator // child_message_loop_proxy_. base::WeakPtrFactory client_ptr_factory_; base::WeakPtr client_; + // Callbacks to |io_client_| must be executed on |io_message_loop_proxy_|. + base::WeakPtr io_client_; // // Decoder state, owned and operated by decoder_thread_. diff --git a/content/common/gpu/media/gpu_video_decode_accelerator.cc b/content/common/gpu/media/gpu_video_decode_accelerator.cc index a1062e3c106777..c5de2df00e2004 100644 --- a/content/common/gpu/media/gpu_video_decode_accelerator.cc +++ b/content/common/gpu/media/gpu_video_decode_accelerator.cc @@ -61,10 +61,19 @@ class GpuVideoDecodeAccelerator::MessageFilter MessageFilter(GpuVideoDecodeAccelerator* owner, int32 host_route_id) : owner_(owner), host_route_id_(host_route_id) {} + virtual void OnChannelError() OVERRIDE { channel_ = NULL; } + + virtual void OnChannelClosing() OVERRIDE { channel_ = NULL; } + + virtual void OnFilterAdded(IPC::Channel* channel) OVERRIDE { + channel_ = channel; + } + virtual void OnFilterRemoved() OVERRIDE { // This will delete |owner_| and |this|. owner_->OnFilterRemoved(); } + virtual bool OnMessageReceived(const IPC::Message& msg) OVERRIDE { if (msg.routing_id() != host_route_id_) return false; @@ -77,20 +86,35 @@ class GpuVideoDecodeAccelerator::MessageFilter return true; } + bool SendOnIOThread(IPC::Message* message) { + DCHECK(!message->is_sync()); + if (!channel_) { + delete message; + return false; + } + return channel_->Send(message); + } + protected: virtual ~MessageFilter() {} private: GpuVideoDecodeAccelerator* owner_; int32 host_route_id_; + // The channel to which this filter was added. + IPC::Channel* channel_; }; -GpuVideoDecodeAccelerator::GpuVideoDecodeAccelerator(int32 host_route_id, - GpuCommandBufferStub* stub) +GpuVideoDecodeAccelerator::GpuVideoDecodeAccelerator( + int32 host_route_id, + GpuCommandBufferStub* stub, + const scoped_refptr& io_message_loop) : init_done_msg_(NULL), host_route_id_(host_route_id), stub_(stub), - texture_target_(0) { + texture_target_(0), + io_message_loop_(io_message_loop), + weak_factory_for_io_(this) { DCHECK(stub_); stub_->AddDestructionObserver(this); stub_->channel()->AddRoute(host_route_id_, this); @@ -177,8 +201,7 @@ void GpuVideoDecodeAccelerator::NotifyError( void GpuVideoDecodeAccelerator::Initialize( const media::VideoCodecProfile profile, - IPC::Message* init_done_msg, - const scoped_refptr& io_message_loop) { + IPC::Message* init_done_msg) { DCHECK(stub_); DCHECK(!video_decode_accelerator_.get()); DCHECK(!init_done_msg_); @@ -208,8 +231,9 @@ void GpuVideoDecodeAccelerator::Initialize( gfx::GLSurfaceEGL::GetHardwareDisplay(), stub_->decoder()->GetGLContext()->GetHandle(), this, + weak_factory_for_io_.GetWeakPtr(), make_context_current_, - io_message_loop)); + io_message_loop_)); #elif defined(OS_CHROMEOS) && defined(ARCH_CPU_X86_FAMILY) && defined(USE_X11) gfx::GLContextGLX* glx_context = static_cast(stub_->decoder()->GetGLContext()); @@ -355,6 +379,8 @@ void GpuVideoDecodeAccelerator::OnDestroy() { } void GpuVideoDecodeAccelerator::OnFilterRemoved() { + // We're destroying; cancel all callbacks. + weak_factory_for_io_.InvalidateWeakPtrs(); child_message_loop_->DeleteSoon(FROM_HERE, this); } @@ -390,6 +416,9 @@ void GpuVideoDecodeAccelerator::OnWillDestroyStub() { OnDestroy(); } bool GpuVideoDecodeAccelerator::Send(IPC::Message* message) { DCHECK(stub_); + if (filter_.get() && io_message_loop_->BelongsToCurrentThread()) + return filter_->SendOnIOThread(message); + DCHECK(child_message_loop_->BelongsToCurrentThread()); return stub_->channel()->Send(message); } diff --git a/content/common/gpu/media/gpu_video_decode_accelerator.h b/content/common/gpu/media/gpu_video_decode_accelerator.h index 70d9a440437a93..77946bc59a2083 100644 --- a/content/common/gpu/media/gpu_video_decode_accelerator.h +++ b/content/common/gpu/media/gpu_video_decode_accelerator.h @@ -31,7 +31,10 @@ class GpuVideoDecodeAccelerator // Each of the arguments to the constructor must outlive this object. // |stub->decoder()| will be made current around any operation that touches // the underlying VDA so that it can make GL calls safely. - GpuVideoDecodeAccelerator(int32 host_route_id, GpuCommandBufferStub* stub); + GpuVideoDecodeAccelerator( + int32 host_route_id, + GpuCommandBufferStub* stub, + const scoped_refptr& io_message_loop); virtual ~GpuVideoDecodeAccelerator(); // IPC::Listener implementation. @@ -60,8 +63,7 @@ class GpuVideoDecodeAccelerator // The renderer process handle is valid as long as we have a channel between // GPU process and the renderer. void Initialize(const media::VideoCodecProfile profile, - IPC::Message* init_done_msg, - const scoped_refptr& io_message_loop); + IPC::Message* init_done_msg); private: class MessageFilter; @@ -108,6 +110,12 @@ class GpuVideoDecodeAccelerator // GPU child message loop. scoped_refptr child_message_loop_; + // GPU IO message loop. + scoped_refptr io_message_loop_; + + // Weak pointers will be invalidated on IO thread. + base::WeakPtrFactory weak_factory_for_io_; + DISALLOW_IMPLICIT_CONSTRUCTORS(GpuVideoDecodeAccelerator); }; diff --git a/content/common/gpu/media/video_decode_accelerator_impl.h b/content/common/gpu/media/video_decode_accelerator_impl.h index 146a8efc9366c8..8b3163ebd8f1d8 100644 --- a/content/common/gpu/media/video_decode_accelerator_impl.h +++ b/content/common/gpu/media/video_decode_accelerator_impl.h @@ -16,8 +16,8 @@ class CONTENT_EXPORT VideoDecodeAcceleratorImpl VideoDecodeAcceleratorImpl(); virtual ~VideoDecodeAcceleratorImpl(); - // Returns true if media::VideoDecodeAccelerator::Decode can run on the IO - // thread. Otherwise Decode will run on the GPU child thread. The purpose of + // Returns true if VDA::Decode and VDA::Client callbacks can run on the IO + // thread. Otherwise they will run on the GPU child thread. The purpose of // running Decode on the IO thread is to reduce decode latency. Note Decode // should return as soon as possible and not block on the IO thread. virtual bool CanDecodeOnIOThread(); diff --git a/content/common/gpu/media/video_decode_accelerator_unittest.cc b/content/common/gpu/media/video_decode_accelerator_unittest.cc index 8cae38a008ea0a..925f28760554c2 100644 --- a/content/common/gpu/media/video_decode_accelerator_unittest.cc +++ b/content/common/gpu/media/video_decode_accelerator_unittest.cc @@ -429,7 +429,9 @@ void ThrottlingVDAClient::NotifyError(VideoDecodeAccelerator::Error error) { // Client that can accept callbacks from a VideoDecodeAccelerator and is used by // the TESTs below. -class GLRenderingVDAClient : public VideoDecodeAccelerator::Client { +class GLRenderingVDAClient + : public VideoDecodeAccelerator::Client, + public base::SupportsWeakPtr { public: // Doesn't take ownership of |rendering_helper| or |note|, which must outlive // |*this|. @@ -606,8 +608,11 @@ void GLRenderingVDAClient::CreateDecoder() { CHECK(!decoder_.get()); VideoDecodeAccelerator::Client* client = this; - if (throttling_client_) + base::WeakPtr weak_client = AsWeakPtr(); + if (throttling_client_) { client = throttling_client_.get(); + weak_client = throttling_client_->AsWeakPtr(); + } #if defined(OS_WIN) decoder_.reset( new DXVAVideoDecodeAccelerator(client, base::Bind(&DoNothingReturnTrue))); @@ -617,6 +622,7 @@ void GLRenderingVDAClient::CreateDecoder() { static_cast(rendering_helper_->GetGLDisplay()), static_cast(rendering_helper_->GetGLContext()), client, + weak_client, base::Bind(&DoNothingReturnTrue), base::MessageLoopProxy::current())); #elif defined(ARCH_CPU_X86_FAMILY)