my basic h264 decoder using libav

DaWelter · Dec 6, 2016 · be980eb · be980eb
commit be980eb
Show file tree

Hide file tree

Showing 8 changed files with 527 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,59 @@
+# Other stuff
+.svn
+*.kdev4
+*~
+build
+buildopt
+trash
+.spyderworkspace
+.spyderproject
+*.kate-swp
+# created by vim
+*.swp
+*.log
+
+#CMake stuff
+CMakeCache.txt
+CMakeFiles
+Makefile
+cmake_install.cmake
+install_manifest.txt
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+
+# Fortran module files
+*.mod
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Executables
+*.exe
+*.out
+*.app
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,25 @@
+cmake_minimum_required(VERSION 2.8)
+project(python_h264decoder) # builds a standalone test executable and the Python extension library
+
+set(Python_ADDITIONAL_VERSIONS 2.7) # the bindings are built against Python 2.7
+find_package(PythonInterp 2.7 REQUIRED)
+find_package(PythonLibs 2.7 REQUIRED )
+find_package(Boost REQUIRED COMPONENTS "python") # Boost.Python is linked into the extension library below
+
+include_directories(${PYTHON_INCLUDE_DIRS})
+include_directories(${Boost_INCLUDE_DIRS})
+link_directories(${Boost_LIBRARY_DIRS})
+
+add_compile_options ("-std=c++0x") # the sources use nullptr and auto
+
+add_executable(h264decoder_test1 h264decoder.cpp h264decoder_test1.cpp) # test program, no Python involved
+target_link_libraries(h264decoder_test1 avcodec swscale avutil)
+
+add_library(h264decoder SHARED h264decoder.cpp h264decoder_python.cpp) # shared library imported from Python as libh264decoder
+target_link_libraries(h264decoder avcodec swscale avutil ${Boost_PYTHON_LIBRARY_RELEASE} ${PYTHON_LIBRARIES})
+
+
+add_custom_command(TARGET h264decoder POST_BUILD # after each build, link the library into the source dir
+                   COMMAND ${CMAKE_COMMAND} -E create_symlink 
+                   ${CMAKE_BINARY_DIR}/libh264decoder.so ${CMAKE_SOURCE_DIR}/libh264decoder.so) # NOTE(review): hard-codes the .so name and location - confirm for non-Linux builds
+install(TARGETS h264decoder LIBRARY DESTINATION .)
diff --git a/h264decoder.cpp b/h264decoder.cpp
@@ -0,0 +1,157 @@
+/* I'm such a bad script kiddie. This code is entirely based on roxlu's code http://roxlu.com/2014/039/decoding-h264-and-yuv420p-playback
+*/
+
+
+#include <vector>
+#include <stdexcept>
+
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavutil/avutil.h>
+#include <libavutil/mem.h>
+#include <libswscale/swscale.h>
+}
+
+#include "h264decoder.hpp"
+
+typedef unsigned char ubyte;
+typedef unsigned long ulong;
+
+
+/*
+ Prepares everything needed for decoding: registers the codecs, opens an H.264
+ decoder context, creates the bitstream parser and allocates the frame that
+ receives decoded pictures.
+
+ Throws std::runtime_error if any step fails. A destructor does not run for an
+ object whose constructor throws, so whatever was acquired up to the failing
+ step is released here before the exception propagates.
+*/
+H264Decoder::H264Decoder()
+{
+  // Known starting state, so the cleanup path can tell what has been acquired.
+  codec = nullptr;
+  context = nullptr;
+  parser = nullptr;
+  frame = nullptr;
+
+  avcodec_register_all();
+
+  try
+  {
+    codec = avcodec_find_decoder(AV_CODEC_ID_H264);
+    if (!codec)
+      throw std::runtime_error("cannot find decoder");
+
+    context = avcodec_alloc_context3(codec);
+    if (!context)
+      throw std::runtime_error("cannot allocate context");
+
+    // If the codec supports it, allow input that is cut at arbitrary positions
+    // rather than only at frame boundaries.
+    if(codec->capabilities & CODEC_CAP_TRUNCATED) {
+      context->flags |= CODEC_FLAG_TRUNCATED;
+    }
+
+    int err = avcodec_open2(context, codec, nullptr);
+    if (err < 0)
+      throw std::runtime_error("cannot open context");
+
+    parser = av_parser_init(AV_CODEC_ID_H264);
+    if (!parser)
+      throw std::runtime_error("cannot init parser");
+
+    frame = av_frame_alloc();
+    if (!frame)
+      throw std::runtime_error("cannot allocate frame");
+  }
+  catch (...)
+  {
+    // Same teardown as the destructor, restricted to what actually exists.
+    if (parser)
+      av_parser_close(parser);
+    if (context)
+    {
+      avcodec_close(context);
+      av_free(context);
+    }
+    if (frame)
+      av_frame_free(&frame);
+    throw;
+  }
+}
+
+
+H264Decoder::~H264Decoder() // Releases the parser, the codec context and the frame held by this decoder.
+{
+  av_parser_close(parser);
+  avcodec_close(context); // close the codec before the context memory itself is freed
+  av_free(context);
+  av_frame_free(&frame);
+}
+
+
+/*
+ Feeds in_size bytes of H.264 bitstream, starting at in_data, to the decoder.
+
+ The bytes are appended to an internal buffer which is then handed to the
+ parser. At most one packet is parsed per call; bytes the parser did not
+ consume stay in the buffer for the next call.
+
+ Returns the decoded frame if the parser produced a complete packet and the
+ decoder produced a picture for it, nullptr otherwise. The returned pointer is
+ the decoder's own frame object, which is reused by subsequent calls.
+
+ Throws std::runtime_error if the decoder reports an error.
+
+ NOTE(review): libav asks for extra padding bytes behind input buffers; the
+ internal buffer does not reserve any - confirm whether that matters here.
+*/
+const AVFrame* H264Decoder::next(const ubyte* in_data, ulong in_size)
+{
+  AVPacket pkt; // stack packet; only its data/size fields are filled in by the parser
+  av_init_packet(&pkt);
+
+  buffer.insert(buffer.end(), in_data, in_data + in_size);
+
+  const int nread = av_parser_parse2(parser, context, &pkt.data, &pkt.size,
+                                     buffer.empty() ? nullptr : &buffer[0], buffer.size(),
+                                     0, 0, AV_NOPTS_VALUE);
+
+  // data and size describe a complete packet only when all data of a frame is
+  // present; otherwise the parser leaves them zeroed.
+  int got_picture = 0;
+  int decode_result = 0;
+  if (pkt.size && pkt.data)
+  {
+    // Decode BEFORE touching the buffer: the packet handed back by the parser
+    // may point straight into the input buffer, and erasing the consumed bytes
+    // first would shift the remaining bytes over the packet data.
+    decode_result = avcodec_decode_video2(context, frame, &got_picture, &pkt);
+  }
+
+  // Drop what the parser consumed. This happens on the error path as well, so a
+  // broken packet is not fed to the decoder again on the next call.
+  if (nread > 0)
+    buffer.erase(buffer.begin(), buffer.begin() + nread);
+
+  if (decode_result < 0)
+    throw std::runtime_error("error decoding frame\n");
+
+  return got_picture ? frame : nullptr;
+}
+
+
+
+
+// Allocates the frame object that describes the RGB output. The scaler
+// context starts out empty. Throws std::runtime_error if the frame cannot be
+// allocated.
+ConverterRGB24::ConverterRGB24()
+  : context(nullptr),
+    framergb(av_frame_alloc())
+{
+  if (framergb == nullptr)
+    throw std::runtime_error("cannot allocate frame");
+}
+
+// Gives back the output frame object and the scaler context.
+ConverterRGB24::~ConverterRGB24()
+{
+  av_frame_free(&framergb);
+  sws_freeContext(context);
+}
+
+
+/*
+ Converts frame to RGB24 at the same width and height, writing the pixels into
+ the caller-supplied buffer out_rgb.
+
+ Returns the internal frame object, whose data pointers and line sizes refer to
+ out_rgb and whose width/height are set to those of the input.
+ Throws std::runtime_error if no scaler context can be obtained.
+*/
+const AVFrame* ConverterRGB24::convert(const AVFrame *frame, ubyte* out_rgb)
+{
+  const int width = frame->width;
+  const int height = frame->height;
+  const AVPixelFormat source_format = static_cast<AVPixelFormat>(frame->format);
+
+  // Reuses the previous scaler context as long as the parameters are unchanged.
+  context = sws_getCachedContext(context,
+                                 width, height, source_format,
+                                 width, height, PIX_FMT_RGB24, SWS_BILINEAR,
+                                 nullptr, nullptr, nullptr);
+  if (context == nullptr)
+    throw std::runtime_error("cannot allocate context");
+
+  // Point the output frame at the external buffer, laid out as RGB24.
+  AVPicture* const target = reinterpret_cast<AVPicture*>(framergb);
+  avpicture_fill(target, out_rgb, PIX_FMT_RGB24, width, height);
+
+  // Run the actual pixel format conversion over all rows.
+  sws_scale(context, frame->data, frame->linesize, 0, height,
+            framergb->data, framergb->linesize);
+
+  framergb->height = height;
+  framergb->width = width;
+  return framergb;
+}
+
+/*
+ Tells how many bytes the output buffer passed to convert() must provide for a
+ frame of the given width and height.
+
+ * WARNING:
+ * The tutorial at http://dranger.com/ffmpeg/tutorial01.html sizes the output
+ * buffer with avpicture_get_size. That function reports the size of a compact
+ * layout without padding bytes, whereas avpicture_fill needs a larger buffer
+ * whenever linesize > width. Hence the size is taken from avpicture_fill itself.
+ */
+int ConverterRGB24::predict_size(int w, int h)
+{
+  AVPicture* const layout = reinterpret_cast<AVPicture*>(framergb);
+  const int required_bytes = avpicture_fill(layout, nullptr, PIX_FMT_RGB24, w, h);
+  return required_bytes;
+}
+
+
+
+// Sets the libav log level to AV_LOG_QUIET.
+void disable_logging()
+{
+  const int quiet_level = AV_LOG_QUIET;
+  av_log_set_level(quiet_level);
+}
+
diff --git a/h264decoder.hpp b/h264decoder.hpp
@@ -0,0 +1,62 @@
+#ifndef H264DECODER_HPP
+#define H264DECODER_HPP
+
+#include <vector>
+
+struct AVCodecContext;
+struct AVFrame;
+struct AVCodec;
+struct AVCodecParserContext;
+struct SwsContext;
+struct AVPacket;
+
+/*
+ Incremental H.264 decoder: raw bitstream bytes go in through next(), decoded
+ frames come out once enough data has been supplied.
+
+ NOTE(review): the class holds raw libav pointers and declares no copy
+ operations, so copying an instance would release the same resources twice -
+ confirm that no caller copies it.
+*/
+class H264Decoder
+{
+  AVCodecContext        *context;
+  AVFrame               *frame;
+  AVCodec               *codec;
+  AVCodecParserContext  *parser;
+  // In the documentation example on the github master branch, the packet is put on the heap.
+  // In release 11 it is put on the stack, which is what we do here, too.
+  //AVPacket              *pkt;
+  // Input bytes that have been supplied but not yet consumed by the parser.
+  std::vector<unsigned char>  buffer;
+public:
+  // Throws std::runtime_error if the decoder cannot be set up.
+  H264Decoder();
+  ~H264Decoder();
+  //ulong put_data(const unsigned char* in_data, ulong in_size); ???
+  //bool is_frame_available() const; ???
+  //const AVFrame* decode_frame(); ???
+
+  // Appends in_size bytes from in_data to the pending input and tries to decode
+  // one frame. Returns the decoded frame, or a null pointer if none is
+  // available yet. Throws std::runtime_error on a decoding error.
+  // The size is spelled "unsigned long" because this header does not declare
+  // the non-standard "ulong" alias.
+  const AVFrame* next(const unsigned char* in_data, unsigned long in_size);
+};
+
+// TODO: Rename OutputStage?
+class ConverterRGB24 // Converts decoded frames to RGB24 using swscale.
+{
+  SwsContext *context; // scaler context, obtained on demand in convert()
+  AVFrame *framergb; // describes the RGB output; its pixel data lives in the caller's buffer
+
+public:
+  ConverterRGB24(); // throws std::runtime_error if the frame object cannot be allocated
+  ~ConverterRGB24();
+
+  int predict_size(int w, int h); // number of bytes the out_rgb buffer needs for a w x h frame
+  const AVFrame* convert(const AVFrame *frame, unsigned char* out_rgb); // writes RGB24 pixels to out_rgb and returns the frame describing them
+};
+
+void disable_logging();
+
+/* all the documentation links
+ * My version of libav on ubuntu 16 appears to be from the release/11 branch on github
+ * Video decoding example: https://libav.org/documentation/doxygen/release/11/avcodec_8c_source.html#l00455
+ * 
+ * https://libav.org/documentation/doxygen/release/9/group__lavc__decoding.html
+ * https://libav.org/documentation/doxygen/master/group__lavc__parsing.html
+ * https://libav.org/documentation/doxygen/release/11/group__lavc__parsing.html
+ * https://libav.org/documentation/doxygen/release/9/swscale_8h.html
+ * https://libav.org/documentation/doxygen/release/9/group__lavu.html
+ * https://libav.org/documentation/doxygen/release/9/group__lavc__picture.html
+ * http://dranger.com/ffmpeg/tutorial01.html
+ */
+
+#endif
diff --git a/h264decoder.py b/h264decoder.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python2
+
+import os
+import sys
+import numpy as np
+
+import libh264decoder
+
+import matplotlib.pyplot as pyplot
+
+decoder = libh264decoder.H264Decoder()
+
+fig, ax = pyplot.subplots(1,1)
+img = None
+
+f = open('testclip.h264','r')
+while 1:
+  data_in = f.read(1024)
+  frame, w, h, ls = decoder.decode(data_in)
+
+  if frame is not None:
+    print 'frame size %i bytes, w %i, h %i, linesize %i' % (len(frame), w, h, ls)
+    frame = np.fromstring(frame, dtype = np.ubyte, count = len(frame), sep = '')
+    frame = frame.reshape((h, ls/3, 3))
+    frame = frame[:,:w,:]
+
+    if not img:
+      img = ax.imshow(frame)
+      pyplot.show(block = False)
+    else:
+      img.set_data(frame)
+      pyplot.draw()
+    pyplot.pause(0.001)
+
+  if not data_in:
+    break
diff --git a/h264decoder_perf.py b/h264decoder_perf.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python2
+
+import os
+import sys
+import numpy as np
+import time
+
+import libh264decoder
+
+decoder = libh264decoder.H264Decoder()
+
+t0 = time.time()
+num_frames = 0
+
+f = open('testclip.h264','r')
+while 1:
+  data_in = f.read(1024)
+  frame, w, h, ls = decoder.decode(data_in)
+
+  if frame is not None:
+    frame = np.fromstring(frame, dtype = np.ubyte, count = len(frame), sep = '') # this conversion drops fps from 200 to 150
+    frame = frame.reshape((h, ls/3, 3))
+    frame = frame[:,:w,:]
+    num_frames += 1
+
+  if not data_in:
+    break
+print '\n',
+t1 = time.time()
+print 'fps = ', (num_frames/(t1-t0))