Skip to content

Commit

Permalink
my basic h264 decoder using libav
Browse files Browse the repository at this point in the history
  • Loading branch information
DaWelter committed Dec 6, 2016
0 parents commit be980eb
Show file tree
Hide file tree
Showing 8 changed files with 527 additions and 0 deletions.
59 changes: 59 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Other stuff
.svn
*.kdev4
*~
build
buildopt
trash
.spyderworkspace
.spyderproject
*.kate-swp
# created by vim
*.swp
*.log

#CMake stuff
CMakeCache.txt
CMakeFiles
Makefile
cmake_install.cmake
install_manifest.txt

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]


# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app
25 changes: 25 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Build script for the H.264 decoder: a standalone test executable and a
# shared library that also contains the Python binding sources.
# NOTE(review): add_compile_options() below presumably needs a CMake newer
# than the 2.8 minimum requested here -- confirm against the CMake docs.
cmake_minimum_required(VERSION 2.8)
project(python_h264decoder)

# Locate a Python 2.7 interpreter and development files, plus Boost.Python.
set(Python_ADDITIONAL_VERSIONS 2.7)
find_package(PythonInterp 2.7 REQUIRED)
find_package(PythonLibs 2.7 REQUIRED )
find_package(Boost REQUIRED COMPONENTS "python")

# Header and library search paths reported by the find modules above.
include_directories(${PYTHON_INCLUDE_DIRS})
include_directories(${Boost_INCLUDE_DIRS})
link_directories(${Boost_LIBRARY_DIRS})

# The sources use C++11 features such as nullptr and auto.
add_compile_options ("-std=c++0x")

# Standalone test program: decoder sources only, no Python or Boost linked.
add_executable(h264decoder_test1 h264decoder.cpp h264decoder_test1.cpp)
target_link_libraries(h264decoder_test1 avcodec swscale avutil)

# Shared library combining the decoder with the Python binding source.
add_library(h264decoder SHARED h264decoder.cpp h264decoder_python.cpp)
target_link_libraries(h264decoder avcodec swscale avutil ${Boost_PYTHON_LIBRARY_RELEASE} ${PYTHON_LIBRARIES})


# After each build, symlink the library from the build directory into the
# source directory -- presumably so the Python scripts there can import
# libh264decoder directly; confirm. The hard-coded "lib" prefix and ".so"
# suffix match the default naming on Linux only.
add_custom_command(TARGET h264decoder POST_BUILD
COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_BINARY_DIR}/libh264decoder.so ${CMAKE_SOURCE_DIR}/libh264decoder.so)
# Install the shared library directly into the install prefix.
install(TARGETS h264decoder LIBRARY DESTINATION .)
157 changes: 157 additions & 0 deletions h264decoder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/* I'm such a bad script kiddie. This code is entirely based on roxlu's code http://roxlu.com/2014/039/decoding-h264-and-yuv420p-playback
*/


#include <vector>
#include <stdexcept>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/avutil.h>
#include <libavutil/mem.h>
#include <libswscale/swscale.h>
}

#include "h264decoder.hpp"

typedef unsigned char ubyte;
typedef unsigned long ulong;


/* Prepares everything needed to decode a raw H.264 byte stream: looks up the
 * H.264 decoder, allocates and opens a codec context for it, creates the
 * bitstream parser and allocates the frame that receives decoded pictures.
 *
 * Throws std::runtime_error when any of these steps fails. The destructor is
 * not run for an object whose constructor throws, so everything acquired up
 * to the point of failure is released here before the exception propagates.
 */
H264Decoder::H264Decoder()
  : context(nullptr), frame(nullptr), codec(nullptr), parser(nullptr)
{
  avcodec_register_all();

  codec = avcodec_find_decoder(AV_CODEC_ID_H264);
  if (!codec)
    throw std::runtime_error("cannot find decoder");

  try
  {
    context = avcodec_alloc_context3(codec);
    if (!context)
      throw std::runtime_error("cannot allocate context");

    // Let the decoder accept packets that do not end on a frame boundary,
    // if it advertises that capability.
    if (codec->capabilities & CODEC_CAP_TRUNCATED) {
      context->flags |= CODEC_FLAG_TRUNCATED;
    }

    int err = avcodec_open2(context, codec, nullptr);
    if (err < 0)
      throw std::runtime_error("cannot open context");

    parser = av_parser_init(AV_CODEC_ID_H264);
    if (!parser)
      throw std::runtime_error("cannot init parser");

    // Last acquisition: nothing can throw after it succeeds, so the frame
    // itself never needs to be released in the handler below.
    frame = av_frame_alloc();
    if (!frame)
      throw std::runtime_error("cannot allocate frame");
  }
  catch (...)
  {
    // Undo the partial construction in reverse order of acquisition.
    if (parser)
      av_parser_close(parser);
    if (context)
    {
      avcodec_close(context);
      av_free(context);
    }
    throw;
  }
}


/* Releases what the constructor acquired: the bitstream parser, the codec
 * context (closed first, then its memory freed) and the output frame.
 */
H264Decoder::~H264Decoder()
{
  // Parser first, then the codec context, then the frame.
  av_parser_close(this->parser);

  avcodec_close(this->context);
  av_free(this->context);

  av_frame_free(&this->frame);
}


/* Feeds another chunk of the raw H.264 byte stream to the decoder.
 *
 * in_data / in_size: the bytes to append; an empty chunk is allowed.
 *
 * Returns the decoder's internal frame when this call completed a picture,
 * nullptr otherwise. The same frame object is reused on every call.
 * Throws std::runtime_error when the decoder reports an error.
 */
const AVFrame* H264Decoder::next(const ubyte* in_data, ulong in_size)
{
  AVPacket pkt_mem;
  auto pkt = &pkt_mem; // might want to use heap allocated pointer later on ...
  av_init_packet(pkt);
  // Start from a well-defined "no packet" state instead of relying on the
  // parser to overwrite these fields.
  pkt->data = nullptr;
  pkt->size = 0;

  if (in_data && in_size)
    buffer.insert(buffer.end(), in_data, in_data + in_size);

  const int nread = av_parser_parse2(parser, context, &pkt->data, &pkt->size,
                                     buffer.empty() ? nullptr : &buffer[0],
                                     static_cast<int>(buffer.size()),
                                     0, 0, AV_NOPTS_VALUE);

  // pkt->data / pkt->size describe a complete frame only once all of its data
  // has arrived; otherwise they are zeroed out.
  //
  // Decode BEFORE dropping the consumed bytes: the parser may hand back a
  // pointer straight into `buffer`, and erasing from the front of the vector
  // shifts the remaining bytes over exactly that region.
  // NOTE(review): when the packet aliases `buffer` it presumably also lacks
  // the input padding the decoder asks for -- confirm against the libav docs.
  bool decode_failed = false;
  bool have_picture = false;
  if (pkt->size && pkt->data)
  {
    int got_picture = 0;
    const int status = avcodec_decode_video2(context, frame, &got_picture, pkt);
    decode_failed = status < 0;
    have_picture = !decode_failed && got_picture;
  }

  // Drop what the parser consumed, also on the error path: the parser has
  // already taken these bytes and must not see them a second time. The
  // count is clamped so an unexpected return value cannot push the iterator
  // out of range.
  if (nread > 0)
  {
    const auto available = buffer.size();
    const auto consumed = static_cast<decltype(buffer.size())>(nread);
    buffer.erase(buffer.begin(),
                 buffer.begin() + (consumed < available ? consumed : available));
  }

  if (decode_failed)
    throw std::runtime_error("error decoding frame\n");

  return have_picture ? frame : nullptr;
}




/* Allocates the frame structure that describes the RGB output. The scaler
 * context starts out null; convert() assigns it on use.
 * Throws std::runtime_error when the frame cannot be allocated.
 */
ConverterRGB24::ConverterRGB24()
  : context(nullptr), framergb(av_frame_alloc())
{
  if (framergb == nullptr)
    throw std::runtime_error("cannot allocate frame");
}

/* Frees the scaler context and the RGB frame structure. */
ConverterRGB24::~ConverterRGB24()
{
  // context is still null here when convert() was never called.
  sws_freeContext(this->context);
  av_frame_free(&this->framergb);
}


/* Converts a decoded frame to packed RGB24 at its original resolution.
 *
 * frame:   the source picture; its width, height and pixel format are used.
 * out_rgb: caller-provided destination buffer for the pixel data; see
 *          predict_size() for the required number of bytes.
 *
 * Returns the converter's internal frame, whose data pointers and line sizes
 * refer to out_rgb and whose width/height are set to those of the source.
 * Throws std::runtime_error when no scaler context can be obtained.
 */
const AVFrame* ConverterRGB24::convert(const AVFrame *frame, ubyte* out_rgb)
{
  const int width = frame->width;
  const int height = frame->height;
  const AVPixelFormat source_format = static_cast<AVPixelFormat>(frame->format);

  // Obtain a scaler context for the current geometry and format, passing
  // the previous one in so it can be reused.
  context = sws_getCachedContext(context,
                                 width, height, source_format,
                                 width, height, PIX_FMT_RGB24, SWS_BILINEAR,
                                 nullptr, nullptr, nullptr);
  if (context == nullptr)
    throw std::runtime_error("cannot allocate context");

  // Point the output frame's planes at the external buffer, laid out as RGB24.
  AVPicture* const target = reinterpret_cast<AVPicture*>(framergb);
  avpicture_fill(target, out_rgb, PIX_FMT_RGB24, width, height);

  // Run the actual colour conversion over all rows of the picture.
  sws_scale(context, frame->data, frame->linesize, 0, height,
            framergb->data, framergb->linesize);

  framergb->width = width;
  framergb->height = height;
  return framergb;
}

/*
Returns, given a width and height, how many bytes the frame buffer is going to need.
* WARNING:
* avpicture_get_size is used in http://dranger.com/ffmpeg/tutorial01.html
* to determine the size of the output frame buffer. However, avpicture_get_size returns
* the size of a compact representation, without padding bytes. On the other hand,
* avpicture_fill will require a larger buffer when linesize > width.
*/
int ConverterRGB24::predict_size(int w, int h)
{
return avpicture_fill((AVPicture*)framergb, nullptr, PIX_FMT_RGB24, w, h);
}



/* Sets the libav log level to AV_LOG_QUIET. */
void disable_logging()
{
  const int quiet_level = AV_LOG_QUIET;
  av_log_set_level(quiet_level);
}

62 changes: 62 additions & 0 deletions h264decoder.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#ifndef H264DECODER_HPP
#define H264DECODER_HPP

#include <vector>

// Forward declarations of the libav types used below, so that including this
// header does not require the libav headers.
struct AVCodecContext;
struct AVFrame;
struct AVCodec;
struct AVCodecParserContext;
struct SwsContext;
struct AVPacket;

/* Incremental decoder for a raw H.264 byte stream.
 *
 * Bytes are pushed in arbitrary chunks through next(); whenever a chunk
 * completes a picture, the decoded frame is returned.
 *
 * NOTE(review): the class holds raw pointers that the destructor frees and
 * declares no copy operations, so copying an instance would free the same
 * resources twice. Treat instances as non-copyable.
 */
class H264Decoder
{
  AVCodecContext *context;
  AVFrame *frame;
  AVCodec *codec;
  AVCodecParserContext *parser;
  // In the documentation example on the github master branch, the packet is put on the heap.
  // In release 11 it is put on the stack, which is what we do here, too.
  //AVPacket *pkt;
  // Input bytes that have been received but not yet consumed by the parser.
  std::vector<unsigned char> buffer;
public:
  // Throws std::runtime_error when the decoder cannot be set up.
  H264Decoder();
  ~H264Decoder();
  //ulong put_data(const unsigned char* in_data, ulong in_size); ???
  //bool is_frame_available() const; ???
  //const AVFrame* decode_frame(); ???

  /* Appends in_size bytes starting at in_data to the stream.
   * Returns the internal frame (reused on every call) when a picture was
   * decoded, nullptr otherwise. Throws std::runtime_error on a decoding error.
   *
   * The size is spelled `unsigned long` rather than `ulong`: this header does
   * not declare `ulong`, so the short name only resolved where a system
   * header happened to provide it.
   */
  const AVFrame* next(const unsigned char* in_data, unsigned long in_size);
};

// TODO: Rename OutputStage?
/* Converts decoded frames to packed RGB24, writing the pixel data into a
 * buffer supplied by the caller.
 *
 * NOTE(review): like H264Decoder, this class holds raw pointers that the
 * destructor frees and declares no copy operations; do not copy instances.
 */
class ConverterRGB24
{
// Scaler context; null until the first convert() call.
SwsContext *context;
// Frame structure describing the RGB output (plane pointers, line sizes, dimensions).
AVFrame *framergb;

public:
// Throws std::runtime_error when the output frame cannot be allocated.
ConverterRGB24();
~ConverterRGB24();

// Number of bytes the out_rgb buffer must provide for a w x h picture.
int predict_size(int w, int h);
// Converts `frame` to RGB24 into out_rgb and returns the internal frame
// describing that output. Throws std::runtime_error when no scaler context
// can be obtained.
const AVFrame* convert(const AVFrame *frame, unsigned char* out_rgb);
};

void disable_logging();

/* all the documentation links
* My version of libav on ubuntu 16 appears to be from the release/11 branch on github
* Video decoding example: https://libav.org/documentation/doxygen/release/11/avcodec_8c_source.html#l00455
*
* https://libav.org/documentation/doxygen/release/9/group__lavc__decoding.html
* https://libav.org/documentation/doxygen/master/group__lavc__parsing.html
* https://libav.org/documentation/doxygen/release/11/group__lavc__parsing.html
* https://libav.org/documentation/doxygen/release/9/swscale_8h.html
* https://libav.org/documentation/doxygen/release/9/group__lavu.html
* https://libav.org/documentation/doxygen/release/9/group__lavc__picture.html
* http://dranger.com/ffmpeg/tutorial01.html
*/

#endif
36 changes: 36 additions & 0 deletions h264decoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python2

import os
import sys
import numpy as np

import libh264decoder

import matplotlib.pyplot as pyplot

# Demo: decode 'testclip.h264' chunk by chunk and show every decoded frame in
# a matplotlib window.
decoder = libh264decoder.H264Decoder()

fig, ax = pyplot.subplots(1,1)
img = None  # image artist, created when the first frame arrives

# An H.264 stream is binary data, so the file is opened in binary mode; the
# `with` block closes it when the loop ends or an error is raised.
with open('testclip.h264', 'rb') as f:
    while 1:
        # Feed the decoder small chunks. decode() is used here as returning
        # (frame, width, height, linesize), with frame None while no picture
        # is complete yet.
        data_in = f.read(1024)
        frame, w, h, ls = decoder.decode(data_in)

        if frame is not None:
            print('frame size %i bytes, w %i, h %i, linesize %i' % (len(frame), w, h, ls))
            frame = np.fromstring(frame, dtype = np.ubyte, count = len(frame), sep = '')
            # Rows are `ls` bytes long with 3 bytes per pixel; reshape on the
            # padded row length, then cut each row down to the real width.
            frame = frame.reshape((h, ls//3, 3))
            frame = frame[:,:w,:]

            if img is None:
                img = ax.imshow(frame)
                pyplot.show(block = False)
            else:
                img.set_data(frame)
                pyplot.draw()
            # Give the GUI event loop a moment to repaint.
            pyplot.pause(0.001)

        # The final, empty chunk has been passed to the decoder above.
        if not data_in:
            break
30 changes: 30 additions & 0 deletions h264decoder_perf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env python2

import os
import sys
import numpy as np
import time

import libh264decoder

# Benchmark: decode 'testclip.h264' chunk by chunk and report the achieved
# frame rate, including the conversion of each frame to a numpy array.
decoder = libh264decoder.H264Decoder()

t0 = time.time()
num_frames = 0

# An H.264 stream is binary data, so the file is opened in binary mode; the
# `with` block closes it when the loop ends or an error is raised.
with open('testclip.h264', 'rb') as f:
    while 1:
        data_in = f.read(1024)
        frame, w, h, ls = decoder.decode(data_in)

        if frame is not None:
            frame = np.fromstring(frame, dtype = np.ubyte, count = len(frame), sep = '') # this conversion drops fps from 200 to 150
            # Rows are `ls` bytes long with 3 bytes per pixel; reshape on the
            # padded row length, then cut each row down to the real width.
            frame = frame.reshape((h, ls//3, 3))
            frame = frame[:,:w,:]
            num_frames += 1

        # The final, empty chunk has been passed to the decoder above.
        if not data_in:
            break
sys.stdout.write('\n')
t1 = time.time()
# Two spaces after '=' reproduce the output of the former
# "print 'fps = ', value" statement.
print('fps =  %s' % (num_frames/(t1-t0)))
Loading

0 comments on commit be980eb

Please sign in to comment.