Skip to content

Commit

Permalink
prepare for 1.2.0 release
Browse files Browse the repository at this point in the history
  • Loading branch information
K-os committed Sep 21, 2021
1 parent 1c5070f commit 4924a86
Show file tree
Hide file tree
Showing 42 changed files with 2,509 additions and 953 deletions.
37 changes: 34 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ if( NOT CMAKE_VERSION VERSION_LESS 3.12.0 )
endif()

# project name
project( vvdec VERSION 1.1.2 )
project( vvdec VERSION 1.2.0 )

set( VVDEC_ENABLE_X86_SIMD TRUE )
set( VVDEC_ENABLE_ARM_SIMD FALSE )
set( VVDEC_ENABLE_X86_SIMD TRUE CACHE BOOL "enable x86 intrinsics" )
set( VVDEC_ENABLE_ARM_SIMD FALSE CACHE BOOL "enable arm intrinsics" )

if( APPLE )
if( DEFINED CMAKE_OSX_ARCHITECTURES )
Expand Down Expand Up @@ -49,6 +49,37 @@ if( VVDEC_ENABLE_ARM_SIMD )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTARGET_SIMD_ARM" )
endif()

# WebAssembly build configuration.
# Active only when the target system is Emscripten (e.g. when configuring via
# `emcmake cmake`). Marks the build as a WASM target, defines TARGET_SIMD_WASM,
# and adds the Emscripten specific compiler and linker options for all targets
# defined below this point.
#
# The variable name is passed to if() unexpanded: if() dereferences it itself, so
# the value of CMAKE_SYSTEM_NAME can never be re-interpreted as the name of
# another variable, and an empty value cannot break the expression.
if( CMAKE_SYSTEM_NAME STREQUAL "Emscripten" )
  set( VVDEC_TARGET_WASM TRUE )

  # make Emscripten output an HTML wrapper
  #set( CMAKE_EXECUTABLE_SUFFIX ".html" )

  add_compile_definitions( TARGET_SIMD_WASM )
  add_compile_options( -msimd128 )    # currently breaks running in nodejs (firefox & chrome work)

  add_compile_options( -pthread )

  add_link_options(
    --bind
    -sWASM_BIGINT
    -sTOTAL_MEMORY=2000MB             # this should be enough for FullHD decoding
    -sINVOKE_RUN=0                    # don't call main() automatically
    -sUSE_PTHREADS
    -sPROXY_TO_PTHREAD
    #-sPTHREAD_POOL_SIZE=20
    -sMINIFY_HTML=0
    -sMODULARIZE
    -sEXPORT_NAME=CreateVVdeC         # name of the JavaScript factory function
    -sEXPORTED_RUNTIME_METHODS=[ccall,cwrap,getValue,setValue,_malloc,callMain,FS]
    # list of exported C functions is read from a JSON file in the source tree
    -sEXPORTED_FUNCTIONS=@${CMAKE_CURRENT_SOURCE_DIR}/source/Lib/vvdec/wasm_exported_functions.json
    # --shell-file=${CMAKE_CURRENT_SOURCE_DIR}/web_player/shell.html
    # --preload-file=${CMAKE_CURRENT_SOURCE_DIR}/ext/bitstreams/AMVR_B_HHI_3/AMVR_B_HHI_3.bit@in.bit
    # --pre-js=${CMAKE_CURRENT_SOURCE_DIR}/pre.js
  )
endif()



list( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" )
message( STATUS "CMAKE_MODULE_PATH: updating module path to: ${CMAKE_MODULE_PATH}" )

Expand Down
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,25 @@ The above call only tests the sequences that are know to work. To run a test ove

make test-all

## Build WebAssembly using Emscripten

Install the Emscripten emsdk as documented on the website https://emscripten.org and activate the latest version (tested with 2.0.25).

Ensure the environment variables are set up correctly (e.g. run `source emsdk_env.sh` in the current shell).

Configure the VVdeC project:

emcmake cmake -B build/wasm

And build the project:

cmake --build build/wasm

The produced output consists of the `vvdecapp.wasm` binary and the corresponding JavaScript helpers (`vvdecapp.js`, `vvdecapp.worker.js`).

After importing `vvdecapp.js`, call the function `CreateVVdeC()` to create an instance of the vvdec module. The instance exposes an API similar to the one defined in `include/vvdec/vvdec.h`.


# Contributing

Feel free to contribute. To do so:
Expand Down
1 change: 1 addition & 0 deletions cmake/modules/check_missing_intrinsics.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ macro( check_intrinsic symbol_name code )
endif()
endmacro()

check_intrinsic( _mm_storeu_si16 "int16_t a = 0; __m128i x = _mm_setzero_si128(); _mm_storeu_si16( &a, x );" )
check_intrinsic( _mm_storeu_si32 "int32_t a = 0; __m128i x = _mm_setzero_si128(); _mm_storeu_si32( &a, x );" )
check_intrinsic( _mm_storeu_si64 "int64_t a = 0; __m128i x = _mm_setzero_si128(); _mm_storeu_si64( &a, x );" )
check_intrinsic( _mm_loadu_si32 "int32_t a = 0; __m128i x = _mm_setzero_si128(); x = _mm_loadu_si32( &a );" )
Expand Down
1 change: 1 addition & 0 deletions include/vvdec/vvdec.h
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ typedef struct vvdecParams
vvdecRPRUpscaling upscaleOutput; // do internal upscaling of rpl pictures to dest. resolution ( default: 0 )
vvdecLogLevel logLevel; // verbosity level
bool verifyPictureHash; // verify picture, if digest is available, true: check hash in SEI messages if available, false: ignore SEI message
bool removePadding; // copy output pictures to new buffer to remove padding (stride==width) ( default: false )
vvdecSIMD_Extension simd; // set specific simd optimization (default: max. available)
} vvdecParams;

Expand Down
3 changes: 3 additions & 0 deletions source/App/vvdecapp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ target_compile_options( ${EXE_NAME} PRIVATE $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CX
target_link_libraries( ${EXE_NAME} Threads::Threads vvdec )
target_include_directories( ${EXE_NAME} PRIVATE ../../Lib/libmd5 )

# Emscripten only: make the executable depend on the JSON list of exported
# functions, so that editing that file triggers a relink.
# CMAKE_SYSTEM_NAME is passed unexpanded so if() dereferences it exactly once.
if( CMAKE_SYSTEM_NAME STREQUAL "Emscripten" )
  set_target_properties( ${EXE_NAME} PROPERTIES
                         LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../../Lib/vvdec/wasm_exported_functions.json" )
endif()

# example: place header files in different folders
source_group( "Header Files" FILES ${INC_FILES} )
Expand Down
1 change: 1 addition & 0 deletions source/App/vvdecapp/vvdecapp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ int main( int argc, char* argv[] )
if( pcFrame->picAttributes )
{
uiBitrate += pcFrame->picAttributes->bits;
(void)uiBitrate;
}

#if 0 // just sample code to retrieve sei messages
Expand Down
2 changes: 1 addition & 1 deletion source/Lib/CommonLib/AdaptiveLoopFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,7 @@ void AdaptiveLoopFilter::filterCTU( const CPelUnitBuf& srcBuf,
if( chType < MAX_NUM_CHANNEL_TYPE && toChannelType( compID ) != chType )
continue;
#if ALF_FIX
if( !ctuEnableFlag[compIdx] && !slice->getTileGroupCcAlfEnabledFlag( compIdx-1 ) )
if( !ctuEnableFlag[compIdx] && ( compIdx == 0 || !slice->getTileGroupCcAlfEnabledFlag( compIdx-1 ) ) )
#else
if( !ctuEnableFlag[compIdx] )
#endif
Expand Down
42 changes: 6 additions & 36 deletions source/Lib/CommonLib/Buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,33 +193,6 @@ void copyBufferCore( const char *src, ptrdiff_t srcStride, char *dst, ptrdiff_t
}
}

// Extends a width x height block of samples by padSize samples on every side,
// replicating the nearest border sample.
//   ptr    : pointer to the top-left sample of the block
//   stride : distance in samples between vertically adjacent rows
// The memory around the block (padSize samples/rows in each direction) must be
// writable by the caller.
template<int padSize>
void paddingCore( Pel* ptr, ptrdiff_t stride, int width, int height )
{
  // Horizontal pass: repeat the first and the last sample of each row outwards.
  for( int row = 0; row < height; row++ )
  {
    Pel* const rowFirst = ptr + stride * row;
    Pel* const rowLast  = rowFirst + ( width - 1 );

    for( int k = 1; k <= padSize; k++ )
    {
      rowFirst[-k] = rowFirst[0];
      rowLast [ k] = rowLast [0];
    }
  }

  // Vertical pass: copy the (already horizontally padded) first and last row
  // upwards and downwards, so the corners are filled as well.
  const int  paddedRowBytes = ( width + padSize + padSize ) * sizeof( Pel );
  Pel* const topRow         = ptr - padSize;
  Pel* const bottomRow      = ptr + ( stride * ( height - 1 ) ) - padSize;

  for( int k = 1; k <= padSize; k++ )
  {
    memcpy( topRow    - ( k * stride ), topRow,    paddedRowBytes );
    memcpy( bottomRow + ( k * stride ), bottomRow, paddedRowBytes );
  }
}

void applyLutCore( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const Pel* lut )
{
// const auto rsp_sgnl_op = [=, &dst]( int ADDR ){ dst[ADDR] = lut[dst[ADDR]]; };
Expand All @@ -236,7 +209,6 @@ void applyLutCore( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const P
#undef RSP_SGNL_INC
}


void fillN_CuCore( CodingUnit** ptr, ptrdiff_t ptrStride, int width, int height, CodingUnit* cuPtr )
{
if( width == ptrStride )
Expand Down Expand Up @@ -358,13 +330,13 @@ PelBufferOps::PelBufferOps()
wghtAvg8 = addWeightedAvgCore<Pel>;

copyBuffer = copyBufferCore;
padding2 = paddingCore<2>;
padding1 = paddingCore<1>;

transpose4x4 = transpose4x4Core<Pel>;
transpose8x8 = transpose8x8Core<Pel>;

applyLut = applyLutCore;
rspFwd = nullptr;
rspBcw = nullptr;

fillN_CU = fillN_CuCore;

Expand Down Expand Up @@ -430,12 +402,6 @@ void AreaBuf<Pel>::rescaleBuf( const AreaBuf<const Pel>& beforeScaling, Componen
isLuma( compID ) ? 1 : horCollocatedChromaFlag, isLuma( compID ) ? 1 : verCollocatedChromaFlag );
}

// Maps every sample of this buffer through the lookup table `lut` in place,
// using the applyLut kernel currently installed in g_pelBufOP.
template<>
void AreaBuf<Pel>::rspSignal( const Pel* lut )
{
  g_pelBufOP.applyLut( this->buf, this->stride, this->width, this->height, lut );
}

template<>
void AreaBuf<Pel>::scaleSignal(const int scale, const ClpRng& clpRng)
{
Expand Down Expand Up @@ -716,7 +682,11 @@ void PelStorage::create( const ChromaFormat _chromaFormat, const Size& _size, co
totalWidth = ( ( totalWidth + _alignment - 1 ) / _alignment ) * _alignment;
}

#if ENABLE_SIMD_OPT_INTER
uint32_t area = totalWidth * totalHeight + 1; // +1 for the extra Pel overread in prefetchPad_SSE, in case reading from the very bottom right of the picture
#else
uint32_t area = totalWidth * totalHeight;
#endif
CHECK( !area, "Trying to create a buffer with zero area" );

m_origSi[i] = Size{ totalWidth, totalHeight };
Expand Down
11 changes: 4 additions & 7 deletions source/Lib/CommonLib/Buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,11 @@ THE POSSIBILITY OF SUCH DAMAGE.
namespace vvdec
{

#if ENABLE_SIMD_OPT_BUFFER
struct PelBufferOps
{
PelBufferOps();

#ifdef TARGET_SIMD_X86
#if defined( TARGET_SIMD_X86 ) && ENABLE_SIMD_OPT_BUFFER
void initPelBufOpsX86();
template<X86_VEXT vext>
void _initPelBufOpsX86();
Expand All @@ -85,11 +84,11 @@ struct PelBufferOps
void ( *wghtAvg4 ) ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int shift, int offset, int w0, int w1, const ClpRng& clpRng );
void ( *wghtAvg8 ) ( const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, int width, int height, int shift, int offset, int w0, int w1, const ClpRng& clpRng );
void ( *copyBuffer ) ( const char*src, ptrdiff_t srcStride, char* dst, ptrdiff_t dstStride, int width, int height );
void ( *padding1 ) ( Pel *dst, ptrdiff_t stride, int width, int height );
void ( *padding2 ) ( Pel *dst, ptrdiff_t stride, int width, int height );
void ( *transpose4x4 ) ( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride );
void ( *transpose8x8 ) ( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride );
void ( *applyLut ) ( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const Pel* lut );
void ( *rspFwd ) ( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const int bd, const Pel OrgCW, const Pel* LmcsPivot, const Pel* ScaleCoeff, const Pel* InputPivot );
void ( *rspBcw ) ( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const int bd, const int minBin, const int maxBin, const Pel* LmcsPivot, const Pel* InvScCoeff, const Pel* InputPivot );
void ( *fillN_CU ) ( CodingUnit** ptr, ptrdiff_t ptrStride, int width, int height, CodingUnit* cuPtr );

void (*sampleRateConv) ( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale,
Expand All @@ -99,7 +98,6 @@ struct PelBufferOps
const int afterScaleLeftOffset, const int afterScaleTopOffset,
const int bitDepth, const bool useLumaFilter, const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag );
};
#endif

extern PelBufferOps g_pelBufOP;

Expand Down Expand Up @@ -146,8 +144,7 @@ struct AreaBuf : public Size

void transposedFrom ( const AreaBuf<const T> &other );

void rspSignal ( const Pel *lut );
void scaleSignal ( const int scale, const ClpRng& clpRng);
void scaleSignal ( const int scale, const ClpRng& clpRng );

void rescaleBuf ( const AreaBuf<const T>& beforeScaling, const ComponentID compID, const std::pair<int, int> scalingRatio, const Window& confBefore, const Window& confAfter, const ChromaFormat chromaFormatIDC, const BitDepths& bitDepths, const bool horCollocatedChromaFlag = false, const bool verCollocatedChromaFlag = false );

Expand Down
31 changes: 22 additions & 9 deletions source/Lib/CommonLib/CodingStructure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,18 +166,22 @@ CodingUnit& CodingStructure::addCU( const UnitArea &unit, const ChannelType chTy

if( i )
{
m_predBufOffset += ( cuArea << 1 );
m_predBufOffset += (cuArea << 1);
}
else
{
m_predBufOffset += cuArea;
}

const ptrdiff_t stride = ptrdiff_t( 1 ) << m_ctuWidthLog2[i];
const Area &_blk = cu-> blocks[i];
const Area& _blk = cu->blocks[i];
const UnitScale scale = unitScale[i];
const int sclX = scale.scaleHor( _blk.x );
const int sclY = scale.scaleVer( _blk.y );
const int sclW = scale.scaleHor( _blk.width );
const int sclH = scale.scaleVer( _blk.height );

g_pelBufOP.fillN_CU( ctuData.cuPtr[i] + inCtuPos( _blk, ChannelType( i ) ), stride, scale.scaleHor( _blk.width ), scale.scaleVer( _blk.height ), cu );
g_pelBufOP.fillN_CU( ctuData.cuPtr[i] + ( sclX & m_ctuSizeMask[i] ) + ( ( sclY & m_ctuSizeMask[i] ) << m_ctuWidthLog2[i] ), stride, sclW, sclH, cu );

if( i == chType )
{
Expand Down Expand Up @@ -488,18 +492,27 @@ const CodingUnit* CodingStructure::getCURestricted( const Position &pos, const P
{
const int yshift = pcv->maxCUWidthLog2 - getChannelTypeScaleY( _chType, area.chromaFormat );
const int ydiff = ( pos.y >> yshift ) - ( curPos.y >> yshift ); // ( a <= b ) ==> a - b <= 0
const CodingUnit* cu = ydiff <= 0 ? getCU( pos, _chType ) : nullptr;
const int xshift = pcv->maxCUWidthLog2 - getChannelTypeScaleX( _chType, area.chromaFormat );
const int xdiff = ( pos.x >> xshift ) - ( curPos.x >> xshift );
const bool sameCTU = !ydiff && !xdiff;

if( cu && ( sameCTU || ( cu->slice->getIndependentSliceIdx() == curSliceIdx && cu->tileIdx == curTileIdx ) ) )
const CodingUnit* cu = nullptr;

if( sameCTU )
{
if( xdiff > 0 && sps->getEntropyCodingSyncEnabledFlag() )
{
return nullptr;
}
return getCU( pos, _chType );
}
else if( ydiff > 0 || xdiff > ( 1 - sps->getEntropyCodingSyncEnabledFlag() ) )
{
return nullptr;
}
else
{
cu = getCU( pos, _chType );
}

if( cu && cu->slice->getIndependentSliceIdx() == curSliceIdx && cu->tileIdx == curTileIdx )
{
return cu;
}
else
Expand Down
45 changes: 27 additions & 18 deletions source/Lib/CommonLib/CommonDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,21 @@ THE POSSIBILITY OF SUCH DAMAGE.
#include <iostream>
#include <iomanip>
#include <limits>
#include <cmath> // needed for std::log2()

#include <functional>
#include <mutex>

#if defined( TARGET_SIMD_X86 )
# ifdef _WIN32
# include <intrin.h>
# elif defined( __GNUC__ ) && !defined( TARGET_SIMD_WASM )
# include <x86intrin.h>
# elif defined( __GNUC__ )
# include <immintrin.h>
# endif
#endif // TARGET_SIMD_X86

namespace vvdec
{

Expand Down Expand Up @@ -346,7 +357,7 @@ static const int BIO_TEMP_BUFFER_SIZE = (MAX_BDOF_AP
static const int PROF_BORDER_EXT_W = 1;
static const int PROF_BORDER_EXT_H = 1;
static const int BCW_NUM = 5; ///< the number of weight options
static const int BCW_DEFAULT = ((uint8_t)(BCW_NUM >> 1)); ///< Default weighting index representing for w=0.5
static const int BCW_DEFAULT = 0; ///< Default weighting index representing for w=0.5, in the internal domain
static const int BCW_SIZE_CONSTRAINT = 256; ///< disabling Bcw if cu size is smaller than 256
static const int MAX_NUM_HMVP_CANDS = (MRG_MAX_NUM_CANDS-1); ///< maximum number of HMVP candidates to be stored and used in merge list
static const int MAX_NUM_HMVP_AVMPCANDS = 4; ///< maximum number of HMVP candidates to be used in AMVP list
Expand Down Expand Up @@ -558,42 +569,40 @@ template <typename ValueType> inline ValueType rightShift (const ValueType
template <typename ValueType> inline ValueType leftShift_round (const ValueType value, const int shift) { return (shift >= 0) ? ( value << shift) : ((value + (ValueType(1) << (-shift - 1))) >> -shift); }
template <typename ValueType> inline ValueType rightShift_round(const ValueType value, const int shift) { return (shift >= 0) ? ((value + (ValueType(1) << (shift - 1))) >> shift) : ( value << -shift); }

#ifdef TARGET_SIMD_X86
#ifdef _WIN32
}
# include <intrin.h>
namespace vvdec {
static inline unsigned long _bit_scan_reverse( long a )
#if defined( _WIN32 ) && defined( TARGET_SIMD_X86 )
static inline unsigned int bit_scan_reverse( int a )
{
unsigned long idx = 0;
_BitScanReverse( &idx, a );
return idx;
}
#else
#elif defined( __GNUC__ ) && defined( TARGET_SIMD_X86 ) && !defined( TARGET_SIMD_WASM )
static inline unsigned int bit_scan_reverse( int a )
{
return _bit_scan_reverse( a );
}
#elif defined( __GNUC__ )
// Returns the index of the most significant set bit of `a`
// (bit width - 1 - number of leading zero bits).
// `a` must be non-zero: __builtin_clz() is undefined for an argument of 0.
static inline unsigned int bit_scan_reverse( int a )
{
  const unsigned int topBitIdx = 8 * sizeof( a ) - 1;
  return topBitIdx - __builtin_clz( a );
}
# include <x86intrin.h>
namespace vvdec {
#endif

#endif
#if ENABLE_SIMD_LOG2 && defined( TARGET_SIMD_X86 )
#if ENABLE_SIMD_LOG2
static inline int getLog2( long val )
{
return _bit_scan_reverse( val );
return bit_scan_reverse( val );
}
#else
}
#include <cmath>
namespace vvdec {
extern int8_t g_aucLog2[MAX_CU_SIZE + 1];
static inline int getLog2( long val )
{
CHECKD( g_aucLog2[2] != 1, "g_aucLog2[] has not been initialized yet." );
if( val > 0 && val < (int)sizeof(g_aucLog2) )
if( val > 0 && val < (int) sizeof( g_aucLog2 ) )
{
return g_aucLog2[val];
}
return std::log2(val);
return std::log2( val );
}
#endif

Expand Down
Loading

0 comments on commit 4924a86

Please sign in to comment.