Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libavcodec/aarch64/vvcdsp_init_aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdi
void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
const int16_t *sao_offset_val, int eo, int width, int height);

av_cold void ff_vvc_dsp_init_aarch64(VVCDSPContext *c, const int bit_depth) {
av_cold void ff_vvc_dsp_init_aarch64(VVCDSPContext *c, const int bit_depth, int extended_precision_flag) {
if (!have_neon(av_get_cpu_flags())) return;
if (bit_depth == 8) {
c->sao.band_filter[0] =
Expand Down
4 changes: 3 additions & 1 deletion libavcodec/vvc/vvcdec.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,7 @@ static av_cold int frame_context_init(VVCFrameContext *fc, AVCodecContext *avctx

static int frame_context_setup(VVCFrameContext *fc, VVCContext *s)
{
const VVCSPS *sps = fc->ps.sps;
int ret = 0;

// copy refs from the last frame
Expand All @@ -740,7 +741,8 @@ static int frame_context_setup(VVCFrameContext *fc, VVCContext *s)
ret = pic_arrays_init(s, fc);
if (ret < 0)
goto fail;
ff_vvc_dsp_init(&fc->vvcdsp, fc->ps.sps->bit_depth);
ff_vvc_dsp_init(&fc->vvcdsp, fc->ps.sps->bit_depth,
sps->r->sps_extended_precision_flag);
ff_videodsp_init(&fc->vdsp, fc->ps.sps->bit_depth);

fail:
Expand Down
7 changes: 4 additions & 3 deletions libavcodec/vvc/vvcdsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,8 @@ typedef struct IntraEdgeParams {
#include "vvcdsp_template.c"
#undef BIT_DEPTH

void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth)
void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth,
int extended_precision_flag)
{
#undef FUNC
#define FUNC(a, depth) a ## _ ## depth
Expand All @@ -321,8 +322,8 @@ void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth)
break;
}
#if ARCH_X86
ff_vvc_dsp_init_x86(vvcdsp, bit_depth);
ff_vvc_dsp_init_x86(vvcdsp, bit_depth, extended_precision_flag);
#elif ARCH_AARCH64
ff_vvc_dsp_init_aarch64(vvcdsp, bit_depth);
ff_vvc_dsp_init_aarch64(vvcdsp, bit_depth, extended_precision_flag);
#endif
}
7 changes: 4 additions & 3 deletions libavcodec/vvc/vvcdsp.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,13 +166,14 @@ typedef struct VVCDSPContext {
VVCALFDSPContext alf;
} VVCDSPContext;

void ff_vvc_dsp_init(VVCDSPContext *hpc, int bit_depth);
void ff_vvc_dsp_init(VVCDSPContext *hpc, int bit_depth,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO it would be more elegant to pass a VVCFrameParamSets, a VVCFrameContext, or some other more general structure here (the call site of ff_vvc_dsp_init already has them available). However, including vvc_ps.h in vvcdsp.h introduced a lot of compilation errors.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, at this level we had better include only a small set of headers.

int extended_precision_flag);

extern const int8_t ff_vvc_chroma_filters[3][32][4];
extern const int8_t ff_vvc_luma_filters[3][16][8];
extern const int8_t ff_vvc_dmvr_filters[16][2];

void ff_vvc_dsp_init_x86(VVCDSPContext *c, const int bit_depth);
void ff_vvc_dsp_init_aarch64(VVCDSPContext *c, const int bit_depth);
void ff_vvc_dsp_init_x86(VVCDSPContext *c, const int bit_depth, int extended_precision_flag);
void ff_vvc_dsp_init_aarch64(VVCDSPContext *c, const int bit_depth, int extended_precision_flag);

#endif /* AVCODEC_VVCDSP_H */
3 changes: 2 additions & 1 deletion libavcodec/x86/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -206,5 +206,6 @@ X86ASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
X86ASM-OBJS-$(CONFIG_VVC_DECODER) += x86/vvc_alf.o \
x86/vvc_sao.o \
x86/vvc_sao_10bit.o \
x86/vvc_mc.o
x86/vvc_mc.o \
x86/vvc_itx_1d.o
X86ASM-OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp.o
1,142 changes: 1,142 additions & 0 deletions libavcodec/x86/vvc_itx_1d.asm

Large diffs are not rendered by default.

27 changes: 26 additions & 1 deletion libavcodec/x86/vvcdsp_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,11 +241,36 @@ PUT_VVC_LUMA_FORWARD_FUNCS(12, avx512icl)
c->inter.put[LUMA][1][1] = ff_vvc_put_vvc_luma_hv_##bitd##_##opt; \
} while (0)

void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bit_depth)
/*
 * Declare the prototype of one external inverse-transform routine named
 * ff_vvc_inv_<type>_<size>_<opt>.
 *
 * The expansion deliberately carries no trailing semicolon: each use below
 * supplies its own, so no empty declaration is left at file scope.
 */
#define ITX_FUNC(type, size, opt)                                            \
    void ff_vvc_inv_##type##_##size##_##opt(int *out, ptrdiff_t out_stride,  \
                                            const int *in, ptrdiff_t in_stride)

ITX_FUNC(dct2,  2, avx2);
ITX_FUNC(dct2,  4, avx2);
ITX_FUNC(dct2,  8, avx2);
ITX_FUNC(dct2, 16, avx2);
ITX_FUNC(dct2, 32, avx2);
ITX_FUNC(dct2, 64, avx2);

/*
 * Install the six inverse-DCT2 routines built for `opt` into the itx table
 * of the context pointed to by `c` (a variable named `c` must be in scope at
 * the expansion site). Table slots 0..5 receive the size 2, 4, 8, 16, 32 and
 * 64 variants respectively.
 *
 * The macro ends in `while (0)` without a semicolon so that a use such as
 * `IDCT2_INIT(avx2);` is exactly one statement and stays valid as the body
 * of an unbraced if/else.
 */
#define IDCT2_INIT(opt) do {                                    \
        c->itx.itx[DCT2][0] = ff_vvc_inv_dct2_2_##opt;          \
        c->itx.itx[DCT2][1] = ff_vvc_inv_dct2_4_##opt;          \
        c->itx.itx[DCT2][2] = ff_vvc_inv_dct2_8_##opt;          \
        c->itx.itx[DCT2][3] = ff_vvc_inv_dct2_16_##opt;         \
        c->itx.itx[DCT2][4] = ff_vvc_inv_dct2_32_##opt;         \
        c->itx.itx[DCT2][5] = ff_vvc_inv_dct2_64_##opt;         \
    } while (0)

void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bit_depth,
int extended_precision_flag)
{
const int cpu_flags = av_get_cpu_flags();

if (EXTERNAL_AVX2(cpu_flags)) {
if (!extended_precision_flag) {
IDCT2_INIT(avx2);
}

switch (bit_depth) {
case 8:
ALF_DSP(8);
Expand Down
4 changes: 4 additions & 0 deletions tests/checkasm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o
AVCODECOBJS-$(CONFIG_VORBIS_DECODER) += vorbisdsp.o
AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o
AVCODECOBJS-$(CONFIG_VVC_DECODER) += vvc_alf.o vvc_sao.o vvc_mc.o
AVCODECOBJS-$(CONFIG_VVC_DECODER) += vvc_alf.o \
vvc_sao.o \
vvc_mc.o \
vvc_itx_1d.o

CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes)

Expand Down
1 change: 1 addition & 0 deletions tests/checkasm/checkasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ static const struct {
{ "vvc_alf", checkasm_check_vvc_alf },
{ "vvc_sao", checkasm_check_vvc_sao },
{ "vvc_mc", checkasm_check_vvc_mc },
{ "vvc_itx_1d", checkasm_check_vvc_itx_1d },
#endif
#endif
#if CONFIG_AVFILTER
Expand Down
1 change: 1 addition & 0 deletions tests/checkasm/checkasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ void checkasm_check_vorbisdsp(void);
void checkasm_check_vvc_alf(void);
void checkasm_check_vvc_sao(void);
void checkasm_check_vvc_mc(void);
void checkasm_check_vvc_itx_1d(void);

struct CheckasmPerf;

Expand Down
4 changes: 2 additions & 2 deletions tests/checkasm/vvc_alf.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,12 @@ void checkasm_check_vvc_alf(void)
int bit_depth;
VVCDSPContext h;
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
ff_vvc_dsp_init(&h, bit_depth);
ff_vvc_dsp_init(&h, bit_depth, 0);
check_alf_filter(&h, bit_depth);
}
report("alf_filter");
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
ff_vvc_dsp_init(&h, bit_depth);
ff_vvc_dsp_init(&h, bit_depth, 0);
check_alf_classify(&h, bit_depth);
}
report("alf_classify");
Expand Down
78 changes: 78 additions & 0 deletions tests/checkasm/vvc_itx_1d.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright (c) 2023 Frank Plowman <post@frankplowman.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#include "libavutil/mem_internal.h"

#include "libavcodec/avcodec.h"

#include "libavcodec/vvc/vvcdsp.h"
#include "libavcodec/vvc/vvcdec.h"

#include "checkasm.h"

/* Bytes needed per sample: bit depth rounded up to whole bytes. Reads a
 * variable named `bit_depth` from the scope where the macro is expanded. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Row pitch used by the test: twice MAX_PB_SIZE plus the input padding. */
#define PIXEL_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE)
/* Number of elements in each test buffer: MAX_TB_SIZE rows of PIXEL_STRIDE. */
#define BUF_SIZE (PIXEL_STRIDE * MAX_TB_SIZE)

/*
 * Fill the first `size` 32-bit elements of buf0 and buf1 with the same
 * pseudo-random sequence, so both buffers end up identical.
 *
 * Each rnd() result is scaled down so the stored value lies in
 * [INT16_MIN, INT16_MAX].
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. `cond ? a : b`, `n + 1`) are evaluated as the caller intended, and
 * the locals carry a suffix to avoid shadowing names used in the arguments.
 */
#define randomize_buffers(buf0, buf1, size)                                 \
    do {                                                                    \
        int rb_k_;                                                          \
        for (rb_k_ = 0; rb_k_ < (size); ++rb_k_) {                          \
            uint32_t rb_r_ = rnd();                                         \
            int32_t  rb_a_ = INT16_MIN +                                    \
                rb_r_ / (UINT32_MAX / (INT16_MAX - INT16_MIN + 1) + 1);     \
            AV_WN32A((buf0) + rb_k_, rb_a_);                                \
            AV_WN32A((buf1) + rb_k_, rb_a_);                                \
        }                                                                   \
    } while (0)

/*
 * Check every inverse-DCT2 entry of a DSP context against its reference
 * implementation and, on request, benchmark it.
 *
 * h          DSP context; entries itx.itx[DCT2][0..5] are exercised, which
 *            correspond to transform sizes 2, 4, 8, 16, 32 and 64.
 * bit_depth  used only through SIZEOF_PIXEL when deriving the stride.
 */
static void check_idct2(VVCDSPContext h, int bit_depth)
{
    LOCAL_ALIGNED_32(int, ref_dst, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int, new_dst, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int, ref_src, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int, new_src, [BUF_SIZE]);

    const ptrdiff_t stride = PIXEL_STRIDE * SIZEOF_PIXEL;

    for (int log2_size = 1; log2_size <= 6; log2_size++) {
        const int size = 1 << log2_size;
        /* The source is declared const to match the prototypes of the
         * routines installed in the itx table. */
        declare_func_emms(AV_CPU_FLAG_MMX, void, int *dst, ptrdiff_t dst_stride,
                          const int *src, ptrdiff_t src_stride);

        randomize_buffers(ref_src, new_src, BUF_SIZE);
        /* memset() takes a byte count: scale by the element size so the
         * whole destination buffers are cleared, not just their first part. */
        memset(ref_dst, 0, BUF_SIZE * sizeof(*ref_dst));
        memset(new_dst, 0, BUF_SIZE * sizeof(*new_dst));

        if (check_func(h.itx.itx[DCT2][log2_size - 1], "vvc_inv_dct2_%d", size)) {
            call_ref(ref_dst, stride, ref_src, stride);
            call_new(new_dst, stride, new_src, stride);
            /* NOTE(review): the comparison walks `size` rows of one element
             * with a pitch of stride * sizeof(int) bytes — confirm this
             * matches the stride unit the transform routines expect. */
            checkasm_check_int32_t("vvc_itx_1d.asm", 0, ref_dst, stride * sizeof(int), new_dst, stride * sizeof(int), 1, size, "dst");
            /* Benchmark only when there is a new implementation to measure. */
            bench_new(new_dst, stride, new_src, stride);
        }
    }
}

/*
 * checkasm entry point for the 1-D VVC inverse transforms: initialises a DSP
 * context for 8-bit content with the extended-precision flag cleared, runs
 * the inverse-DCT2 checks on it and reports them under the name "idct2".
 */
void checkasm_check_vvc_itx_1d(void)
{
    const int bit_depth = 8;
    VVCDSPContext dsp;

    ff_vvc_dsp_init(&dsp, bit_depth, 0);
    check_idct2(dsp, bit_depth);
    report("idct2");
}
2 changes: 1 addition & 1 deletion tests/checkasm/vvc_mc.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ void checkasm_check_vvc_mc(void)
int bit_depth;
VVCDSPContext h;
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
ff_vvc_dsp_init(&h, bit_depth);
ff_vvc_dsp_init(&h, bit_depth, 0);
check_put_vvc_luma(&h, bit_depth);
}

Expand Down
4 changes: 2 additions & 2 deletions tests/checkasm/vvc_sao.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,14 @@ void checkasm_check_vvc_sao(void)
for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
VVCDSPContext h;

ff_vvc_dsp_init(&h, bit_depth);
ff_vvc_dsp_init(&h, bit_depth, 0);
check_sao_band(h, bit_depth);
}
report("sao_band");

for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
VVCDSPContext h;
ff_vvc_dsp_init(&h, bit_depth);
ff_vvc_dsp_init(&h, bit_depth, 0);
check_sao_edge(h, bit_depth);
}
report("sao_edge");
Expand Down