From 81b25d8456c80220979405936325bf082ec41494 Mon Sep 17 00:00:00 2001 From: Vibhoothi Date: Sun, 29 Mar 2020 11:58:04 +0530 Subject: [PATCH] Integrate AV-Metrics for calculation of metrics For showing metrics in CLI use --metrics for showing all metrics and use --psnr for showing PSNR. This commit introduces: - Frame Metrics function to calculate PSNR, PSNR-HVS, SSIM, MS-SSIM, CIEDE2000. - Quality Metrics Structure to store all the calculated values - Adding METRICS as an argument - Updates --psnr calculation based on av-metrics - Calculated metrics in a neat way - Introduce metrics_cli as an additional parameter for process_frame. Because parse_cli is too expensive to call inside the encode loop, the metrics_cli enum is passed as an argument to process_frame, with the value supplied from the do_encode function. --- Cargo.toml | 5 ++ src/api/config.rs | 5 -- src/api/internal.rs | 38 +--------- src/api/test.rs | 2 - src/api/util.rs | 2 - src/bin/common.rs | 19 ++++- src/bin/rav1e.rs | 14 +++- src/bin/stats.rs | 174 +++++++++++++++++++++++++++++++++++--------- src/lib.rs | 1 - src/metrics.rs | 66 ----------------- 10 files changed, 176 insertions(+), 150 deletions(-) delete mode 100644 src/metrics.rs diff --git a/Cargo.toml b/Cargo.toml index 3a0b5c132c..81ab3c7b7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ binaries = [ "fern", "console", "better-panic", + "av-metrics", ] default = ["binaries", "asm", "signal_support"] asm = ["nasm-rs", "cc"] @@ -69,6 +70,7 @@ aom-sys = { version = "0.1.3", optional = true } scan_fmt = { version = "0.2.3", optional = true, default-features = false } ivf = { version = "0.1", path = "ivf/", optional = true } v_frame = { version = "0.1", path = "v_frame/" } +av-metrics = { version = "0.4", optional = true } rayon = "1.0" toml = { version = "0.5", optional = true } arrayvec = "0.5" @@ -104,6 +106,9 @@ version = "0.1.7" optional = true features = ["parallel"] +[replace] +"v_frame:0.1.0" = { path = 
"v_frame/" } + [target.'cfg(unix)'.dependencies] signal-hook = { version = "0.1.9", optional = true } diff --git a/src/api/config.rs b/src/api/config.rs index 2ccd36b899..ccc7cb2285 100644 --- a/src/api/config.rs +++ b/src/api/config.rs @@ -109,10 +109,6 @@ pub struct EncoderConfig { pub tiles: usize, /// Number of frames to read ahead for the RDO lookahead computation. pub rdo_lookahead_frames: usize, - /// If enabled, computes the PSNR values and stores them in [`Packet`]. - /// - /// [`Packet`]: struct.Packet.html#structfield.psnr - pub show_psnr: bool, /// Settings which affect the enconding speed vs. quality trade-off. pub speed_settings: SpeedSettings, @@ -171,7 +167,6 @@ impl EncoderConfig { tiles: 0, rdo_lookahead_frames: 40, speed_settings: SpeedSettings::from_preset(speed), - show_psnr: false, } } diff --git a/src/api/internal.rs b/src/api/internal.rs index b4f9155cf7..ed40d9fb13 100644 --- a/src/api/internal.rs +++ b/src/api/internal.rs @@ -16,7 +16,6 @@ use crate::dist::get_satd; use crate::encoder::*; use crate::frame::*; use crate::hawktracer::*; -use crate::metrics::calculate_frame_psnr; use crate::partition::*; use crate::rate::RCState; use crate::rate::FRAME_NSUBTYPES; @@ -351,18 +350,6 @@ impl ContextInner { Ok(()) } - fn get_frame(&self, input_frameno: u64) -> Arc> { - // Clones only the arc, so low cost overhead - self - .frame_q - .get(&input_frameno) - .as_ref() - .unwrap() - .as_ref() - .unwrap() - .clone() - } - /// Indicates whether more frames need to be read into the frame queue /// in order for frame queue lookahead to be full. 
fn needs_more_frame_q_lookahead(&self, input_frameno: u64) -> bool { @@ -1101,7 +1088,6 @@ impl ContextInner { let input_frameno = frame_data.fi.input_frameno; let frame_type = frame_data.fi.frame_type; - let bit_depth = frame_data.fi.sequence.bit_depth; let qp = frame_data.fi.base_q_idx; let enc_stats = frame_data.fs.enc_stats.clone(); self.finalize_packet( @@ -1109,7 +1095,6 @@ impl ContextInner { source, input_frameno, frame_type, - bit_depth, qp, enc_stats, ) @@ -1222,14 +1207,12 @@ impl ContextInner { if fi.show_frame { let input_frameno = fi.input_frameno; let frame_type = fi.frame_type; - let bit_depth = fi.sequence.bit_depth; let qp = fi.base_q_idx; self.finalize_packet( rec, source, input_frameno, frame_type, - bit_depth, qp, enc_stats, ) @@ -1283,7 +1266,7 @@ impl ContextInner { fn finalize_packet( &mut self, rec: Option>>, source: Option>>, - input_frameno: u64, frame_type: FrameType, bit_depth: usize, qp: u8, + input_frameno: u64, frame_type: FrameType, qp: u8, enc_stats: EncoderStats, ) -> Result, EncoderStatus> { let data = self.packet_data.clone(); @@ -1292,25 +1275,8 @@ impl ContextInner { return Err(EncoderStatus::Failure); } - let mut psnr = None; - if self.config.show_psnr { - if let Some(ref rec) = rec { - let original_frame = self.get_frame(input_frameno); - psnr = Some(calculate_frame_psnr(&*original_frame, rec, bit_depth)); - } - } - self.frames_processed += 1; - Ok(Packet { - data, - rec, - source, - input_frameno, - frame_type, - psnr, - qp, - enc_stats, - }) + Ok(Packet { data, rec, source, input_frameno, frame_type, qp, enc_stats }) } fn garbage_collect(&mut self, cur_input_frameno: u64) { diff --git a/src/api/test.rs b/src/api/test.rs index fe8bafb3ac..eb9f64068b 100644 --- a/src/api/test.rs +++ b/src/api/test.rs @@ -1710,7 +1710,6 @@ fn log_q_exp_overflow() { non_square_partition: false, ..Default::default() }, - show_psnr: false, }, threads: 1, }; @@ -1778,7 +1777,6 @@ fn guess_frame_subtypes_assert() { non_square_partition: false, 
..Default::default() }, - show_psnr: false, }, threads: 1, }; diff --git a/src/api/util.rs b/src/api/util.rs index 5157c13bd5..4d651136d6 100644 --- a/src/api/util.rs +++ b/src/api/util.rs @@ -170,8 +170,6 @@ pub struct Packet { pub input_frameno: u64, /// Type of the shown frame. pub frame_type: FrameType, - /// PSNR for Y, U, and V planes for the shown frame. - pub psnr: Option<(f64, f64, f64)>, /// QP selected for the frame. pub qp: u8, /// Block-level encoding stats for the frame diff --git a/src/bin/common.rs b/src/bin/common.rs index 57724005b4..f132bcfaa7 100644 --- a/src/bin/common.rs +++ b/src/bin/common.rs @@ -9,6 +9,7 @@ use crate::error::*; use crate::muxer::{create_muxer, Muxer}; +use crate::stats::MetricsEnabled; use crate::{ColorPrimaries, MatrixCoefficients, TransferCharacteristics}; use clap::{App, AppSettings, Arg, ArgMatches, Shell, SubCommand}; use rav1e::prelude::*; @@ -42,6 +43,7 @@ pub struct CliOptions { pub verbose: Verbose, pub benchmark: bool, pub threads: usize, + pub metrics_enabled: MetricsEnabled, pub pass1file_name: Option, pub pass2file_name: Option, pub save_config: Option, @@ -74,6 +76,8 @@ fn build_speed_long_help() -> String { } #[allow(unused_mut)] +/// Only call this once at the start of the app, +/// otherwise bad things will happen. 
pub fn parse_cli() -> Result { let ver_short = version::short(); let ver_long = version::full(); @@ -340,6 +344,11 @@ pub fn parse_cli() -> Result { .help("Calculate and display PSNR metrics") .long("psnr") ) + .arg( + Arg::with_name("METRICS") + .help("Calulate and display several metrics including PSNR, SSIM, CIEDE2000 etc") + .long("metrics") + ) .arg( Arg::with_name("RECONSTRUCTION") .help("Outputs a Y4M file containing the output from the decoder") @@ -448,6 +457,14 @@ pub fn parse_cli() -> Result { Verbose::Normal }; + let metrics_enabled = if matches.is_present("METRICS") { + MetricsEnabled::All + } else if matches.is_present("PSNR") { + MetricsEnabled::Psnr + } else { + MetricsEnabled::None + }; + Ok(CliOptions { io, enc, @@ -456,6 +473,7 @@ pub fn parse_cli() -> Result { // if a parameter has a default value. color_range_specified: matches.occurrences_of("PIXEL_RANGE") > 0, override_time_base: matches.is_present("FRAME_RATE"), + metrics_enabled, skip: matches.value_of("SKIP").unwrap().parse().unwrap(), benchmark: matches.is_present("BENCHMARK"), verbose, @@ -615,7 +633,6 @@ fn parse_config(matches: &ArgMatches<'_>) -> Result { .map(|reservior_frame_delay| reservior_frame_delay.parse().unwrap()); cfg.rdo_lookahead_frames = matches.value_of("RDO_LOOKAHEAD_FRAMES").unwrap_or("40").parse().unwrap(); - cfg.show_psnr = matches.is_present("PSNR"); cfg.tune = matches.value_of("TUNE").unwrap().parse().unwrap(); if cfg.tune == Tune::Psychovisual { diff --git a/src/bin/rav1e.rs b/src/bin/rav1e.rs index 8b9cd76622..8ff4e06019 100644 --- a/src/bin/rav1e.rs +++ b/src/bin/rav1e.rs @@ -100,6 +100,7 @@ fn process_frame( pass1file: Option<&mut File>, pass2file: Option<&mut File>, buffer: &mut [u8], buf_pos: &mut usize, mut y4m_enc: Option<&mut y4m::Encoder<'_, Box>>, + metrics_cli: MetricsEnabled, ) -> Result>, CliError> { let y4m_details = source.input.get_video_details(); let mut frame_summaries = Vec::new(); @@ -155,7 +156,12 @@ fn process_frame( { 
write_y4m_frame(y4m_enc_uw, rec, y4m_details); } - frame_summaries.push(pkt.into()); + frame_summaries.push(build_frame_summary( + pkt, + y4m_details.bit_depth, + y4m_details.chroma_sampling, + metrics_cli, + )); } Err(EncoderStatus::NeedMoreData) => { source.read_frame(ctx, y4m_details); @@ -197,6 +203,7 @@ fn do_encode( output: &mut dyn Muxer, source: &mut Source, pass1file_name: Option<&String>, pass2file_name: Option<&String>, mut y4m_enc: Option>>, + metrics_enabled: MetricsEnabled, ) -> Result<(), CliError> { let mut ctx: Context = cfg.new_context().map_err(|e| e.context("Invalid encoder settings"))?; @@ -224,6 +231,7 @@ fn do_encode( &mut buffer, &mut buf_pos, y4m_enc.as_mut(), + metrics_enabled, )? { if verbose != Verbose::Quiet { for frame in frame_info { @@ -419,7 +427,7 @@ fn run() -> Result<(), error::CliError> { let progress = ProgressInfo::new( Rational { num: video_info.time_base.den, den: video_info.time_base.num }, if cli.limit == 0 { None } else { Some(cli.limit) }, - cfg.enc.show_psnr, + cli.metrics_enabled, ); for _ in 0..cli.skip { @@ -467,6 +475,7 @@ fn run() -> Result<(), error::CliError> { cli.pass1file_name.as_ref(), cli.pass2file_name.as_ref(), y4m_enc, + cli.metrics_enabled, )? } else { do_encode::>>( @@ -478,6 +487,7 @@ fn run() -> Result<(), error::CliError> { cli.pass1file_name.as_ref(), cli.pass2file_name.as_ref(), y4m_enc, + cli.metrics_enabled, )? } if cli.benchmark { diff --git a/src/bin/stats.rs b/src/bin/stats.rs index 3b2abd9a5a..4d7ad9f7ec 100644 --- a/src/bin/stats.rs +++ b/src/bin/stats.rs @@ -7,7 +7,9 @@ // Media Patent License 1.0 was not distributed with this source code in the // PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+use av_metrics::video::*; use rav1e::data::EncoderStats; +use rav1e::prelude::Rational; use rav1e::prelude::*; use rav1e::{Packet, Pixel}; use std::fmt; @@ -19,24 +21,34 @@ pub struct FrameSummary { pub size: usize, pub input_frameno: u64, pub frame_type: FrameType, - /// PSNR for Y, U, and V planes - pub psnr: Option<(f64, f64, f64)>, + /// Contains metrics such as PSNR, SSIM, etc. + pub metrics: QualityMetrics, /// QP selected for the frame. pub qp: u8, /// Block-level encoding stats for the frame pub enc_stats: EncoderStats, } -impl From> for FrameSummary { - fn from(packet: Packet) -> Self { - Self { - size: packet.data.len(), - input_frameno: packet.input_frameno, - frame_type: packet.frame_type, - psnr: packet.psnr, - qp: packet.qp, - enc_stats: packet.enc_stats, - } +pub fn build_frame_summary( + packets: Packet, bit_depth: usize, chroma_sampling: ChromaSampling, + metrics_cli: MetricsEnabled, +) -> FrameSummary { + let metrics_input_frame: &Frame = packets.source.as_ref().unwrap(); + let metrics_output_frame: &Frame = packets.rec.as_ref().unwrap(); + let encode_metrics: QualityMetrics = calculate_frame_metrics( + metrics_input_frame, + metrics_output_frame, + bit_depth, + chroma_sampling, + metrics_cli, + ); + FrameSummary { + size: packets.data.len(), + input_frameno: packets.input_frameno, + frame_type: packets.frame_type, + metrics: encode_metrics, + qp: packets.qp, + enc_stats: packets.enc_stats, } } @@ -48,10 +60,10 @@ impl fmt::Display for FrameSummary { self.input_frameno, self.frame_type, self.size, - if let Some(psnr) = self.psnr { + if let Some(psnr) = self.metrics.psnr { format!( " - PSNR: Y: {:.4} Cb: {:.4} Cr: {:.4}", - psnr.0, psnr.1, psnr.2 + psnr.y, psnr.u, psnr.v ) } else { String::new() @@ -75,13 +87,14 @@ pub struct ProgressInfo { // This value will be updated in the CLI very frequently, so we cache the previous value // to reduce the overall complexity. 
encoded_size: usize, - // Whether to display PSNR statistics during and at end of encode - show_psnr: bool, + // Which Metrics to display during and at end of encode + metrics_enabled: MetricsEnabled, } impl ProgressInfo { pub fn new( - frame_rate: Rational, total_frames: Option, show_psnr: bool, + frame_rate: Rational, total_frames: Option, + metrics_enabled: MetricsEnabled, ) -> Self { Self { frame_rate, @@ -89,7 +102,7 @@ impl ProgressInfo { time_started: Instant::now(), frame_info: Vec::with_capacity(total_frames.unwrap_or_default()), encoded_size: 0, - show_psnr, + metrics_enabled, } } @@ -309,8 +322,13 @@ impl ProgressInfo { self.print_transform_type_summary(); self.print_prediction_modes_summary(); } - if self.show_psnr { - self.print_video_psnr(); + match self.metrics_enabled { + MetricsEnabled::None => info!("----"), + MetricsEnabled::Psnr => self.print_video_psnr(), + MetricsEnabled::All => { + self.print_video_psnr(); + self.print_video_all(); + } } } @@ -329,22 +347,28 @@ impl ProgressInfo { fn print_video_psnr(&self) { info!("----------"); - let psnr_y = - self.frame_info.iter().map(|fi| fi.psnr.unwrap().0).sum::() - / self.frame_info.len() as f64; - let psnr_u = - self.frame_info.iter().map(|fi| fi.psnr.unwrap().1).sum::() - / self.frame_info.len() as f64; - let psnr_v = - self.frame_info.iter().map(|fi| fi.psnr.unwrap().2).sum::() - / self.frame_info.len() as f64; + let psnr_y = sum_metric(&self.frame_info, |fi| fi.metrics.psnr.unwrap().y); + let psnr_u = sum_metric(&self.frame_info, |fi| fi.metrics.psnr.unwrap().u); + let psnr_v = sum_metric(&self.frame_info, |fi| fi.metrics.psnr.unwrap().v); + let psnr_avg = + sum_metric(&self.frame_info, |fi| fi.metrics.psnr.unwrap().avg); info!( - "Mean PSNR: Y: {:.4} Cb: {:.4} Cr: {:.4} Avg: {:.4}", - psnr_y, - psnr_u, - psnr_v, - (psnr_y + psnr_u + psnr_v) / 3.0 - ) + "Mean PSNR: Avg: {:.4} Y: {:.4} Cb: {:.4} Cr: {:.4}", + psnr_avg, psnr_y, psnr_u, psnr_v + ); + } + fn print_video_all(&self) { + 
info!("----------"); + let psnr_hvs = + sum_metric(&self.frame_info, |fi| fi.metrics.psnr_hvs.unwrap().avg); + let ssim = sum_metric(&self.frame_info, |fi| fi.metrics.ssim.unwrap().avg); + let ms_ssim = + sum_metric(&self.frame_info, |fi| fi.metrics.ms_ssim.unwrap().avg); + let ciede = sum_metric(&self.frame_info, |fi| fi.metrics.ciede.unwrap()); + info!("PSNR HVS: {:.4}", psnr_hvs); + info!("SSIM: {:.4} MS SSIM: {:.4}", ssim, ms_ssim); + info!("CIEDE2000: {:.4}", ciede); + info!("----------"); } fn print_block_type_summary(&self) { @@ -571,6 +595,12 @@ impl ProgressInfo { } } +fn sum_metric f64>( + frame_info: &[FrameSummary], map_fn: F, +) -> f64 { + frame_info.iter().map(map_fn).sum::() / frame_info.len() as f64 +} + impl fmt::Display for ProgressInfo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(total_frames) = self.total_frames { @@ -609,3 +639,77 @@ fn secs_to_human_time(mut secs: u64) -> String { format!("{}s", secs) } } + +#[derive(Debug, Clone, Copy, Default, PartialEq)] +pub struct QualityMetrics { + /// Peak Signal-to-Noise Ratio for Y, U, and V planes + pub psnr: Option, + /// Peak Signal-to-Noise Ratio as perceived by the Human Visual System-- + /// taking into account Contrast Sensitivity Function (CSF) + pub psnr_hvs: Option, + /// Structural Similarity + pub ssim: Option, + /// Multi-Scale Structural Similarity + pub ms_ssim: Option, + /// CIEDE 2000 color difference algorithm: https://en.wikipedia.org/wiki/Color_difference#CIEDE2000 + pub ciede: Option, + /// Aligned Peak Signal-to-Noise Ratio for Y, U, and V planes + pub apsnr: Option, + /// Netflix's Video Multimethod Assessment Fusion + pub vmaf: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum MetricsEnabled { + /// Don't calculate any metrics. + None, + /// Calculate the PSNR of each plane, but no other metrics. + Psnr, + /// Calculate all implemented metrics. Currently implemented metrics match what is available via AWCY. 
+ All, +} + +pub fn calculate_frame_metrics( + frame1: &Frame, frame2: &Frame, bit_depth: usize, cs: ChromaSampling, + metrics: MetricsEnabled, +) -> QualityMetrics { + let frame1_info = FrameInfo { + planes: frame1.planes.clone(), + bit_depth, + chroma_sampling: cs, + }; + + let frame2_info = FrameInfo { + planes: frame2.planes.clone(), + bit_depth, + chroma_sampling: cs, + }; + + match metrics { + MetricsEnabled::None => QualityMetrics::default(), + MetricsEnabled::Psnr => { + let mut metrics = QualityMetrics::default(); + metrics.psnr = + Some(psnr::calculate_frame_psnr(&frame1_info, &frame2_info).unwrap()); + metrics + } + MetricsEnabled::All => { + let mut metrics = QualityMetrics::default(); + metrics.psnr = + Some(psnr::calculate_frame_psnr(&frame1_info, &frame2_info).unwrap()); + metrics.psnr_hvs = Some( + psnr_hvs::calculate_frame_psnr_hvs(&frame1_info, &frame2_info) + .unwrap(), + ); + let ssim = ssim::calculate_frame_ssim(&frame1_info, &frame2_info); + metrics.ssim = Some(ssim.unwrap()); + let ms_ssim = ssim::calculate_frame_msssim(&frame1_info, &frame2_info); + metrics.ms_ssim = Some(ms_ssim.unwrap()); + let ciede = ciede::calculate_frame_ciede(&frame1_info, &frame2_info); + metrics.ciede = Some(ciede.unwrap()); + // TODO APSNR + // TODO VMAF + metrics + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 31073155d9..5734bc3f47 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -180,7 +180,6 @@ mod entropymode; mod lrf; mod mc; mod me; -mod metrics; mod rate; mod recon_intra; mod scan_order; diff --git a/src/metrics.rs b/src/metrics.rs deleted file mode 100644 index 962b5643c7..0000000000 --- a/src/metrics.rs +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2018-2019, The rav1e contributors. All rights reserved -// -// This source code is subject to the terms of the BSD 2 Clause License and -// the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License -// was not distributed with this source code in the LICENSE file, you can -// obtain it at www.aomedia.org/license/software. If the Alliance for Open -// Media Patent License 1.0 was not distributed with this source code in the -// PATENTS file, you can obtain it at www.aomedia.org/license/patent. - -use crate::frame::Frame; -use crate::frame::Plane; -use crate::util::{CastFromPrimitive, Pixel}; - -/// Calculates the PSNR for a `Frame` by comparing the original (uncompressed) to the compressed -/// version of the frame. Higher PSNR is better--PSNR is capped at 100 in order to avoid skewed -/// statistics from e.g. all black frames, which would otherwise show a PSNR of infinity. -/// -/// See https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio for more details. -pub fn calculate_frame_psnr( - original: &Frame, compressed: &Frame, bit_depth: usize, -) -> (f64, f64, f64) { - ( - calculate_plane_psnr( - &original.planes[0], - &compressed.planes[0], - bit_depth, - ), - calculate_plane_psnr( - &original.planes[1], - &compressed.planes[1], - bit_depth, - ), - calculate_plane_psnr( - &original.planes[2], - &compressed.planes[2], - bit_depth, - ), - ) -} - -/// Calculate the PSNR for a `Plane` by comparing the original (uncompressed) to the compressed -/// version. -fn calculate_plane_psnr( - original: &Plane, compressed: &Plane, bit_depth: usize, -) -> f64 { - let mse = calculate_plane_mse(original, compressed); - if mse <= 0.000_000_000_1 { - return 100.0; - } - let max = ((1 << bit_depth) - 1) as f64; - 20.0 * max.log10() - 10.0 * mse.log10() -} - -/// Calculate the mean squared error for a `Plane` by comparing the original (uncompressed) -/// to the compressed version. 
-fn calculate_plane_mse( - original: &Plane, compressed: &Plane, -) -> f64 { - original - .iter() - .zip(compressed.iter()) - .map(|(a, b)| (i32::cast_from(a) - i32::cast_from(b)).abs() as u64) - .map(|err| err * err) - .sum::() as f64 - / (original.cfg.width * original.cfg.height) as f64 -}