diff --git a/akaze/Cargo.toml b/akaze/Cargo.toml index bc218c7f..8411696e 100644 --- a/akaze/Cargo.toml +++ b/akaze/Cargo.toml @@ -27,7 +27,6 @@ space = "0.17.0" bitarray = "0.9.0" thiserror = { version = "1.0.40", default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } -imageproc = "0.23.0" rayon = { version = "1.7.0", optional = true } [dev-dependencies] @@ -40,6 +39,7 @@ criterion = "0.3.5" pretty_env_logger = "0.4.0" image = "0.24" bitarray = { version = "0.9.0", features = ["space"] } +imageproc = "0.23.0" [[bench]] name = "criterion" diff --git a/akaze/benches/criterion.rs b/akaze/benches/criterion.rs index ae5cc215..523b25d4 100644 --- a/akaze/benches/criterion.rs +++ b/akaze/benches/criterion.rs @@ -8,7 +8,9 @@ fn image_to_kps(path: impl AsRef) -> (Vec, Vec GrayFloatImage { // similar to cv::Scharr with xorder=1, yorder=0, scale=1, delta=0 - GrayFloatImage(imageproc::filter::separable_filter( - &image.0, - &[-1., 0., 1.], - &[3., 10., 3.], - )) + GrayFloatImage(separable_filter(&image.0, &[-1., 0., 1.], &[3., 10., 3.])) } pub fn simple_scharr_vertical(image: &GrayFloatImage) -> GrayFloatImage { // similar to cv::Scharr with xorder=0, yorder=1, scale=1, delta=0 - GrayFloatImage(imageproc::filter::separable_filter( - &image.0, - &[3., 10., 3.], - &[-1., 0., 1.], - )) + GrayFloatImage(separable_filter(&image.0, &[3., 10., 3.], &[-1., 0., 1.])) } /// Compute the Scharr derivative horizontally @@ -34,11 +26,7 @@ pub fn scharr_horizontal(image: &GrayFloatImage, sigma_size: u32) -> GrayFloatIm } let main_kernel = computer_scharr_kernel(sigma_size, FilterOrder::Main); let off_kernel = computer_scharr_kernel(sigma_size, FilterOrder::Off); - GrayFloatImage(imageproc::filter::separable_filter( - &image.0, - &main_kernel, - &off_kernel, - )) + GrayFloatImage(separable_filter(&image.0, &main_kernel, &off_kernel)) } /// Compute the Scharr derivative vertically @@ -57,11 +45,7 @@ pub fn scharr_vertical(image: &GrayFloatImage, sigma_size: u32) -> GrayFloatImag } let main_kernel = computer_scharr_kernel(sigma_size, FilterOrder::Main); let off_kernel = computer_scharr_kernel(sigma_size, FilterOrder::Off); - GrayFloatImage(imageproc::filter::separable_filter( - &image.0, - &off_kernel, - &main_kernel, - )) + GrayFloatImage(separable_filter(&image.0, &off_kernel, &main_kernel)) } #[derive(Copy, Clone, Debug, PartialEq)] diff --git a/akaze/src/image.rs b/akaze/src/image.rs index 5eaf6e7f..9aac3a00 100644 --- a/akaze/src/image.rs +++ b/akaze/src/image.rs @@ -1,11 +1,12 @@ use derive_more::{Deref, DerefMut}; use image::{DynamicImage, ImageBuffer, Luma, Pixel}; -use imageproc::filter::separable_filter_equal; use log::*; use ndarray::{azip, s, Array2, ArrayView2, ArrayViewMut2}; use nshare::{MutNdarray2, RefNdarray2}; use std::f32; +type GrayImageBuffer = ImageBuffer, Vec>; + /// The image type we use in this library. /// /// This is simply a wrapper around a contiguous f32 vector. A reader might @@ -31,7 +32,7 @@ use std::f32; /// like using a separable filter, and using the filters implemented /// here ended up speeding up everything a lot. #[derive(Debug, Clone, Deref, DerefMut)] -pub struct GrayFloatImage(pub ImageBuffer, Vec>); +pub struct GrayFloatImage(pub GrayImageBuffer); impl GrayFloatImage { /// Create a unit float image from the image crate's DynamicImage type. @@ -177,6 +178,104 @@ impl GrayFloatImage { } } +fn horizontal_filter(image: &GrayImageBuffer, kernel: &[f32]) -> GrayImageBuffer { + let kernel_size = kernel.len(); + debug_assert!(kernel_size % 2 == 1); + let kernel_half_size = kernel_size / 2; + let width = image.width() as usize; + let height = image.height() as usize; + let mut output = vec![0.0; width * height]; + // Process each row independently. + let row_in_it = image.as_raw().chunks_exact(width); + let row_out_it = output.chunks_exact_mut(width); + let mut scratch = vec![0f32; width + kernel_half_size * 2]; + for (row_in, row_out) in row_in_it.zip(row_out_it) { + // Prefill extended buffer with center and edge values. + scratch[0..kernel_half_size].fill(row_in[0]); + scratch[kernel_half_size..kernel_half_size + width].copy_from_slice(row_in); + scratch[kernel_half_size + width..].fill(row_in[width - 1]); + // Apply kernel. + scratch + .windows(kernel_size) + .zip(row_out) + .for_each(|(window, output)| { + *output = window + .iter() + .zip(kernel.iter()) + .map(|(a, b)| a * b) + .sum::() + }); + } + GrayImageBuffer::from_raw(width as u32, height as u32, output).unwrap() +} + +fn vertical_filter(image: &GrayImageBuffer, kernel: &[f32]) -> GrayImageBuffer { + let kernel_size = kernel.len(); + debug_assert!(kernel_size % 2 == 1); + let kernel_half_size = kernel_size / 2; + let width = image.width() as usize; + let height = image.height() as usize; + let mut output = vec![0.0; width * height]; + // We use a scratch buffer of L1 cache width (64 bytes) to optimize memory access. + const SCRATCH_WIDTH: usize = 16; + let scratch_height = height + kernel_half_size * 2; + let mut scratch = vec![0f32; SCRATCH_WIDTH * scratch_height]; + let image = image.as_raw(); + for x_s in (0..width).step_by(SCRATCH_WIDTH) { + // Fill the scratch buffer with the column values. + // First paddings. + let x_e: usize = (x_s + SCRATCH_WIDTH).min(width); + for x in x_s..x_e { + let scratch_col_start = (x - x_s) * scratch_height; + for i in 0..kernel_half_size { + scratch[scratch_col_start + i] = image[x]; + } + let scratch_end_col_start = scratch_col_start + kernel_half_size + height; + let image_last_row_start = (height - 1) * width; + for i in 0..kernel_half_size { + scratch[scratch_end_col_start + i] = image[image_last_row_start + x]; + } + } + // Then main content. + for y in 0..height { + let image_row_start = y * width; + for x in x_s..x_e { + scratch[(x - x_s) * scratch_height + y + kernel_half_size] = + image[image_row_start + x]; + } + } + // Apply kernel. + let col_count = x_e - x_s; + scratch + .chunks(scratch_height) + .take(col_count) + .enumerate() + .for_each(|(dx, col)| { + let x = x_s + dx; + col.windows(kernel_size) + .enumerate() + .for_each(|(y, window)| { + let value = window + .iter() + .zip(kernel.iter()) + .map(|(a, b)| a * b) + .sum::(); + output[y * width + x] = value; + }); + }); + } + GrayImageBuffer::from_raw(width as u32, height as u32, output).unwrap() +} + +pub(crate) fn separable_filter( + image: &GrayImageBuffer, + h_kernel: &[f32], + v_kernel: &[f32], +) -> GrayImageBuffer { + let h = horizontal_filter(image, h_kernel); + vertical_filter(&h, v_kernel) +} + /// The Gaussian function. /// /// # Arguments @@ -222,7 +321,7 @@ pub fn gaussian_blur(image: &GrayFloatImage, r: f32) -> GrayFloatImage { let kernel_radius = (2.0 * r).ceil() as usize; let kernel_size = kernel_radius * 2 + 1; let kernel = gaussian_kernel(r, kernel_size); - GrayFloatImage(separable_filter_equal(image, &kernel)) + GrayFloatImage(separable_filter(image, &kernel, &kernel)) } #[cfg(test)] @@ -246,4 +345,24 @@ mod tests { assert!(f32::abs(*i - *j) < 0.0001); } } + + #[test] + fn horizontal_filter() { + let image = image::open("../res/0000000000.png").unwrap(); + let image = super::GrayFloatImage::from_dynamic(&image); + let kernel = gaussian_kernel(3.0, 7); + let filtered_ours = super::horizontal_filter(&image.0, &kernel); + let filtered_imageproc = imageproc::filter::horizontal_filter(&image.0, &kernel); + imageproc::assert_pixels_eq!(filtered_ours, filtered_imageproc); + } + + #[test] + fn vertical_filter() { + let image = image::open("../res/0000000000.png").unwrap(); + let image = super::GrayFloatImage::from_dynamic(&image); + let kernel = gaussian_kernel(3.0, 7); + let filtered_ours = super::vertical_filter(&image.0, &kernel); + let filtered_imageproc = imageproc::filter::vertical_filter(&image.0, &kernel); + imageproc::assert_pixels_eq!(filtered_ours, filtered_imageproc); + } } diff --git a/akaze/src/lib.rs b/akaze/src/lib.rs index 571d90bd..2c6d8981 100644 --- a/akaze/src/lib.rs +++ b/akaze/src/lib.rs @@ -18,10 +18,10 @@ use log::*; use nonlinear_diffusion::pm_g2; use std::{cmp::Reverse, path::Path, time::Instant}; -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; #[cfg(feature = "rayon")] use rayon::prelude::*; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; #[derive(thiserror::Error, Debug)] pub enum Error {