diff --git a/.gitignore b/.gitignore index 39564fe2b50..c10f9e51df0 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ target-tarpaulin venv lcov.info Cargo.lock +example.arrow fixtures settings.json dev/ diff --git a/Cargo.toml b/Cargo.toml index 17ae94193c8..d5d9a832d66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -249,6 +249,7 @@ harness = false name = "comparison_kernels" harness = false + [[bench]] name = "read_parquet" harness = false diff --git a/benches/filter_kernels.rs b/benches/filter_kernels.rs index ef9c725d60e..921950595f6 100644 --- a/benches/filter_kernels.rs +++ b/benches/filter_kernels.rs @@ -94,12 +94,21 @@ fn add_benchmark(c: &mut Criterion) { }); let data_array = create_primitive_array::(size, 0.5); + let data_array_nonull = create_primitive_array::(size, 0.0); c.bench_function("filter f32", |b| { b.iter(|| bench_filter(&data_array, &filter_array)) }); c.bench_function("filter f32 high selectivity", |b| { b.iter(|| bench_filter(&data_array, &dense_filter_array)) }); + + c.bench_function("filter f32 nonull", |b| { + b.iter(|| bench_filter(&data_array_nonull, &filter_array)) + }); + c.bench_function("filter f32 nonull high selectivity", |b| { + b.iter(|| bench_filter(&data_array_nonull, &dense_filter_array)) + }); + c.bench_function("filter context f32", |b| { b.iter(|| bench_built_filter(&filter, &data_array)) }); diff --git a/src/compute/filter.rs b/src/compute/filter.rs index bdda0adbd6d..27ddf4fdee9 100644 --- a/src/compute/filter.rs +++ b/src/compute/filter.rs @@ -1,13 +1,15 @@ //! Contains operators to filter arrays such as [`filter`]. 
use crate::array::growable::{make_growable, Growable}; -use crate::bitmap::utils::{BitChunkIterExact, BitChunksExact}; +use crate::bitmap::utils::{BitChunk, BitChunkIterExact, BitChunksExact}; use crate::bitmap::{utils::SlicesIterator, Bitmap, MutableBitmap}; use crate::chunk::Chunk; use crate::datatypes::DataType; use crate::error::Result; use crate::types::simd::{NativeSimd, Simd}; -use crate::types::BitChunkIter; +use crate::types::BitChunkOnes; use crate::{array::*, types::NativeType}; +use num_traits::One; +use num_traits::Zero; /// Function that can filter arbitrary arrays pub type Filter<'a> = Box Box + 'a + Send + Sync>; @@ -21,20 +23,25 @@ where I: BitChunkIterExact<<::Simd as NativeSimd>::Chunk>, { let mut chunks = values.chunks_exact(T::Simd::LANES); - let mut new = Vec::::with_capacity(filter_count); let mut dst = new.as_mut_ptr(); chunks .by_ref() .zip(mask_chunks.by_ref()) .for_each(|(chunk, validity_chunk)| { - let iter = BitChunkIter::new(validity_chunk, T::Simd::LANES); - for (value, b) in chunk.iter().zip(iter) { - if b { - unsafe { - dst.write(*value); - dst = dst.add(1); - }; + let ones_iter = BitChunkOnes::new(validity_chunk); + + let (size, _) = ones_iter.size_hint(); + if size == T::Simd::LANES { + // Fast path: all lanes are set + unsafe { + std::ptr::copy(chunk.as_ptr(), dst, size); + dst = dst.add(size); + } + } else { + for pos in ones_iter { + dst.write(chunk[pos]); + dst = dst.add(1); } } }); @@ -74,22 +81,32 @@ where let mut validity_chunks = validity.chunks::<::Chunk>(); let mut new = Vec::::with_capacity(filter_count); - let mut new_validity = MutableBitmap::with_capacity(filter_count); let mut dst = new.as_mut_ptr(); + let mut new_validity = MutableBitmap::with_capacity(filter_count); + chunks .by_ref() .zip(validity_chunks.by_ref()) .zip(mask_chunks.by_ref()) .for_each(|((chunk, validity_chunk), mask_chunk)| { - let mask_iter = BitChunkIter::new(mask_chunk, T::Simd::LANES); - let validity_iter = BitChunkIter::new(validity_chunk, 
T::Simd::LANES); - for ((value, is_valid), is_selected) in chunk.iter().zip(validity_iter).zip(mask_iter) { - if is_selected { - unsafe { - dst.write(*value); - dst = dst.add(1); - new_validity.push_unchecked(is_valid); - }; + let ones_iter = BitChunkOnes::new(mask_chunk); + let (size, _) = ones_iter.size_hint(); + + if size == T::Simd::LANES { + // Fast path: all lanes are set + unsafe { + std::ptr::copy(chunk.as_ptr(), dst, size); + dst = dst.add(size); + new_validity.extend_from_slice(validity_chunk.to_ne_bytes().as_ref(), 0, size); + } + } else { + for pos in ones_iter { + dst.write(chunk[pos]); + dst = dst.add(1); + new_validity.push( + validity_chunk & (<<::Simd as NativeSimd>::Chunk>::one() << pos) + > <<::Simd as NativeSimd>::Chunk>::zero(), + ); } } }); diff --git a/src/types/bit_chunk.rs b/src/types/bit_chunk.rs index ee78ac7c436..22f677aae4a 100644 --- a/src/types/bit_chunk.rs +++ b/src/types/bit_chunk.rs @@ -1,30 +1,26 @@ use std::{ fmt::Binary, - ops::{BitAnd, BitAndAssign, BitOr, Not, Shl, ShlAssign, ShrAssign}, + ops::{BitAndAssign, Not, Shl, ShlAssign, ShrAssign}, }; +use num_traits::PrimInt; + use super::NativeType; /// A chunk of bits. This is used to create masks of a given length /// whose width is `1` bit. In `simd_packed` notation, this corresponds to `m1xY`. pub trait BitChunk: super::private::Sealed + + PrimInt + NativeType + Binary - + BitAnd + ShlAssign + Not + ShrAssign + ShlAssign + Shl - + Eq + BitAndAssign - + BitOr { - /// A value with a single bit set at the most right position. - fn one() -> Self; - /// A value with no bits set. - fn zero() -> Self; /// convert itself into bytes. fn to_ne_bytes(self) -> Self::Bytes; /// convert itself from bytes. @@ -34,11 +30,6 @@ pub trait BitChunk: macro_rules! bit_chunk { ($ty:ty) => { impl BitChunk for $ty { - #[inline(always)] - fn zero() -> Self { - 0 - } - #[inline(always)] fn to_ne_bytes(self) -> Self::Bytes { self.to_ne_bytes() @@ -48,11 +39,6 @@ macro_rules! 
bit_chunk { fn from_ne_bytes(v: Self::Bytes) -> Self { Self::from_ne_bytes(v) } - - #[inline(always)] - fn one() -> Self { - 1 - } } }; } @@ -113,6 +99,62 @@ impl Iterator for BitChunkIter { } } +// # Safety +// a mathematical invariant of this iterator +unsafe impl crate::trusted_len::TrustedLen for BitChunkIter {} + +/// An [`Iterator`] over a [`BitChunk`]. +/// This iterator returns the position of each set bit. +/// Refer: https://lemire.me/blog/2018/03/08/iterating-over-set-bits-quickly-simd-edition/ +/// # Example +/// ``` +/// use arrow2::types::BitChunkOnes; +/// let a = 0b00010000u8; +/// let iter = BitChunkOnes::new(a); +/// let r = iter.collect::>(); +/// assert_eq!(r, vec![4]); +/// ``` +pub struct BitChunkOnes { + value: T, + remaining: usize, +} + +impl BitChunkOnes { + /// Creates a new [`BitChunkOnes`] over the set bits of `value`. + #[inline] + pub fn new(value: T) -> Self { + Self { + value, + remaining: value.count_ones() as usize, + } + } +} + +impl Iterator for BitChunkOnes { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + if self.remaining == 0 { + return None; + } + let v = self.value.trailing_zeros() as usize; + self.value &= self.value - T::one(); + + self.remaining -= 1; + Some(v) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.remaining, Some(self.remaining)) + } +} + +// # Safety +// a mathematical invariant of this iterator +unsafe impl crate::trusted_len::TrustedLen for BitChunkOnes {} + #[cfg(test)] mod tests { use super::*; @@ -125,4 +167,14 @@ mod tests { let r = iter.collect::>(); assert_eq!(r, (0..16).map(|x| x == 0 || x == 12).collect::>(),); } + + #[test] + fn test_ones() { + let a = [0b00000001, 0b00010000]; // 0th and 13th entry + let a = u16::from_ne_bytes(a); + let mut iter = BitChunkOnes::new(a); + assert_eq!(iter.size_hint(), (2, Some(2))); + assert_eq!(iter.next(), Some(0)); + assert_eq!(iter.next(), Some(12)); + } } diff --git a/src/types/mod.rs b/src/types/mod.rs index 4b794c55a1f..93e5b0667ce
100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -21,7 +21,7 @@ //! for SIMD, at [`mod@simd`]. mod bit_chunk; -pub use bit_chunk::{BitChunk, BitChunkIter}; +pub use bit_chunk::{BitChunk, BitChunkIter, BitChunkOnes}; mod index; pub mod simd; pub use index::*;