Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 30 additions & 16 deletions encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ mod tests {
use vortex_session::VortexSession;

use super::*;
use crate::BitPackedVTable;
use crate::bitpack_compress::bitpack_encode;

static SESSION: LazyLock<VortexSession> =
Expand Down Expand Up @@ -354,11 +353,15 @@ mod tests {
let bitpacked = bitpack_encode(&zeros, 10, None).unwrap();
assert_eq!(bitpacked.len(), 1025);
assert!(bitpacked.patches().is_some());
let bitpacked = bitpacked.slice(1023..1025).unwrap();
let actual = unpack_array(
bitpacked.as_::<BitPackedVTable>(),
&mut SESSION.create_execution_ctx(),
)?;
let slice_ref = bitpacked.into_array().slice(1023..1025).unwrap();
let actual = {
let mut ctx = SESSION.create_execution_ctx();
slice_ref
.clone()
.execute::<Canonical>(&mut ctx)
.unwrap()
.into_primitive()
};
assert_arrays_eq!(actual, PrimitiveArray::from_iter([1535u16, 1536]));
Ok(())
}
Expand All @@ -371,11 +374,15 @@ mod tests {
let bitpacked = bitpack_encode(&zeros, 10, None).unwrap();
assert_eq!(bitpacked.len(), 2229);
assert!(bitpacked.patches().is_some());
let bitpacked = bitpacked.slice(1023..2049).unwrap();
let actual = unpack_array(
bitpacked.as_::<BitPackedVTable>(),
&mut SESSION.create_execution_ctx(),
)?;
let slice_ref = bitpacked.into_array().slice(1023..2049).unwrap();
let actual = {
let mut ctx = SESSION.create_execution_ctx();
slice_ref
.clone()
.execute::<Canonical>(&mut ctx)
.unwrap()
.into_primitive()
};
assert_arrays_eq!(
actual,
PrimitiveArray::from_iter((1023u16..2049).map(|x| x + 512))
Expand Down Expand Up @@ -664,15 +671,22 @@ mod tests {
// Test with sliced array (offset > 0).
let values = PrimitiveArray::from_iter(0u32..2048);
let bitpacked = bitpack_encode(&values, 11, None).unwrap();
let sliced = bitpacked.slice(500..1500).unwrap();
let slice_ref = bitpacked.into_array().slice(500..1500).unwrap();
let sliced = {
let mut ctx = SESSION.create_execution_ctx();
slice_ref
.clone()
.execute::<Canonical>(&mut ctx)
.unwrap()
.into_primitive()
};

// Test all three methods on the sliced array.
let sliced_bp = sliced.as_::<BitPackedVTable>();
let primitive_result = unpack_to_primitive(sliced_bp);
let unpacked_array = unpack_array(sliced_bp, &mut SESSION.create_execution_ctx())?;
let primitive_result = sliced.clone();
let unpacked_array = sliced;
let executed = {
let mut ctx = SESSION.create_execution_ctx();
sliced.execute::<Canonical>(&mut ctx).unwrap()
slice_ref.clone().execute::<Canonical>(&mut ctx).unwrap()
};

assert_eq!(
Expand Down
7 changes: 5 additions & 2 deletions encodings/fastlanes/src/bitpacking/vtable/kernels/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,12 @@ use crate::BitPackedArray;
use crate::BitPackedVTable;
use crate::bitpacking::vtable::kernels::UNPACK_CHUNK_THRESHOLD;
use crate::bitpacking::vtable::kernels::chunked_indices;
use crate::bitpacking::vtable::kernels::slice::BitPackingSliceKernel;

pub(crate) const PARENT_KERNELS: ParentKernelSet<BitPackedVTable> =
ParentKernelSet::new(&[ParentKernelSet::lift(&BitPackingFilterKernel)]);
pub(crate) const PARENT_KERNELS: ParentKernelSet<BitPackedVTable> = ParentKernelSet::new(&[
ParentKernelSet::lift(&BitPackingFilterKernel),
ParentKernelSet::lift(&BitPackingSliceKernel),
]);

/// The threshold over which it is faster to fully unpack the entire [`BitPackedArray`] and then
/// filter the result than to unpack only specific bitpacked values into the output buffer.
Expand Down
1 change: 1 addition & 0 deletions encodings/fastlanes/src/bitpacking/vtable/kernels/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

pub(crate) mod filter;
pub(crate) mod slice;

/// Assuming the buffer is already allocated (which will happen at most once), then unpacking all
/// 1024 elements takes ~8.8x as long as unpacking a single element on an M2 Macbook Air.
Expand Down
111 changes: 111 additions & 0 deletions encodings/fastlanes/src/bitpacking/vtable/kernels/slice.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::cmp::max;

use vortex_array::ArrayRef;
use vortex_array::ExecutionCtx;
use vortex_array::IntoArray;
use vortex_array::arrays::SliceArray;
use vortex_array::arrays::SliceVTable;
use vortex_array::kernel::ExecuteParentKernel;
use vortex_error::VortexResult;

use crate::BitPackedArray;
use crate::BitPackedVTable;

/// Kernel to execute slicing fused with bit-packed decoding.
#[derive(Debug)]
pub(crate) struct BitPackingSliceKernel;

impl ExecuteParentKernel<BitPackedVTable> for BitPackingSliceKernel {
type Parent = SliceVTable;

fn execute_parent(
&self,
array: &BitPackedArray,
parent: &SliceArray,
_child_idx: usize,
_ctx: &mut ExecutionCtx,
) -> VortexResult<Option<ArrayRef>> {
// TODO(joe): fix me https://github.com/vortex-data/vortex/pull/5958#discussion_r2696436008
// If buffers are on device, we cannot eagerly slice because Patches::slice
// requires binary search on the indices which needs host memory for now
if !array.is_host() {
return Ok(None);
}

let range = parent.slice_range().clone();
let offset_start = range.start + array.offset() as usize;
let offset_stop = range.end + array.offset() as usize;
let offset = offset_start % 1024;
let block_start = max(0, offset_start - offset);
let block_stop = offset_stop.div_ceil(1024) * 1024;

let encoded_start = (block_start / 8) * array.bit_width() as usize;
let encoded_stop = (block_stop / 8) * array.bit_width() as usize;

// slice the buffer using the encoded start/stop values
// SAFETY: slicing packed values without decoding preserves invariants
Ok(Some(unsafe {
BitPackedArray::new_unchecked(
array.packed().slice(encoded_start..encoded_stop),
array.dtype.clone(),
array.validity()?.slice(range.clone())?,
array
.patches()
.map(|p| p.slice(range.clone()))
.transpose()?
.flatten(),
array.bit_width(),
range.len(),
offset as u16,
)
.into_array()
}))
}
}

#[cfg(test)]
mod tests {
use std::sync::LazyLock;

use vortex_array::Array;
use vortex_array::IntoArray;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::SliceArray;
use vortex_array::session::ArraySession;
use vortex_array::vtable::VTable;
use vortex_error::VortexResult;
use vortex_session::VortexSession;

use crate::BitPackedVTable;
use crate::bitpack_compress::bitpack_encode;

static SESSION: LazyLock<VortexSession> =
LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

#[test]
fn test_execute_parent_returns_bitpacked_slice() -> VortexResult<()> {
let values = vortex_array::arrays::PrimitiveArray::from_iter(0u32..2048);
let bitpacked = bitpack_encode(&values, 11, None)?;

let slice_array = SliceArray::new(bitpacked.clone().into_array(), 500..1500);

let mut ctx = SESSION.create_execution_ctx();
let reduced = <BitPackedVTable as VTable>::execute_parent(
&bitpacked,
&slice_array.into_array(),
0,
&mut ctx,
)?
.expect("expected slice kernel to execute");

assert!(reduced.is::<BitPackedVTable>());
let reduced_bp = reduced.as_::<BitPackedVTable>();
assert_eq!(reduced_bp.offset(), 500);
assert_eq!(reduced.len(), 1000);

Ok(())
}
}
34 changes: 2 additions & 32 deletions encodings/fastlanes/src/bitpacking/vtable/mod.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::cmp::max;
use std::ops::Range;

use vortex_array::ArrayRef;
use vortex_array::Canonical;
use vortex_array::DeserializeMetadata;
use vortex_array::ExecutionCtx;
use vortex_array::IntoArray;
use vortex_array::ProstMetadata;
use vortex_array::SerializeMetadata;
use vortex_array::buffer::BufferHandle;
Expand All @@ -21,7 +19,6 @@ use vortex_array::vtable;
use vortex_array::vtable::ArrayId;
use vortex_array::vtable::NotSupported;
use vortex_array::vtable::VTable;
use vortex_array::vtable::ValidityHelper;
use vortex_array::vtable::ValidityVTableFromValidityHelper;
use vortex_dtype::DType;
use vortex_dtype::PType;
Expand Down Expand Up @@ -274,35 +271,8 @@ impl VTable for BitPackedVTable {
RULES.evaluate(array, parent, child_idx)
}

// TODO(joe): fix me https://github.com/vortex-data/vortex/pull/5958#discussion_r2696436008
fn slice(array: &Self::Array, range: Range<usize>) -> VortexResult<Option<ArrayRef>> {
let offset_start = range.start + array.offset() as usize;
let offset_stop = range.end + array.offset() as usize;
let offset = offset_start % 1024;
let block_start = max(0, offset_start - offset);
let block_stop = offset_stop.div_ceil(1024) * 1024;

let encoded_start = (block_start / 8) * array.bit_width() as usize;
let encoded_stop = (block_stop / 8) * array.bit_width() as usize;

// slice the buffer using the encoded start/stop values
// SAFETY: slicing packed values without decoding preserves invariants
Ok(Some(unsafe {
BitPackedArray::new_unchecked(
array.packed().slice(encoded_start..encoded_stop),
array.dtype.clone(),
array.validity().slice(range.clone())?,
array
.patches()
.map(|p| p.slice(range.clone()))
.transpose()?
.flatten(),
array.bit_width(),
range.len(),
offset as u16,
)
.into_array()
}))
fn slice(_array: &Self::Array, _range: Range<usize>) -> VortexResult<Option<ArrayRef>> {
Ok(None)
}
}

Expand Down
57 changes: 31 additions & 26 deletions encodings/fastlanes/src/bitpacking/vtable/operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,22 @@ impl OperationsVTable<BitPackedVTable> for BitPackedVTable {

#[cfg(test)]
mod test {
use std::ops::Range;
use std::sync::LazyLock;

use vortex_array::Array;
use vortex_array::IntoArray;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::SliceArray;
use vortex_array::assert_arrays_eq;
use vortex_array::assert_nth_scalar;
use vortex_array::buffer::BufferHandle;
use vortex_array::compute::take;
use vortex_array::patches::Patches;
use vortex_array::session::ArraySession;
use vortex_array::validity::Validity;
use vortex_array::vtable::VTable;
use vortex_buffer::Alignment;
use vortex_buffer::Buffer;
use vortex_buffer::ByteBuffer;
Expand All @@ -46,18 +53,31 @@ mod test {
use crate::BitPackedArray;
use crate::BitPackedVTable;

static SESSION: LazyLock<vortex_session::VortexSession> =
LazyLock::new(|| vortex_session::VortexSession::empty().with::<ArraySession>());

fn slice_via_kernel(array: &BitPackedArray, range: Range<usize>) -> BitPackedArray {
let slice_array = SliceArray::new(array.clone().into_array(), range);
let mut ctx = SESSION.create_execution_ctx();
let sliced = <BitPackedVTable as VTable>::execute_parent(
array,
&slice_array.into_array(),
0,
&mut ctx,
)
.expect("execute_parent failed")
.expect("expected slice kernel to execute");
sliced.as_::<BitPackedVTable>().clone()
}

#[test]
pub fn slice_block() {
let arr = BitPackedArray::encode(
PrimitiveArray::from_iter((0u32..2048).map(|v| v % 64)).as_ref(),
6,
)
.unwrap();
let sliced = arr
.slice(1024..2048)
.unwrap()
.as_::<BitPackedVTable>()
.clone();
let sliced = slice_via_kernel(&arr, 1024..2048);
assert_nth_scalar!(sliced, 0, 1024u32 % 64);
assert_nth_scalar!(sliced, 1023, 2047u32 % 64);
assert_eq!(sliced.offset(), 0);
Expand All @@ -70,13 +90,8 @@ mod test {
PrimitiveArray::from_iter((0u32..2048).map(|v| v % 64)).as_ref(),
6,
)
.unwrap()
.into_array();
let sliced = arr
.slice(512..1434)
.unwrap()
.as_::<BitPackedVTable>()
.clone();
.unwrap();
let sliced = slice_via_kernel(&arr, 512..1434);
assert_nth_scalar!(sliced, 0, 512u32 % 64);
assert_nth_scalar!(sliced, 921, 1433u32 % 64);
assert_eq!(sliced.offset(), 512);
Expand Down Expand Up @@ -115,22 +130,13 @@ mod test {
PrimitiveArray::from_iter((0u32..2048).map(|v| v % 64)).as_ref(),
6,
)
.unwrap()
.into_array();
let sliced = arr
.slice(512..1434)
.unwrap()
.as_::<BitPackedVTable>()
.clone();
.unwrap();
let sliced = slice_via_kernel(&arr, 512..1434);
assert_nth_scalar!(sliced, 0, 512u32 % 64);
assert_nth_scalar!(sliced, 921, 1433u32 % 64);
assert_eq!(sliced.offset(), 512);
assert_eq!(sliced.len(), 922);
let doubly_sliced = sliced
.slice(127..911)
.unwrap()
.as_::<BitPackedVTable>()
.clone();
let doubly_sliced = slice_via_kernel(&sliced, 127..911);
assert_nth_scalar!(doubly_sliced, 0, (512u32 + 127) % 64);
assert_nth_scalar!(doubly_sliced, 783, (512u32 + 910) % 64);
assert_eq!(doubly_sliced.offset(), 639);
Expand All @@ -148,8 +154,7 @@ mod test {
assert_eq!(patch_indices.len(), 1);

// Slicing drops the empty patches array.
let sliced = array.slice(0..64).unwrap();
let sliced_bp = sliced.as_::<BitPackedVTable>();
let sliced_bp = slice_via_kernel(&array, 0..64);
assert!(sliced_bp.patches().is_none());
}

Expand Down
Loading
Loading