Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions encodings/alp/src/alp/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ where
0,
valid_exceptional_positions.into_array(),
valid_exceptional_values,
// TODO(0ax1): handle chunk offsets
None,
))
};
Ok((exponents, encoded_array, patches))
Expand Down
11 changes: 9 additions & 2 deletions encodings/alp/src/alp/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl SerdeVTable<ALPVTable> for ALPVTable {
.map(|p| {
let indices = children.get(1, &p.indices_dtype(), p.len())?;
let values = children.get(2, dtype, p.len())?;
Ok::<_, VortexError>(Patches::new(len, p.offset(), indices, values))
Ok::<_, VortexError>(Patches::new(len, p.offset(), indices, values, None))
})
.transpose()?;

Expand Down Expand Up @@ -114,7 +114,14 @@ mod tests {
check_metadata(
"alp.metadata",
ProstMetadata(ALPMetadata {
patches: Some(PatchesMetadata::new(usize::MAX, usize::MAX, PType::U64)),
patches: Some(PatchesMetadata::new(
usize::MAX,
usize::MAX,
PType::U64,
None,
None,
None,
)),
exp_e: u32::MAX,
exp_f: u32::MAX,
}),
Expand Down
12 changes: 10 additions & 2 deletions encodings/alp/src/alp_rd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#![allow(clippy::cast_possible_truncation)]

pub use array::*;
use vortex_array::IntoArray;
use vortex_array::patches::Patches;
use vortex_array::validity::Validity;

Expand All @@ -19,7 +20,7 @@ use num_traits::{Float, One, PrimInt};
use rustc_hash::FxBuildHasher;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::vtable::ValidityHelper;
use vortex_array::{Array, IntoArray, ToCanonical};
use vortex_array::{Array, ToCanonical};
use vortex_buffer::{Buffer, BufferMut};
use vortex_dtype::{DType, NativePType, match_each_integer_ptype};
use vortex_error::{VortexExpect, VortexUnwrap, vortex_panic};
Expand Down Expand Up @@ -247,7 +248,14 @@ impl RDEncoder {
.into_array()
};

Patches::new(doubles.len(), 0, packed_pos, exceptions.into_array())
Patches::new(
doubles.len(),
0,
packed_pos,
exceptions.into_array(),
// TODO(0ax1): handle chunk offsets
None,
)
});

ALPRDArray::try_new(
Expand Down
19 changes: 17 additions & 2 deletions encodings/alp/src/alp_rd/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,15 @@ impl SerdeVTable<ALPRDVTable> for ALPRDVTable {
.map(|p| {
let indices = children.get(2, &p.indices_dtype(), p.len())?;
let values = children.get(3, &left_parts_dtype, p.len())?;
Ok::<_, VortexError>(Patches::new(len, p.offset(), indices, values))

Ok::<_, VortexError>(Patches::new(
len,
p.offset(),
indices,
values,
// TODO(0ax1): handle chunk offsets
None,
))
})
.transpose()?;

Expand Down Expand Up @@ -170,7 +178,14 @@ mod test {
"alprd.metadata",
ProstMetadata(ALPRDMetadata {
right_bit_width: u32::MAX,
patches: Some(PatchesMetadata::new(usize::MAX, usize::MAX, PType::U64)),
patches: Some(PatchesMetadata::new(
usize::MAX,
usize::MAX,
PType::U64,
None,
None,
None,
)),
dict: Vec::new(),
left_parts_ptype: PType::U64 as i32,
dict_len: 8,
Expand Down
105 changes: 100 additions & 5 deletions encodings/fastlanes/src/bitpacking/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,20 @@ where
let mut indices: BufferMut<P> = BufferMut::with_capacity(num_exceptions_hint);
let mut values: BufferMut<T> = BufferMut::with_capacity(num_exceptions_hint);

for (i, v) in data.iter().enumerate() {
if (v.leading_zeros() as usize) < T::PTYPE.bit_width() - bit_width as usize
&& validity_mask.value(i)
let total_chunks = data.len().div_ceil(1024);
let mut chunk_offsets: BufferMut<u64> = BufferMut::with_capacity(total_chunks);

for (idx, value) in data.iter().enumerate() {
if (idx % 1024) == 0 {
// Record the patch index offset for each chunk.
chunk_offsets.push(values.len() as u64);
}

if (value.leading_zeros() as usize) < T::PTYPE.bit_width() - bit_width as usize
&& validity_mask.value(idx)
{
indices.push(P::from(i).vortex_expect("cast index from usize"));
values.push(*v);
indices.push(P::from(idx).vortex_expect("cast index from usize"));
values.push(*value);
}
}

Expand All @@ -271,6 +279,7 @@ where
0,
indices.into_array(),
PrimitiveArray::new(values, patch_validity).into_array(),
Some(chunk_offsets.into_array()),
)
})
}
Expand Down Expand Up @@ -808,4 +817,90 @@ mod test {
// Verify all values were correctly unpacked including patches.
assert_eq!(result.as_slice::<u32>(), &values);
}

#[test]
fn test_chunk_offsets() {
    // A value needing > 4 bits becomes a patch under bit_width = 4.
    const EXCEPTION: u32 = 1u32 << 20;

    let mut data = vec![0u32; 4096];
    for &pos in &[100usize, 200, 3000, 3100] {
        data[pos] = EXCEPTION;
    }

    let encoded = bitpack_encode(&PrimitiveArray::from_iter(data), 4, None).unwrap();
    let offsets = encoded
        .patches()
        .unwrap()
        .chunk_offsets()
        .as_ref()
        .unwrap()
        .to_primitive();

    // chunk 0 (0-1023): patches at 100, 200 -> starts at patch index 0
    // chunk 1 (1024-2047): no patches -> points to patch index 2
    // chunk 2 (2048-3071): patch at 3000 -> starts at patch index 2
    // chunk 3 (3072-4095): patch at 3100 -> starts at patch index 3
    assert_eq!(offsets.as_slice::<u64>(), &[0, 2, 2, 3]);
}

#[test]
fn test_chunk_offsets_no_patches_in_middle() {
    // Patches land in chunks 0 and 2; chunk 1 stays empty.
    const EXCEPTION: u32 = 1u32 << 20;

    let mut data = vec![0u32; 3072];
    for &pos in &[100usize, 200, 2500] {
        data[pos] = EXCEPTION;
    }

    let encoded = bitpack_encode(&PrimitiveArray::from_iter(data), 4, None).unwrap();
    let offsets = encoded
        .patches()
        .unwrap()
        .chunk_offsets()
        .as_ref()
        .unwrap()
        .to_primitive();

    // chunk 0 holds two patches, chunk 1 holds none (points past them),
    // chunk 2 starts at patch index 2.
    assert_eq!(offsets.as_slice::<u64>(), &[0, 2, 2]);
}

#[test]
fn test_chunk_offsets_trailing_empty_chunks() {
    // All patches fall in the first two chunks; the last three chunks are empty.
    const EXCEPTION: u32 = 1u32 << 20;

    let mut data = vec![0u32; 5120];
    for &pos in &[100usize, 200, 1500] {
        data[pos] = EXCEPTION;
    }

    let encoded = bitpack_encode(&PrimitiveArray::from_iter(data), 4, None).unwrap();
    let offsets = encoded
        .patches()
        .unwrap()
        .chunk_offsets()
        .as_ref()
        .unwrap()
        .to_primitive();

    // chunk 0 (0-1023): patches at 100, 200 -> starts at patch index 0
    // chunk 1 (1024-2047): patch at 1500 -> starts at patch index 2
    // chunk 2 (2048-3071): no patches -> points to patch index 3
    // chunk 3 (3072-4095): no patches -> points to patch index 3 (remaining chunks filled)
    // chunk 4 (4096-5119): no patches -> points to patch index 3 (remaining chunks filled)
    assert_eq!(offsets.as_slice::<u64>(), &[0, 2, 3, 3, 3]);
}

#[test]
fn test_chunk_offsets_single_chunk() {
    // 500 elements fit in one 1024-element chunk, so exactly one offset is recorded.
    const EXCEPTION: u32 = 1u32 << 20;

    let mut data = vec![0u32; 500];
    for &pos in &[100usize, 200] {
        data[pos] = EXCEPTION;
    }

    let encoded = bitpack_encode(&PrimitiveArray::from_iter(data), 4, None).unwrap();
    let offsets = encoded
        .patches()
        .unwrap()
        .chunk_offsets()
        .as_ref()
        .unwrap()
        .to_primitive();

    // Single chunk starting at patch index 0.
    assert_eq!(offsets.as_slice::<u64>(), &[0]);
}
}
1 change: 1 addition & 0 deletions encodings/fastlanes/src/bitpacking/compute/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ impl CastKernel for BitPackedVTable {
patches.offset(),
patches.indices().clone(),
new_values,
patches.chunk_offsets().clone(),
))
})
.transpose()?,
Expand Down
1 change: 1 addition & 0 deletions encodings/fastlanes/src/bitpacking/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ mod test {
0,
buffer![1u32].into_array(),
PrimitiveArray::new(buffer![999u32], Validity::AllValid).to_array(),
None,
)),
1,
8,
Expand Down
31 changes: 26 additions & 5 deletions encodings/fastlanes/src/bitpacking/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ impl SerdeVTable<BitPackedVTable> for BitPackedVTable {
})))
}

/// Deserialize a BitPackedArray from its components.
///
/// Note that the layout depends on whether patches and chunk_offsets are present:
/// - No patches: `[validity?]`
/// - With patches: `[patch_indices, patch_values, chunk_offsets?, validity?]`
fn build(
_encoding: &BitPackedEncoding,
dtype: &DType,
Expand Down Expand Up @@ -66,19 +71,35 @@ impl SerdeVTable<BitPackedVTable> for BitPackedVTable {
}
};

// Load validity from the zero'th or second child, depending on whether patches are present.
let validity = if metadata.patches.is_some() {
load_validity(2)?
let validity_idx = if let Some(patches_meta) = &metadata.patches {
if patches_meta.chunk_offsets_dtype().is_some() {
3
} else {
2
}
} else {
load_validity(0)?
0
};

let validity = load_validity(validity_idx)?;

let patches = metadata
.patches
.map(|p| {
let indices = children.get(0, &p.indices_dtype(), p.len())?;
let values = children.get(1, dtype, p.len())?;
Ok::<_, VortexError>(Patches::new(len, p.offset(), indices, values))
let chunk_offsets = p
.chunk_offsets_dtype()
.map(|dtype| children.get(2, &dtype, p.chunk_offsets_len() as usize))
.transpose()?;

Ok::<_, VortexError>(Patches::new(
len,
p.offset(),
indices,
values,
chunk_offsets,
))
})
.transpose()?;

Expand Down
15 changes: 11 additions & 4 deletions encodings/sparse/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,9 @@ impl SparseArray {
);
}

let patches = Patches::new(len, 0, indices, values);

Ok(Self {
patches,
// TODO(0ax1): handle chunk offsets
patches: Patches::new(len, 0, indices, values, None),
fill_value,
stats_set: Default::default(),
})
Expand Down Expand Up @@ -133,7 +132,15 @@ impl SparseArray {
.vortex_expect("Patches offset must cast to the indices dtype");
let indices = sub_scalar(patches.indices(), indices_offset)
.vortex_expect("must be able to subtract offset from indices");
Patches::new(patches.array_len(), 0, indices, patches.values().clone())

Patches::new(
patches.array_len(),
0,
indices,
patches.values().clone(),
// TODO(0ax1): handle chunk offsets
None,
)
}

#[inline]
Expand Down
9 changes: 8 additions & 1 deletion vortex-array/benches/take_patches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,14 @@ fn fixture(len: usize, sparsity: f64, rng: &mut StdRng) -> Patches {
.collect::<Buffer<u64>>();
let sparse_len = indices.len();
let values = Buffer::from_iter((0..sparse_len).map(|x| x as u64)).into_array();
Patches::new(len, 0, indices.into_array(), values)
Patches::new(
len,
0,
indices.into_array(),
values,
// TODO(0ax1): handle chunk offsets
None,
)
}

fn indices(array_len: usize, n_indices: usize, rng: &mut StdRng) -> ArrayRef {
Expand Down
3 changes: 3 additions & 0 deletions vortex-array/src/array/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,5 +146,8 @@ pub trait ArrayChildVisitor {
/// Visit the child arrays backing a [`Patches`]: the patch indices, the
/// patch values, and — when present — the per-chunk patch offsets.
fn visit_patches(&mut self, patches: &Patches) {
    self.visit_child("patch_indices", patches.indices());
    self.visit_child("patch_values", patches.values());
    // Chunk offsets are optional; only visit them when the patches carry them,
    // so the child layout stays stable for patches created without offsets.
    if let Some(chunk_offsets) = patches.chunk_offsets() {
        self.visit_child("patch_chunk_offsets", chunk_offsets);
    }
}
}
2 changes: 2 additions & 0 deletions vortex-array/src/arrays/bool/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ mod tests {
0,
buffer![4u32].into_array(), // This creates a non-nullable array
BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
None,
);
let arr = arr.patch(&patches);
let arr_len = arr.len();
Expand Down Expand Up @@ -432,6 +433,7 @@ mod tests {
0,
PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
None,
);
let arr = arr.patch(&patches);
assert_eq!(arr.boolean_buffer().sliced().as_ptr(), buf_ptr);
Expand Down
Loading
Loading