From b44656c5a94df1427db9ab90607f3b5031c27dfc Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 2 Jan 2025 12:49:56 -0500 Subject: [PATCH] feat: consume right_parts buffer in alp-rd decompression (#1785) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This slightly improves decompression throughput for larger arrays. Passing in an owned buffer is insufficient (see the benchmarks for "passing a BufferMut"); we must actually mutate the array in place. develop ``` alp_compress fastest │ slowest │ median │ mean │ samples │ iters ╰─ decompress_rd │ │ │ │ │ ├─ f32 │ │ │ │ │ │ ├─ 100000 59.62 µs │ 171.8 µs │ 65.7 µs │ 65.89 µs │ 100 │ 100 │ ├─ 1000000 747.4 µs │ 1.116 ms │ 788.4 µs │ 809.7 µs │ 100 │ 100 │ ╰─ 10000000 9.708 ms │ 14.55 ms │ 9.948 ms │ 10.05 ms │ 100 │ 100 ╰─ f64 │ │ │ │ │ ├─ 100000 67.24 µs │ 137.2 µs │ 67.49 µs │ 69.21 µs │ 100 │ 100 ├─ 1000000 1.168 ms │ 1.906 ms │ 1.272 ms │ 1.337 ms │ 100 │ 100 ╰─ 10000000 18.65 ms │ 23.48 ms │ 19.24 ms │ 19.8 ms │ 100 │ 100 ``` passing a BufferMut ``` alp_compress fastest │ slowest │ median │ mean │ samples │ iters ╰─ decompress_rd │ │ │ │ │ ├─ f32 │ │ │ │ │ │ ├─ 100000 60.16 µs │ 94.2 µs │ 66.81 µs │ 68.19 µs │ 100 │ 100 │ ├─ 1000000 738.8 µs │ 1.189 ms │ 765.6 µs │ 793.3 µs │ 100 │ 100 │ ╰─ 10000000 9.681 ms │ 10.89 ms │ 9.822 ms │ 9.869 ms │ 100 │ 100 ╰─ f64 │ │ │ │ │ ├─ 100000 67.54 µs │ 109.9 µs │ 77.41 µs │ 79.14 µs │ 100 │ 100 ├─ 1000000 1.209 ms │ 1.804 ms │ 1.293 ms │ 1.328 ms │ 100 │ 100 ╰─ 10000000 18.58 ms │ 25.08 ms │ 19.87 ms │ 20.22 ms │ 100 │ 100 ``` using for_each_with_index ``` alp_compress fastest │ slowest │ median │ mean │ samples │ iters ╰─ decompress_rd │ │ │ │ │ ├─ f32 │ │ │ │ │ │ ├─ 100000 46.62 µs │ 163.3 µs │ 50.16 µs │ 50.75 µs │ 100 │ 100 │ ├─ 1000000 628.6 µs │ 828.2 µs │ 675.2 µs │ 688.2 µs │ 100 │ 100 │ ╰─ 10000000 8.016 ms │ 9.968 ms │ 8.114 ms │ 8.175 ms │ 100 │ 100 ╰─ f64 │ │ │ │ │ ├─ 100000 63.87 µs │ 86.29 µs │ 68.66 µs │ 68.3 µs │ 100 │ 
100 ├─ 1000000 934.4 µs │ 2.173 ms │ 1.085 ms │ 1.09 ms │ 100 │ 100 ╰─ 10000000 14.18 ms │ 17.89 ms │ 14.52 ms │ 14.78 ms │ 100 │ 100 ``` --- encodings/alp/src/alp_rd/array.rs | 4 ++-- encodings/alp/src/alp_rd/mod.rs | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index 0ccd4cc8e2..c548ad4fb2 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -204,7 +204,7 @@ impl IntoCanonical for ALPRDArray { left_parts.into_buffer::(), left_parts_dict, self.metadata().right_bit_width, - right_parts.into_buffer::(), + right_parts.into_buffer_mut::(), self.left_parts_patches(), )?, self.logical_validity().into_validity(), @@ -215,7 +215,7 @@ impl IntoCanonical for ALPRDArray { left_parts.into_buffer::(), left_parts_dict, self.metadata().right_bit_width, - right_parts.into_buffer::(), + right_parts.into_buffer_mut::(), self.left_parts_patches(), )?, self.logical_validity().into_validity(), diff --git a/encodings/alp/src/alp_rd/mod.rs b/encodings/alp/src/alp_rd/mod.rs index 17c8d1436f..e03276dc35 100644 --- a/encodings/alp/src/alp_rd/mod.rs +++ b/encodings/alp/src/alp_rd/mod.rs @@ -260,7 +260,7 @@ pub fn alp_rd_decode( left_parts: Buffer, left_parts_dict: &[u16], right_bit_width: u8, - right_parts: Buffer, + right_parts: BufferMut, left_parts_patches: Option, ) -> VortexResult> { if left_parts.len() != right_parts.len() { @@ -288,13 +288,15 @@ pub fn alp_rd_decode( } // Shift the left-parts and add in the right-parts. 
- Ok( - BufferMut::::from_iter(values.iter().zip(right_parts.iter()).map(|(left, right)| { - let left = ::from_u16(*left); - T::from_bits((left << (right_bit_width as usize)) | *right) - })) - .freeze(), - ) + let mut index = 0; + Ok(right_parts + .map_each(|right| { + let left = values[index]; + index += 1; + let left = ::from_u16(left); + T::from_bits((left << (right_bit_width as usize)) | right) + }) + .freeze()) } /// Find the best "cut point" for a set of floating point values such that we can