Skip to content

Commit 818e6e3

Browse files
committed
Use set_bits from transform::util
1 parent 9eed57c commit 818e6e3

File tree

8 files changed

+250
-307
lines changed

8 files changed

+250
-307
lines changed

arrow/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ harness = false
104104
name = "boolean_kernels"
105105
harness = false
106106

107+
[[bench]]
108+
name = "boolean_append_packed"
109+
harness = false
110+
107111
[[bench]]
108112
name = "arithmetic_kernels"
109113
harness = false
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
use arrow::array::BooleanBufferBuilder;
2+
use criterion::{criterion_group, criterion_main, Criterion};
3+
use rand::{thread_rng, Rng};
4+
5+
fn rand_bytes(len: usize) -> Vec<u8> {
6+
let mut rng = thread_rng();
7+
let mut buf = vec![0_u8; len];
8+
rng.fill(buf.as_mut_slice());
9+
buf
10+
}
11+
12+
fn boolean_append_packed(c: &mut Criterion) {
13+
let mut rng = thread_rng();
14+
let source = rand_bytes(1024);
15+
let ranges: Vec<_> = (0..100)
16+
.into_iter()
17+
.map(|_| {
18+
let start: usize = rng.gen_range(0..1024 * 8);
19+
let end: usize = rng.gen_range(start..1024 * 8);
20+
start..end
21+
})
22+
.collect();
23+
24+
let total_bits: usize = ranges.iter().map(|x| x.end - x.start).sum();
25+
26+
c.bench_function("boolean_append_packed", |b| {
27+
b.iter(|| {
28+
let mut buffer = BooleanBufferBuilder::new(total_bits);
29+
for range in &ranges {
30+
buffer.append_packed_range(range.clone(), &source);
31+
}
32+
assert_eq!(buffer.len(), total_bits);
33+
})
34+
});
35+
}
36+
37+
criterion_group!(benches, boolean_append_packed);
38+
criterion_main!(benches);

arrow/src/array/builder.rs

Lines changed: 10 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -399,60 +399,6 @@ impl BooleanBufferBuilder {
399399
}
400400
}
401401

402-
/// Append `count` bits from `to_set`
403-
///
404-
/// `to_set` is a slice of bits packed LSB-first into `[u8]`
405-
///
406-
/// # Panics
407-
///
408-
/// Panics if `to_set` does not contain `ceil(count / 8)` bytes
409-
#[inline]
410-
pub fn append_packed(&mut self, count: usize, to_set: &[u8]) {
411-
assert_eq!((count + 7) >> 3, to_set.len());
412-
413-
let new_len = self.len + count;
414-
let new_buf_len = (new_len + 7) >> 3;
415-
self.buffer.reserve(new_buf_len - self.buffer.len());
416-
417-
let whole_bytes = count >> 3;
418-
let overrun = count & 7;
419-
420-
let skew = self.len & 7;
421-
if skew == 0 {
422-
self.buffer.extend_from_slice(&to_set[..whole_bytes]);
423-
if overrun > 0 {
424-
let masked = to_set[whole_bytes] & ((1 << overrun) - 1);
425-
self.buffer.push(masked)
426-
}
427-
428-
self.len = new_len;
429-
debug_assert_eq!(self.buffer.len(), new_buf_len);
430-
return;
431-
}
432-
433-
for to_set_byte in &to_set[..whole_bytes] {
434-
let low = *to_set_byte << skew;
435-
let high = *to_set_byte >> (8 - skew);
436-
437-
*self.buffer.last_mut().unwrap() |= low;
438-
self.buffer.push(high);
439-
}
440-
441-
if overrun > 0 {
442-
let masked = to_set[whole_bytes] & ((1 << overrun) - 1);
443-
let low = masked << skew;
444-
*self.buffer.last_mut().unwrap() |= low;
445-
446-
if overrun > 8 - skew {
447-
let high = masked >> (8 - skew);
448-
self.buffer.push(high)
449-
}
450-
}
451-
452-
self.len = new_len;
453-
debug_assert_eq!(self.buffer.len(), new_buf_len);
454-
}
455-
456402
/// Append `range` bits from `to_set`
457403
///
458404
/// `to_set` is a slice of bits packed LSB-first into `[u8]`
@@ -461,31 +407,16 @@ impl BooleanBufferBuilder {
461407
///
462408
/// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes
463409
pub fn append_packed_range(&mut self, range: Range<usize>, to_set: &[u8]) {
464-
let count = range.end - range.start;
465-
if count == 0 {
466-
return;
467-
}
468-
469-
let start_byte = range.start >> 3;
470-
let end_byte = (range.end + 7) >> 3;
471-
let skew = range.start & 7;
472-
473-
// `append_packed` requires the provided `to_set` to be byte aligned, therefore
474-
// if the range being copied is not byte aligned we must first append
475-
// the leading bits to reach a byte boundary
476-
if skew == 0 {
477-
// No skew can simply append bytes directly
478-
self.append_packed(count, &to_set[start_byte..end_byte])
479-
} else if start_byte + 1 == end_byte {
480-
// Append bits from single byte
481-
self.append_packed(count, &[to_set[start_byte] >> skew])
482-
} else {
483-
// Append trailing bits from first byte to reach byte boundary, then append
484-
// bits from the remaining byte-aligned mask
485-
let offset = 8 - skew;
486-
self.append_packed(offset, &[to_set[start_byte] >> skew]);
487-
self.append_packed(count - offset, &to_set[(start_byte + 1)..end_byte]);
488-
}
410+
let offset_write = self.len;
411+
let len = range.end - range.start;
412+
self.advance(len);
413+
crate::util::bit_mask::set_bits(
414+
self.buffer.as_slice_mut(),
415+
to_set,
416+
offset_write,
417+
range.start,
418+
len,
419+
);
489420
}
490421

491422
#[inline]
@@ -2925,37 +2856,6 @@ mod tests {
29252856
assert!(buffer.get_bit(11));
29262857
}
29272858

2928-
#[test]
2929-
fn test_bit_append_packed() {
2930-
let mut buffer = BooleanBufferBuilder::new(0);
2931-
2932-
buffer.append_packed(8, &[0b11111111]);
2933-
assert_eq!(buffer.buffer.as_slice(), &[0b11111111]);
2934-
assert_eq!(buffer.len(), 8);
2935-
2936-
buffer.append_packed(3, &[0b01010010]);
2937-
assert_eq!(buffer.buffer.as_slice(), &[0b11111111, 0b00000010]);
2938-
assert_eq!(buffer.len(), 11);
2939-
2940-
buffer.append_packed(5, &[0b00010100]);
2941-
assert_eq!(buffer.buffer.as_slice(), &[0b11111111, 0b10100010]);
2942-
assert_eq!(buffer.len(), 16);
2943-
2944-
buffer.append_packed(2, &[0b11110010]);
2945-
assert_eq!(
2946-
buffer.buffer.as_slice(),
2947-
&[0b11111111, 0b10100010, 0b00000010]
2948-
);
2949-
assert_eq!(buffer.len(), 18);
2950-
2951-
buffer.append_packed(15, &[0b11011010, 0b01010101]);
2952-
assert_eq!(
2953-
buffer.buffer.as_slice(),
2954-
&[0b11111111, 0b10100010, 0b01101010, 0b01010111, 0b00000001]
2955-
);
2956-
assert_eq!(buffer.len(), 33);
2957-
}
2958-
29592859
#[test]
29602860
fn test_bool_buffer_fuzz() {
29612861
use rand::prelude::*;
@@ -2964,19 +2864,6 @@ mod tests {
29642864
let mut all_bools = vec![];
29652865
let mut rng = rand::thread_rng();
29662866

2967-
for _ in 0..100 {
2968-
let mask_length = (rng.next_u32() % 50) as usize;
2969-
let bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
2970-
.take(mask_length)
2971-
.collect();
2972-
2973-
let mut compacted = BooleanBufferBuilder::new(mask_length);
2974-
compacted.append_slice(&bools);
2975-
2976-
buffer.append_packed(mask_length, compacted.buffer.as_slice());
2977-
all_bools.extend_from_slice(&bools);
2978-
}
2979-
29802867
let src_len = 32;
29812868
let (src, compacted_src) = {
29822869
let src: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))

arrow/src/array/transform/boolean.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,8 @@
1616
// under the License.
1717

1818
use crate::array::ArrayData;
19-
20-
use super::{
21-
Extend, _MutableArrayData,
22-
utils::{resize_for_bits, set_bits},
23-
};
19+
use crate::util::bit_mask::set_bits;
20+
use super::{Extend, _MutableArrayData, utils::resize_for_bits};
2421

2522
pub(super) fn build_extend(array: &ArrayData) -> Extend {
2623
let values = array.buffers()[0].as_slice();

arrow/src/array/transform/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits
9797
let bytes = bitmap.bits.as_slice();
9898
Box::new(move |mutable, start, len| {
9999
utils::resize_for_bits(&mut mutable.null_buffer, mutable.len + len);
100-
mutable.null_count += utils::set_bits(
100+
mutable.null_count += crate::util::bit_mask::set_bits(
101101
mutable.null_buffer.as_slice_mut(),
102102
bytes,
103103
mutable.len,

0 commit comments

Comments
 (0)