Skip to content

Commit 7b4a2f1

Browse files
committed
Add IterationStrategy::None
1 parent 2aa46b0 commit 7b4a2f1

File tree

1 file changed

+33
-31
lines changed

1 file changed

+33
-31
lines changed

arrow/src/compute/kernels/filter.rs

Lines changed: 33 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -330,20 +330,7 @@ impl FilterBuilder {
330330
};
331331

332332
let count = filter_count(&filter);
333-
let strategy = if count == filter.len() {
334-
IterationStrategy::All
335-
} else {
336-
// Compute the selectivity of the predicate by dividing the number of true
337-
// bits in the predicate by the predicate's total length
338-
//
339-
// This can then be used as a heuristic for the optimal iteration strategy
340-
let selectivity_frac = count as f64 / filter.len() as f64;
341-
if selectivity_frac > FILTER_SLICES_SELECTIVITY_THRESHOLD {
342-
IterationStrategy::SlicesIterator
343-
} else {
344-
IterationStrategy::IndexIterator
345-
}
346-
};
333+
let strategy = IterationStrategy::default_strategy(filter.len(), count);
347334

348335
Self {
349336
filter,
@@ -395,6 +382,32 @@ enum IterationStrategy {
395382
Slices(Vec<(usize, usize)>),
396383
/// Select all rows
397384
All,
385+
/// Select no rows
386+
None,
387+
}
388+
389+
impl IterationStrategy {
390+
/// The default [`IterationStrategy`] for a filter of length `filter_length`
391+
/// and selecting `filter_count` rows
392+
fn default_strategy(filter_length: usize, filter_count: usize) -> Self {
393+
if filter_length == 0 || filter_count == 0 {
394+
return IterationStrategy::None;
395+
}
396+
397+
if filter_count == filter_length {
398+
return IterationStrategy::All;
399+
}
400+
401+
// Compute the selectivity of the predicate by dividing the number of true
402+
// bits in the predicate by the predicate's total length
403+
//
404+
// This can then be used as a heuristic for the optimal iteration strategy
405+
let selectivity_frac = filter_count as f64 / filter_length as f64;
406+
if selectivity_frac > FILTER_SLICES_SELECTIVITY_THRESHOLD {
407+
return IterationStrategy::SlicesIterator;
408+
}
409+
IterationStrategy::IndexIterator
410+
}
398411
}
399412

400413
/// A filtering predicate that can be applied to an [`Array`]
@@ -421,16 +434,9 @@ fn filter_array(values: &dyn Array, predicate: &FilterPredicate) -> Result<Array
421434
)));
422435
}
423436

424-
match predicate.count {
425-
0 => {
426-
// return empty
427-
Ok(new_empty_array(values.data_type()))
428-
}
429-
len if len == values.len() => {
430-
// return all
431-
let data = values.data().clone();
432-
Ok(make_array(data))
433-
}
437+
match predicate.strategy {
438+
IterationStrategy::None => Ok(new_empty_array(values.data_type())),
439+
IterationStrategy::All => Ok(make_array(values.data().slice(0, predicate.count))),
434440
// actually filter
435441
_ => match values.data_type() {
436442
DataType::Boolean => {
@@ -634,7 +640,7 @@ fn filter_bits(buffer: &Buffer, offset: usize, predicate: &FilterPredicate) -> B
634640
}
635641
builder.finish()
636642
}
637-
IterationStrategy::All => buffer.clone(),
643+
IterationStrategy::All | IterationStrategy::None => unreachable!(),
638644
}
639645
}
640646

@@ -703,9 +709,7 @@ where
703709
// SAFETY: `Vec::iter` is trusted length
704710
unsafe { MutableBuffer::from_trusted_len_iter(iter) }
705711
}
706-
IterationStrategy::All => {
707-
return PrimitiveArray::from(data.slice(0, predicate.filter.len()))
708-
}
712+
IterationStrategy::All | IterationStrategy::None => unreachable!(),
709713
};
710714

711715
let mut builder = ArrayDataBuilder::new(data.data_type().clone())
@@ -822,9 +826,7 @@ where
822826
filter.extend_idx(IndexIterator::new(&predicate.filter, predicate.count))
823827
}
824828
IterationStrategy::Indices(indices) => filter.extend_idx(indices.iter().cloned()),
825-
IterationStrategy::All => {
826-
return GenericStringArray::from(data.slice(0, predicate.filter.len()))
827-
}
829+
IterationStrategy::All | IterationStrategy::None => unreachable!(),
828830
}
829831

830832
let mut builder = ArrayDataBuilder::new(data.data_type().clone())

0 commit comments

Comments
 (0)