Skip to content

Commit

Permalink
rename bloom
Browse files Browse the repository at this point in the history
  • Loading branch information
jiacai2050 committed Feb 9, 2023
1 parent 4376658 commit bc77f15
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 16 deletions.
4 changes: 2 additions & 2 deletions analytic_engine/src/sst/meta_data/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ impl MetaData {
/// contains no extended custom information.
pub fn try_new(
parquet_meta_data: &parquet_ext::ParquetMetaData,
ignore_bloom_filter: bool,
ignore_sst_filter: bool,
) -> Result<Self> {
let file_meta_data = parquet_meta_data.file_metadata();
let kv_metas = file_meta_data
Expand All @@ -46,7 +46,7 @@ impl MetaData {
let custom = {
let mut sst_meta =
encoding::decode_sst_meta_data(&kv_metas[0]).context(DecodeCustomMetaData)?;
if ignore_bloom_filter {
if ignore_sst_filter {
sst_meta.sst_filter = None;
}

Expand Down
4 changes: 2 additions & 2 deletions analytic_engine/src/sst/parquet/async_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,8 @@ impl<'a> Reader<'a> {
let meta_data = {
let parquet_meta_data = self.load_meta_data_from_storage().await?;

let ignore_bloom_filter = avoid_update_cache && empty_predicate;
MetaData::try_new(&parquet_meta_data, ignore_bloom_filter)
let ignore_sst_filter = avoid_update_cache && empty_predicate;
MetaData::try_new(&parquet_meta_data, ignore_sst_filter)
.map_err(|e| Box::new(e) as _)
.context(DecodeSstMeta)?
};
Expand Down
16 changes: 8 additions & 8 deletions analytic_engine/src/sst/parquet/meta_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pub enum Error {
source,
backtrace
))]
InvalidXor8Filter {
ParseXor8Filter {
source: std::io::Error,
backtrace: Backtrace,
},
Expand All @@ -43,7 +43,7 @@ pub enum Error {
},

#[snafu(display(
"Unsupported bloom filter version, version:{}.\nBacktrace\n:{}",
"Unsupported sst_filter version, version:{}.\nBacktrace\n:{}",
version,
backtrace
))]
Expand Down Expand Up @@ -102,7 +102,7 @@ impl Filter for Xor8Filter {
Self: Sized,
{
Xor8::from_bytes(buf)
.context(InvalidXor8Filter)
.context(ParseXor8Filter)
.map(|xor8| Self { xor8 })
}
}
Expand Down Expand Up @@ -342,8 +342,8 @@ impl fmt::Debug for ParquetMetaData {
.field("time_range", &self.time_range)
.field("max_sequence", &self.max_sequence)
.field("schema", &self.schema)
// Avoid the messy output from bloom filter.
.field("has_bloom_filter", &self.sst_filter.is_some())
// Avoid the messy output from filter.
.field("has_filter", &self.sst_filter.is_some())
.field("collapsible_cols_idx", &self.collapsible_cols_idx)
.finish()
}
Expand Down Expand Up @@ -394,7 +394,7 @@ mod tests {
use super::*;

#[test]
fn test_conversion_sst_bloom_filter() {
fn test_conversion_sst_filter() {
let sst_filter = SstFilter {
row_group_filters: vec![
RowGroupFilter {
Expand Down Expand Up @@ -422,7 +422,7 @@ mod tests {
);
assert!(sst_filter_pb.row_group_filters[1].column_filters[1].is_empty());

let decoded_bloom_filter = SstFilter::try_from(sst_filter_pb).unwrap();
assert_eq!(decoded_bloom_filter, sst_filter);
let decoded_sst_filter = SstFilter::try_from(sst_filter_pb).unwrap();
assert_eq!(decoded_sst_filter, sst_filter);
}
}
6 changes: 3 additions & 3 deletions analytic_engine/src/sst/parquet/row_group_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::sst::reader::error::{OtherNoCause, Result};
/// predicates and filters.
///
/// Currently, two kinds of filters will be applied to such filtering:
/// min max & bloom filter.
/// min max & sst_filter.
pub struct RowGroupPruner<'a> {
schema: &'a SchemaRef,
row_groups: &'a [RowGroupMetaData],
Expand Down Expand Up @@ -68,7 +68,7 @@ impl<'a> RowGroupPruner<'a> {
let pruned = Self::intersect_pruned_row_groups(&pruned0, &pruned1);

debug!(
"Finish prune row groups by blooms and min_max, total_row_groups:{}, pruned_by_min_max:{}, pruned_by_blooms:{}, pruned_by_both:{}",
"Finish prune row groups by sst_filter and min_max, total_row_groups:{}, pruned_by_min_max:{}, pruned_by_blooms:{}, pruned_by_both:{}",
self.row_groups.len(),
pruned0.len(),
pruned1.len(),
Expand Down Expand Up @@ -101,7 +101,7 @@ impl<'a> RowGroupPruner<'a> {
.contains_column_data(col_pos.column_idx, &datum.to_bytes())?;
if exist {
// sst_filter can yield false positives, that is to say we are unsure whether this
// value exists even if the bloom filter says it exists.
// value exists even if the sst_filter says it exists.
None
} else {
Some(negated)
Expand Down
2 changes: 1 addition & 1 deletion analytic_engine/src/sst/parquet/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ mod tests {
.unwrap()
.as_ref()
.clone();
// bloom filter is built insider sst writer, so overwrite to default for
// sst filter is built inside sst writer, so overwrite to default for
// comparison.
sst_meta_readback.sst_filter = Default::default();
assert_eq!(&sst_meta_readback, &ParquetMetaData::from(sst_meta));
Expand Down

0 comments on commit bc77f15

Please sign in to comment.