Skip to content

Commit

Permalink
feat: add some logs for reading sst (#581)
Browse files Browse the repository at this point in the history
* feat: add some logs for reading sst

* add build-slim target in the Makefile

* capitalize first letter of log message
  • Loading branch information
ShiKaiWi authored Jan 18, 2023
1 parent 0163f60 commit 4f6a417
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 3 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ build:
ls -alh
cd $(DIR); cargo build --release

# Build the project in $(DIR) with cargo's `release-slim` profile.
# NOTE(review): the `release-slim` profile definition is not visible here —
# presumably declared in the workspace Cargo.toml; confirm.
build-slim:
ls -alh
cd $(DIR); cargo build --profile release-slim

build-asan:
ls -alh
export RUSTFLAGS=-Zsanitizer=address RUSTDOCFLAGS=-Zsanitizer=address
Expand Down
2 changes: 1 addition & 1 deletion analytic_engine/src/sst/parquet/async_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ impl ObjectStoreReader {

// Logs the reader's `metrics` (via its `Debug` formatting) when the reader is dropped.
impl Drop for ObjectStoreReader {
fn drop(&mut self) {
// NOTE(review): this listing is a rendered diff without +/- markers; the
// `info!` line below looks like the pre-change line and the `debug!` line its
// replacement (the commit reports 1 addition and 1 deletion for this file).
info!("ObjectStoreReader dropped, metrics:{:?}", self.metrics);
debug!("ObjectStoreReader dropped, metrics:{:?}", self.metrics);
}
}

Expand Down
29 changes: 27 additions & 2 deletions analytic_engine/src/sst/parquet/row_group_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use arrow::datatypes::SchemaRef;
use common_types::datum::Datum;
use datafusion::{prelude::Expr, scalar::ScalarValue};
use ethbloom::{Bloom, Input};
use log::debug;
use parquet::file::metadata::RowGroupMetaData;
use parquet_ext::prune::{
equal::{self, ColumnPosition},
Expand Down Expand Up @@ -50,15 +51,39 @@ impl<'a> RowGroupFilter<'a> {
}

/// Filters the row groups by min/max and, when `self.blooms` is present, also by
/// bloom filters, returning the intersection of both results.
///
/// Emits `debug!` logs before filtering starts and after it finishes, reporting
/// the total number of row groups and the size of each filtered result.
///
/// NOTE(review): the returned `Vec<usize>` presumably holds row group indexes —
/// confirm against `filter_by_min_max` / `filter_by_bloom`, which are not shown.
pub fn filter(&self) -> Vec<usize> {
debug!(
"Begin to filter row groups, total_row_groups:{}, bloom_filtering:{}, predicates:{:?}",
self.row_groups.len(),
self.blooms.is_some(),
self.predicates,
);

// Min/max filtering always runs, whether or not blooms are available.
let filtered0 = self.filter_by_min_max();
match self.blooms {
Some(v) => {
// TODO: We can do continuous filtering based on the `filtered0` to reduce the
// filtering cost.
let filtered1 = self.filter_by_bloom(v);
// NOTE(review): this listing is a rendered diff without +/- markers; the next
// line looks like the pre-change line, replaced by the `let filtered = ...`
// binding that follows it.
Self::intersect_filtered_row_groups(&filtered0, &filtered1)
let filtered = Self::intersect_filtered_row_groups(&filtered0, &filtered1);

debug!(
"Finish filtering row groups by blooms and min_max, total_row_groups:{}, filtered_by_min_max:{}, filtered_by_blooms:{}, filtered_by_both:{}",
self.row_groups.len(),
filtered0.len(),
filtered1.len(),
filtered.len(),
);

filtered
}
None => {
// No blooms: the min/max result is returned as-is.
debug!(
"Finish filtering row groups by min_max, total_row_groups:{}, filtered_row_groups:{}",
self.row_groups.len(),
filtered0.len(),
);
filtered0
}
// NOTE(review): the arm below looks like the pre-change `None` arm left over
// from the diff rendering; the block-form `None` arm above supersedes it.
None => filtered0,
}
}

Expand Down

0 comments on commit 4f6a417

Please sign in to comment.