From 05c6227ba63e3f3b66f9328b96c49d686d866516 Mon Sep 17 00:00:00 2001 From: WEI Xikai Date: Wed, 18 Jan 2023 14:02:09 +0800 Subject: [PATCH] feat: add some logs for reading sst (#581) * feat: add some logs for reading sst * add build-slim target in the Makefile * capitalize first letter of log message --- Makefile | 4 +++ .../src/sst/parquet/async_reader.rs | 2 +- .../src/sst/parquet/row_group_filter.rs | 29 +++++++++++++++++-- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 553b93feb0..15f36760c1 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,10 @@ build: ls -alh cd $(DIR); cargo build --release +build-slim: + ls -alh + cd $(DIR); cargo build --profile release-slim + build-asan: ls -alh export RUSTFLAGS=-Zsanitizer=address RUSTDOCFLAGS=-Zsanitizer=address diff --git a/analytic_engine/src/sst/parquet/async_reader.rs b/analytic_engine/src/sst/parquet/async_reader.rs index 9e40ff8b71..fe2434b283 100644 --- a/analytic_engine/src/sst/parquet/async_reader.rs +++ b/analytic_engine/src/sst/parquet/async_reader.rs @@ -373,7 +373,7 @@ impl ObjectStoreReader { impl Drop for ObjectStoreReader { fn drop(&mut self) { - info!("ObjectStoreReader dropped, metrics:{:?}", self.metrics); + debug!("ObjectStoreReader dropped, metrics:{:?}", self.metrics); } } diff --git a/analytic_engine/src/sst/parquet/row_group_filter.rs b/analytic_engine/src/sst/parquet/row_group_filter.rs index 33cd967a47..f8f07f16d1 100644 --- a/analytic_engine/src/sst/parquet/row_group_filter.rs +++ b/analytic_engine/src/sst/parquet/row_group_filter.rs @@ -8,6 +8,7 @@ use arrow::datatypes::SchemaRef; use common_types::datum::Datum; use datafusion::{prelude::Expr, scalar::ScalarValue}; use ethbloom::{Bloom, Input}; +use log::debug; use parquet::file::metadata::RowGroupMetaData; use parquet_ext::prune::{ equal::{self, ColumnPosition}, @@ -50,15 +51,39 @@ impl<'a> RowGroupFilter<'a> { } pub fn filter(&self) -> Vec { + debug!( + "Begin to filter row groups, 
total_row_groups:{}, bloom_filtering:{}, predicates:{:?}", + self.row_groups.len(), + self.blooms.is_some(), + self.predicates, + ); + let filtered0 = self.filter_by_min_max(); match self.blooms { Some(v) => { // TODO: We can do continuous filtering based on the `filtered0` to reduce the // filtering cost. let filtered1 = self.filter_by_bloom(v); - Self::intersect_filtered_row_groups(&filtered0, &filtered1) + let filtered = Self::intersect_filtered_row_groups(&filtered0, &filtered1); + + debug!( + "Finish filtering row groups by blooms and min_max, total_row_groups:{}, filtered_by_min_max:{}, filtered_by_blooms:{}, filtered_by_both:{}", + self.row_groups.len(), + filtered0.len(), + filtered1.len(), + filtered.len(), + ); + + filtered + } + None => { + debug!( + "Finish filtering row groups by min_max, total_row_groups:{}, filtered_row_groups:{}", + self.row_groups.len(), + filtered0.len(), + ); + filtered0 } - None => filtered0, } }