Skip to content

Commit

Permalink
fix: xor filter build (#666)
Browse files Browse the repository at this point in the history
* test: add string column testcase

* init when builder is none

* add testcase
  • Loading branch information
jiacai2050 authored Feb 27, 2023
1 parent 9e06dcc commit 617ca95
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 13 deletions.
1 change: 1 addition & 0 deletions analytic_engine/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

//! Analytic table engine implementations

#![feature(option_get_or_insert_default)]
mod compaction;
mod context;
mod engine;
Expand Down
22 changes: 19 additions & 3 deletions analytic_engine/src/sst/parquet/meta_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,7 @@ impl RowGroupFilterBuilder {
}

/// Inserts `key` into the filter builder stored at `self.builders[col_idx]`.
///
/// NOTE(review): this span comes from a diff view whose +/- markers were
/// stripped. Given the hunk header ("19 additions & 3 deletions"), the
/// three-line `if let` form appears to be the removed implementation and the
/// `get_or_insert_default()` line its replacement — confirm against the real
/// file before relying on both being present.
pub(crate) fn add_key(&mut self, col_idx: usize, key: &[u8]) {
// `if let` form: the key is inserted only when the slot already holds a
// builder; a `None` slot is left untouched and the key is dropped.
if let Some(b) = self.builders[col_idx].as_mut() {
b.insert(key)
}
// `get_or_insert_default` form: a `None` slot is first filled with a
// default builder, so the key is always inserted.
self.builders[col_idx].get_or_insert_default().insert(key)
}

pub(crate) fn build(self) -> Result<RowGroupFilter> {
Expand Down Expand Up @@ -422,4 +420,22 @@ mod tests {
let decoded_parquet_filter = ParquetFilter::try_from(parquet_filter_pb).unwrap();
assert_eq!(decoded_parquet_filter, parquet_filter);
}

/// Feeds three keys into a single-column `RowGroupFilterBuilder`, builds the
/// filter, and checks that an inserted key is reported as contained while a
/// key that was never inserted is not.
#[test]
fn test_row_group_filter_builder() {
    const COLUMN: usize = 0;
    let inserted_keys = ["host-123", "host-456", "host-789"];
    let probes = [("host-123", true), ("host-321", false)];

    // Populate the only column of the builder, then freeze it into a filter.
    let mut filter_builder = RowGroupFilterBuilder::with_num_columns(1);
    for inserted in inserted_keys {
        filter_builder.add_key(COLUMN, inserted.as_bytes());
    }
    let filter = filter_builder.build().unwrap();

    // Each probe pairs a key with the membership answer we expect for it.
    for (probe, should_contain) in probes {
        let found = filter
            .contains_column_data(COLUMN, probe.as_bytes())
            .unwrap();

        assert_eq!(should_contain, found);
    }
}
}
35 changes: 25 additions & 10 deletions components/parquet_ext/src/prune/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,43 +243,58 @@ mod test {
.unwrap()
}

/// Test helper: builds `Statistics::int32` with both bounds present
/// (`min` and `max`); the remaining arguments are fixed to `None`, `0` and
/// `false`.
fn int32_stat(min: i32, max: i32) -> Statistics {
    let (lower, upper) = (Some(min), Some(max));
    Statistics::int32(lower, upper, None, 0, false)
}

/// Test helper: builds `Statistics::byte_array` with both bounds present,
/// converting `min` and `max` from `&str` via `Into`; the remaining arguments
/// are fixed to `None`, `0` and `false`.
fn string_stat(min: &str, max: &str) -> Statistics {
    let lower = Some(min.into());
    let upper = Some(max.into());
    Statistics::byte_array(lower, upper, None, 0, false)
}

#[test]
fn test_row_group_filter() {
let testcases = vec![
// (expr, stat, schema, expected)
(
col("a").eq(lit(5i64)), // a == 5
10,
20,
int32_stat(10, 20),
vec![("a", ArrowDataType::Int64)],
vec![],
),
(
col("a").eq(lit(14i64)), // a == 14
10,
20,
int32_stat(10, 20),
vec![("a", ArrowDataType::Int64)],
vec![0],
),
(
col("a").lt(col("b")), // a < b
10,
20,
int32_stat(10, 20),
vec![("a", ArrowDataType::Int32), ("b", ArrowDataType::Int32)],
// nothing actually gets calculated.
vec![0],
),
(
col("a").in_list(vec![lit(17i64), lit(100i64)], false), // a in (17, 100)
101,
200,
int32_stat(101, 200),
vec![("a", ArrowDataType::Int64)],
vec![],
),
(
col("hostname").eq(lit("host-1794")), // hostname == host-1794
string_stat("host-18000", "host-20000"),
vec![("hostname", ArrowDataType::Utf8)],
vec![],
),
(
col("hostname").eq(lit("host-1794")), // hostname == host-1794
string_stat("host-1000", "host-20000"),
vec![("hostname", ArrowDataType::Utf8)],
vec![0],
),
];

for (expr, min, max, schema, expected) in testcases {
let stat = Statistics::int32(Some(min), Some(max), None, 0, false);
for (expr, stat, schema, expected) in testcases {
let schema = prepare_arrow_schema(schema);
let metadata = prepare_metadata(&schema, stat);

Expand Down

0 comments on commit 617ca95

Please sign in to comment.