Add row group pruning tests for different timestamp units
Weijun-H committed Feb 7, 2024
1 parent 4c05c91 commit 6c41090
Showing 2 changed files with 87 additions and 5 deletions.
@@ -1075,6 +1075,8 @@ mod tests {
fn row_group_pruning_predicate_timestamp() {
// For the timestamp data type, parquet can use `INT64` to store the data.
// In this case, construct four types of statistics to be filtered with the timestamp predicate.

// Nanoseconds
let schema = Arc::new(Schema::new(vec![Field::new(
"c1",
DataType::Timestamp(TimeUnit::Nanosecond, None),
@@ -1151,13 +1153,97 @@
prune_row_groups_by_statistics(
&schema,
&schema_descr,
-&[rgm1, rgm2, rgm3, rgm4],
+&[rgm1.clone(), rgm2.clone(), rgm3.clone(), rgm4.clone()],
None,
Some(&pruning_predicate),
&metrics
),
vec![0, 1, 3]
);

// Microseconds
let schema = Arc::new(Schema::new(vec![Field::new(
"c1",
DataType::Timestamp(TimeUnit::Microsecond, None),
false,
)]));
let field = PrimitiveTypeField::new("c1", PhysicalType::INT64).with_logical_type(
LogicalType::Timestamp {
unit: ParquetTimeUnit::MICROS(Default::default()),
is_adjusted_to_u_t_c: false,
},
);
let schema_descr = get_test_schema_descr(vec![field]);
let expr = col("c1").gt(lit(ScalarValue::TimestampMicrosecond(Some(1000), None)));
let expr = logical2physical(&expr, &schema);
let pruning_predicate = PruningPredicate::try_new(expr, schema.clone()).unwrap();
assert_eq!(
prune_row_groups_by_statistics(
&schema,
&schema_descr,
&[rgm1.clone(), rgm2.clone(), rgm3.clone(), rgm4.clone()],
None,
Some(&pruning_predicate),
&metrics
),
vec![0, 1, 3]
);

// Milliseconds
let schema = Arc::new(Schema::new(vec![Field::new(
"c1",
DataType::Timestamp(TimeUnit::Millisecond, None),
false,
)]));
let field = PrimitiveTypeField::new("c1", PhysicalType::INT64).with_logical_type(
LogicalType::Timestamp {
unit: ParquetTimeUnit::MILLIS(Default::default()),
is_adjusted_to_u_t_c: false,
},
);
let schema_descr = get_test_schema_descr(vec![field]);
let expr = col("c1").gt(lit(ScalarValue::TimestampMillisecond(Some(1000), None)));
let expr = logical2physical(&expr, &schema);
let pruning_predicate = PruningPredicate::try_new(expr, schema.clone()).unwrap();
assert_eq!(
prune_row_groups_by_statistics(
&schema,
&schema_descr,
&[rgm1.clone(), rgm2.clone(), rgm3.clone(), rgm4.clone()],
None,
Some(&pruning_predicate),
&metrics
),
vec![0, 1, 3]
);

// Seconds
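// Note: parquet's LogicalType::Timestamp has no SECONDS unit (only MILLIS,
// MICROS, and NANOS), which is presumably why the parquet metadata below
// declares MILLIS while the Arrow schema uses TimeUnit::Second.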
let schema = Arc::new(Schema::new(vec![Field::new(
"c1",
DataType::Timestamp(TimeUnit::Second, None),
false,
)]));
let field = PrimitiveTypeField::new("c1", PhysicalType::INT64).with_logical_type(
LogicalType::Timestamp {
unit: ParquetTimeUnit::MILLIS(Default::default()),
is_adjusted_to_u_t_c: false,
},
);
let schema_descr = get_test_schema_descr(vec![field]);
let expr = col("c1").gt(lit(ScalarValue::TimestampSecond(Some(1000), None)));
let expr = logical2physical(&expr, &schema);
let pruning_predicate = PruningPredicate::try_new(expr, schema.clone()).unwrap();
assert_eq!(
prune_row_groups_by_statistics(
&schema,
&schema_descr,
&[rgm1.clone(), rgm2.clone(), rgm3.clone(), rgm4.clone()],
None,
Some(&pruning_predicate),
&metrics
),
vec![3]
);
}

fn get_row_group_meta_data(
@@ -354,10 +354,6 @@ mod test {
}

#[test]
-#[should_panic(
-    expected = "Inconsistent types in ScalarValue::iter_to_array. Expected Int64, got TimestampNanosecond(NULL, None)"
-)]
-// Due to https://github.com/apache/arrow-datafusion/issues/8295
fn roundtrip_timestamp() {
Test {
input: timestamp_array([
