Skip to content

Commit ef8b5ea

Browse files
committed
Remove redundant code in favor of min/max/add
1 parent 7ef5c4b commit ef8b5ea

File tree

3 files changed

+17
-84
lines changed

3 files changed

+17
-84
lines changed

datafusion/core/src/datasource/listing/table.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ use datafusion_physical_expr::{
5454
use async_trait::async_trait;
5555
use datafusion_catalog::Session;
5656
use datafusion_common::stats::Precision;
57-
use datafusion_datasource::add_row_stats;
5857
use datafusion_datasource::compute_all_files_statistics;
5958
use datafusion_datasource::file_groups::FileGroup;
6059
use datafusion_physical_expr_common::sort_expr::LexRequirement;
@@ -1230,7 +1229,7 @@ async fn get_files_with_limit(
12301229
file_stats.num_rows
12311230
} else {
12321231
// For subsequent files, accumulate the counts
1233-
add_row_stats(num_rows, file_stats.num_rows)
1232+
num_rows.add(&file_stats.num_rows)
12341233
};
12351234
}
12361235
}

datafusion/datasource/src/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ use file_meta::FileMeta;
5858
use futures::{Stream, StreamExt};
5959
use object_store::{path::Path, ObjectMeta};
6060
use object_store::{GetOptions, GetRange, ObjectStore};
61+
// Remove when add_row_stats is remove
62+
#[allow(deprecated)]
6163
pub use statistics::add_row_stats;
6264
pub use statistics::compute_all_files_statistics;
6365
use std::ops::Range;

datafusion/datasource/src/statistics.rs

Lines changed: 14 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
//! respect to the required sort order. See [`MinMaxStatistics`]
2222
2323
use futures::{Stream, StreamExt};
24-
use std::mem;
2524
use std::sync::Arc;
2625

2726
use crate::file_groups::FileGroup;
@@ -34,7 +33,6 @@ use arrow::{
3433
row::{Row, Rows},
3534
};
3635
use datafusion_common::stats::Precision;
37-
use datafusion_common::ScalarValue;
3836
use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, Result};
3937
use datafusion_physical_expr::{expressions::Column, PhysicalSortExpr};
4038
use datafusion_physical_expr_common::sort_expr::LexOrdering;
@@ -357,10 +355,9 @@ pub async fn get_statistics_with_limit(
357355
// counts across all the files in question. If any file does not
358356
// provide any information or provides an inexact value, we demote
359357
// the statistic precision to inexact.
360-
num_rows = add_row_stats(file_stats.num_rows, num_rows);
358+
num_rows = num_rows.add(&file_stats.num_rows);
361359

362-
total_byte_size =
363-
add_row_stats(file_stats.total_byte_size, total_byte_size);
360+
total_byte_size = total_byte_size.add(&file_stats.total_byte_size);
364361

365362
for (file_col_stats, col_stats) in file_stats
366363
.column_statistics
@@ -375,10 +372,10 @@ pub async fn get_statistics_with_limit(
375372
distinct_count: _,
376373
} = file_col_stats;
377374

378-
col_stats.null_count = add_row_stats(*file_nc, col_stats.null_count);
379-
set_max_if_greater(file_max, &mut col_stats.max_value);
380-
set_min_if_lesser(file_min, &mut col_stats.min_value);
381-
col_stats.sum_value = file_sum.add(&col_stats.sum_value);
375+
col_stats.null_count = col_stats.null_count.add(file_nc);
376+
col_stats.max_value = col_stats.max_value.max(file_max);
377+
col_stats.min_value = col_stats.min_value.min(file_min);
378+
col_stats.sum_value = col_stats.sum_value.add(file_sum);
382379
}
383380

384381
// If the number of rows exceeds the limit, we can stop processing
@@ -441,19 +438,19 @@ where
441438
}
442439

443440
// Accumulate statistics for subsequent items
444-
num_rows = add_row_stats(item_stats.num_rows, num_rows);
445-
total_byte_size = add_row_stats(item_stats.total_byte_size, total_byte_size);
441+
num_rows = num_rows.add(&item_stats.num_rows);
442+
total_byte_size = total_byte_size.add(&item_stats.total_byte_size);
446443

447444
for (item_col_stats, col_stats) in item_stats
448445
.column_statistics
449446
.iter()
450447
.zip(col_stats_set.iter_mut())
451448
{
452449
col_stats.null_count =
453-
add_row_stats(item_col_stats.null_count, col_stats.null_count);
454-
set_max_if_greater(&item_col_stats.max_value, &mut col_stats.max_value);
455-
set_min_if_lesser(&item_col_stats.min_value, &mut col_stats.min_value);
456-
col_stats.sum_value = item_col_stats.sum_value.add(&col_stats.sum_value);
450+
col_stats.null_count.add(&item_col_stats.null_count);
451+
col_stats.max_value = col_stats.max_value.max(&item_col_stats.max_value);
452+
col_stats.min_value = col_stats.min_value.min(&item_col_stats.min_value);
453+
col_stats.sum_value = col_stats.sum_value.add(&item_col_stats.sum_value);
457454
}
458455
}
459456
}
@@ -545,77 +542,12 @@ pub fn compute_all_files_statistics(
545542
Ok((file_groups_with_stats, statistics))
546543
}
547544

545+
#[deprecated(since = "47.0.0", note = "Use Statistics::add")]
548546
pub fn add_row_stats(
549547
file_num_rows: Precision<usize>,
550548
num_rows: Precision<usize>,
551549
) -> Precision<usize> {
552-
match (file_num_rows, &num_rows) {
553-
(Precision::Absent, _) => num_rows.to_inexact(),
554-
(lhs, Precision::Absent) => lhs.to_inexact(),
555-
(lhs, rhs) => lhs.add(rhs),
556-
}
557-
}
558-
559-
/// If the given value is numerically greater than the original maximum value,
560-
/// return the new maximum value with appropriate exactness information.
561-
fn set_max_if_greater(
562-
max_nominee: &Precision<ScalarValue>,
563-
max_value: &mut Precision<ScalarValue>,
564-
) {
565-
match (&max_value, max_nominee) {
566-
(Precision::Exact(val1), Precision::Exact(val2)) if val1 < val2 => {
567-
*max_value = max_nominee.clone();
568-
}
569-
(Precision::Exact(val1), Precision::Inexact(val2))
570-
| (Precision::Inexact(val1), Precision::Inexact(val2))
571-
| (Precision::Inexact(val1), Precision::Exact(val2))
572-
if val1 < val2 =>
573-
{
574-
*max_value = max_nominee.clone().to_inexact();
575-
}
576-
(Precision::Exact(_), Precision::Absent) => {
577-
let exact_max = mem::take(max_value);
578-
*max_value = exact_max.to_inexact();
579-
}
580-
(Precision::Absent, Precision::Exact(_)) => {
581-
*max_value = max_nominee.clone().to_inexact();
582-
}
583-
(Precision::Absent, Precision::Inexact(_)) => {
584-
*max_value = max_nominee.clone();
585-
}
586-
_ => {}
587-
}
588-
}
589-
590-
/// If the given value is numerically lesser than the original minimum value,
591-
/// return the new minimum value with appropriate exactness information.
592-
fn set_min_if_lesser(
593-
min_nominee: &Precision<ScalarValue>,
594-
min_value: &mut Precision<ScalarValue>,
595-
) {
596-
match (&min_value, min_nominee) {
597-
(Precision::Exact(val1), Precision::Exact(val2)) if val1 > val2 => {
598-
*min_value = min_nominee.clone();
599-
}
600-
(Precision::Exact(val1), Precision::Inexact(val2))
601-
| (Precision::Inexact(val1), Precision::Inexact(val2))
602-
| (Precision::Inexact(val1), Precision::Exact(val2))
603-
if val1 > val2 =>
604-
{
605-
*min_value = min_nominee.clone().to_inexact();
606-
}
607-
(Precision::Exact(_), Precision::Absent) => {
608-
let exact_min = mem::take(min_value);
609-
*min_value = exact_min.to_inexact();
610-
}
611-
(Precision::Absent, Precision::Exact(_)) => {
612-
*min_value = min_nominee.clone().to_inexact();
613-
}
614-
(Precision::Absent, Precision::Inexact(_)) => {
615-
*min_value = min_nominee.clone();
616-
}
617-
_ => {}
618-
}
550+
file_num_rows.add(&num_rows)
619551
}
620552

621553
#[cfg(test)]

0 commit comments

Comments
 (0)