2121//! respect to the required sort order. See [`MinMaxStatistics`]
2222
2323use futures:: { Stream , StreamExt } ;
24- use std:: mem;
2524use std:: sync:: Arc ;
2625
2726use crate :: file_groups:: FileGroup ;
@@ -34,7 +33,6 @@ use arrow::{
3433 row:: { Row , Rows } ,
3534} ;
3635use datafusion_common:: stats:: Precision ;
37- use datafusion_common:: ScalarValue ;
3836use datafusion_common:: { plan_datafusion_err, plan_err, DataFusionError , Result } ;
3937use datafusion_physical_expr:: { expressions:: Column , PhysicalSortExpr } ;
4038use datafusion_physical_expr_common:: sort_expr:: LexOrdering ;
@@ -357,10 +355,9 @@ pub async fn get_statistics_with_limit(
357355 // counts across all the files in question. If any file does not
358356 // provide any information or provides an inexact value, we demote
359357 // the statistic precision to inexact.
360- num_rows = add_row_stats ( file_stats. num_rows , num_rows) ;
358+ num_rows = num_rows . add ( & file_stats. num_rows ) ;
361359
362- total_byte_size =
363- add_row_stats ( file_stats. total_byte_size , total_byte_size) ;
360+ total_byte_size = total_byte_size. add ( & file_stats. total_byte_size ) ;
364361
365362 for ( file_col_stats, col_stats) in file_stats
366363 . column_statistics
@@ -375,10 +372,10 @@ pub async fn get_statistics_with_limit(
375372 distinct_count : _,
376373 } = file_col_stats;
377374
378- col_stats. null_count = add_row_stats ( * file_nc , col_stats. null_count ) ;
379- set_max_if_greater ( file_max , & mut col_stats. max_value ) ;
380- set_min_if_lesser ( file_min , & mut col_stats. min_value ) ;
381- col_stats. sum_value = file_sum . add ( & col_stats. sum_value ) ;
375+ col_stats. null_count = col_stats. null_count . add ( file_nc ) ;
376+ col_stats . max_value = col_stats. max_value . max ( file_max ) ;
377+ col_stats . min_value = col_stats. min_value . min ( file_min ) ;
378+ col_stats. sum_value = col_stats. sum_value . add ( file_sum ) ;
382379 }
383380
384381 // If the number of rows exceeds the limit, we can stop processing
@@ -441,19 +438,19 @@ where
441438 }
442439
443440 // Accumulate statistics for subsequent items
444- num_rows = add_row_stats ( item_stats. num_rows , num_rows) ;
445- total_byte_size = add_row_stats ( item_stats. total_byte_size , total_byte_size) ;
441+ num_rows = num_rows . add ( & item_stats. num_rows ) ;
442+ total_byte_size = total_byte_size . add ( & item_stats. total_byte_size ) ;
446443
447444 for ( item_col_stats, col_stats) in item_stats
448445 . column_statistics
449446 . iter ( )
450447 . zip ( col_stats_set. iter_mut ( ) )
451448 {
452449 col_stats. null_count =
453- add_row_stats ( item_col_stats . null_count , col_stats . null_count ) ;
454- set_max_if_greater ( & item_col_stats . max_value , & mut col_stats. max_value ) ;
455- set_min_if_lesser ( & item_col_stats . min_value , & mut col_stats. min_value ) ;
456- col_stats. sum_value = item_col_stats . sum_value . add ( & col_stats . sum_value ) ;
450+ col_stats . null_count . add ( & item_col_stats . null_count ) ;
451+ col_stats . max_value = col_stats. max_value . max ( & item_col_stats . max_value ) ;
452+ col_stats . min_value = col_stats. min_value . min ( & item_col_stats . min_value ) ;
453+ col_stats. sum_value = col_stats . sum_value . add ( & item_col_stats . sum_value ) ;
457454 }
458455 }
459456 }
@@ -545,77 +542,12 @@ pub fn compute_all_files_statistics(
545542 Ok ( ( file_groups_with_stats, statistics) )
546543}
547544
545+ #[ deprecated( since = "47.0.0" , note = "Use Statistics::add" ) ]
548546pub fn add_row_stats (
549547 file_num_rows : Precision < usize > ,
550548 num_rows : Precision < usize > ,
551549) -> Precision < usize > {
552- match ( file_num_rows, & num_rows) {
553- ( Precision :: Absent , _) => num_rows. to_inexact ( ) ,
554- ( lhs, Precision :: Absent ) => lhs. to_inexact ( ) ,
555- ( lhs, rhs) => lhs. add ( rhs) ,
556- }
557- }
558-
559- /// If the given value is numerically greater than the original maximum value,
560- /// return the new maximum value with appropriate exactness information.
561- fn set_max_if_greater (
562- max_nominee : & Precision < ScalarValue > ,
563- max_value : & mut Precision < ScalarValue > ,
564- ) {
565- match ( & max_value, max_nominee) {
566- ( Precision :: Exact ( val1) , Precision :: Exact ( val2) ) if val1 < val2 => {
567- * max_value = max_nominee. clone ( ) ;
568- }
569- ( Precision :: Exact ( val1) , Precision :: Inexact ( val2) )
570- | ( Precision :: Inexact ( val1) , Precision :: Inexact ( val2) )
571- | ( Precision :: Inexact ( val1) , Precision :: Exact ( val2) )
572- if val1 < val2 =>
573- {
574- * max_value = max_nominee. clone ( ) . to_inexact ( ) ;
575- }
576- ( Precision :: Exact ( _) , Precision :: Absent ) => {
577- let exact_max = mem:: take ( max_value) ;
578- * max_value = exact_max. to_inexact ( ) ;
579- }
580- ( Precision :: Absent , Precision :: Exact ( _) ) => {
581- * max_value = max_nominee. clone ( ) . to_inexact ( ) ;
582- }
583- ( Precision :: Absent , Precision :: Inexact ( _) ) => {
584- * max_value = max_nominee. clone ( ) ;
585- }
586- _ => { }
587- }
588- }
589-
590- /// If the given value is numerically lesser than the original minimum value,
591- /// return the new minimum value with appropriate exactness information.
592- fn set_min_if_lesser (
593- min_nominee : & Precision < ScalarValue > ,
594- min_value : & mut Precision < ScalarValue > ,
595- ) {
596- match ( & min_value, min_nominee) {
597- ( Precision :: Exact ( val1) , Precision :: Exact ( val2) ) if val1 > val2 => {
598- * min_value = min_nominee. clone ( ) ;
599- }
600- ( Precision :: Exact ( val1) , Precision :: Inexact ( val2) )
601- | ( Precision :: Inexact ( val1) , Precision :: Inexact ( val2) )
602- | ( Precision :: Inexact ( val1) , Precision :: Exact ( val2) )
603- if val1 > val2 =>
604- {
605- * min_value = min_nominee. clone ( ) . to_inexact ( ) ;
606- }
607- ( Precision :: Exact ( _) , Precision :: Absent ) => {
608- let exact_min = mem:: take ( min_value) ;
609- * min_value = exact_min. to_inexact ( ) ;
610- }
611- ( Precision :: Absent , Precision :: Exact ( _) ) => {
612- * min_value = min_nominee. clone ( ) . to_inexact ( ) ;
613- }
614- ( Precision :: Absent , Precision :: Inexact ( _) ) => {
615- * min_value = min_nominee. clone ( ) ;
616- }
617- _ => { }
618- }
550+ file_num_rows. add ( & num_rows)
619551}
620552
621553#[ cfg( test) ]
0 commit comments