@@ -30,6 +30,7 @@ use datafusion::execution::context::SessionState;
30
30
use datafusion:: logical_expr:: Expr ;
31
31
use datafusion:: physical_plan:: memory:: MemoryExec ;
32
32
use datafusion:: physical_plan:: ExecutionPlan ;
33
+ use datafusion:: scalar:: ScalarValue ;
33
34
use parquet:: file:: reader:: FileReader ;
34
35
use parquet:: file:: serialized_reader:: SerializedFileReader ;
35
36
use parquet:: file:: statistics:: Statistics ;
@@ -249,11 +250,17 @@ pub struct ParquetMetadataFunc {}
249
250
250
251
impl TableFunctionImpl for ParquetMetadataFunc {
251
252
fn call ( & self , exprs : & [ Expr ] ) -> Result < Arc < dyn TableProvider > > {
252
- let Some ( Expr :: Column ( Column { name, .. } ) ) = exprs. get ( 0 ) else {
253
- return plan_err ! ( "parquet_metadata requires string argument as its input" ) ;
253
+ let filename = match exprs. get ( 0 ) {
254
+ Some ( Expr :: Literal ( ScalarValue :: Utf8 ( Some ( s) ) ) ) => s, // single quote: parquet_metadata('x.parquet')
255
+ Some ( Expr :: Column ( Column { name, .. } ) ) => name, // double quote: parquet_metadata("x.parquet")
256
+ _ => {
257
+ return plan_err ! (
258
+ "parquet_metadata requires string argument as its input"
259
+ ) ;
260
+ }
254
261
} ;
255
262
256
- let file = File :: open ( name . clone ( ) ) ?;
263
+ let file = File :: open ( filename . clone ( ) ) ?;
257
264
let reader = SerializedFileReader :: new ( file) ?;
258
265
let metadata = reader. metadata ( ) ;
259
266
@@ -309,7 +316,7 @@ impl TableFunctionImpl for ParquetMetadataFunc {
309
316
let mut total_uncompressed_size_arr = vec ! [ ] ;
310
317
for ( rg_idx, row_group) in metadata. row_groups ( ) . iter ( ) . enumerate ( ) {
311
318
for ( col_idx, column) in row_group. columns ( ) . iter ( ) . enumerate ( ) {
312
- filename_arr. push ( name . clone ( ) ) ;
319
+ filename_arr. push ( filename . clone ( ) ) ;
313
320
row_group_id_arr. push ( rg_idx as i64 ) ;
314
321
row_group_num_rows_arr. push ( row_group. num_rows ( ) ) ;
315
322
row_group_num_columns_arr. push ( row_group. num_columns ( ) as i64 ) ;
@@ -320,38 +327,43 @@ impl TableFunctionImpl for ParquetMetadataFunc {
320
327
path_in_schema_arr. push ( column. column_path ( ) . to_string ( ) ) ;
321
328
type_arr. push ( column. column_type ( ) . to_string ( ) ) ;
322
329
if let Some ( s) = column. statistics ( ) {
323
- let ( min_val, max_val) = match s {
324
- Statistics :: Boolean ( val) => {
325
- ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
326
- }
327
- Statistics :: Int32 ( val) => {
328
- ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
329
- }
330
- Statistics :: Int64 ( val) => {
331
- ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
332
- }
333
- Statistics :: Int96 ( val) => {
334
- ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
335
- }
336
- Statistics :: Float ( val) => {
337
- ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
338
- }
339
- Statistics :: Double ( val) => {
340
- ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
341
- }
342
- Statistics :: ByteArray ( val) => {
343
- ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
344
- }
345
- Statistics :: FixedLenByteArray ( val) => {
346
- ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
347
- }
330
+ let ( min_val, max_val) = if s. has_min_max_set ( ) {
331
+ let ( min_val, max_val) = match s {
332
+ Statistics :: Boolean ( val) => {
333
+ ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
334
+ }
335
+ Statistics :: Int32 ( val) => {
336
+ ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
337
+ }
338
+ Statistics :: Int64 ( val) => {
339
+ ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
340
+ }
341
+ Statistics :: Int96 ( val) => {
342
+ ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
343
+ }
344
+ Statistics :: Float ( val) => {
345
+ ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
346
+ }
347
+ Statistics :: Double ( val) => {
348
+ ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
349
+ }
350
+ Statistics :: ByteArray ( val) => {
351
+ ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
352
+ }
353
+ Statistics :: FixedLenByteArray ( val) => {
354
+ ( val. min ( ) . to_string ( ) , val. max ( ) . to_string ( ) )
355
+ }
356
+ } ;
357
+ ( Some ( min_val) , Some ( max_val) )
358
+ } else {
359
+ ( None , None )
348
360
} ;
349
- stats_min_arr. push ( Some ( min_val. clone ( ) ) ) ;
350
- stats_max_arr. push ( Some ( max_val. clone ( ) ) ) ;
361
+ stats_min_arr. push ( min_val. clone ( ) ) ;
362
+ stats_max_arr. push ( max_val. clone ( ) ) ;
351
363
stats_null_count_arr. push ( Some ( s. null_count ( ) as i64 ) ) ;
352
364
stats_distinct_count_arr. push ( s. distinct_count ( ) . map ( |c| c as i64 ) ) ;
353
- stats_min_value_arr. push ( Some ( min_val) ) ;
354
- stats_max_value_arr. push ( Some ( max_val) ) ;
365
+ stats_min_value_arr. push ( min_val) ;
366
+ stats_max_value_arr. push ( max_val) ;
355
367
} else {
356
368
stats_min_arr. push ( None ) ;
357
369
stats_max_arr. push ( None ) ;
0 commit comments