@@ -313,6 +313,28 @@ impl FromStr for BuiltinScalarFunction {
313313 }
314314}
315315
/// Generates a private helper
/// `fn $FUNC(arg_type: &DataType, name: &str) -> Result<DataType>`
/// that maps the type of a string argument to a return type.
///
/// * `$FUNC` - identifier of the helper function to generate.
/// * `$large_utf8_type` - expression returned when `arg_type` is `DataType::LargeUtf8`.
/// * `$utf8_type` - expression returned when `arg_type` is `DataType::Utf8`.
///
/// For any other `arg_type` the generated helper returns
/// `DataFusionError::Internal`, with `name` interpolated into the message.
macro_rules! make_utf8_to_return_type {
    ($FUNC:ident, $large_utf8_type:expr, $utf8_type:expr) => {
        fn $FUNC(arg_type: &DataType, name: &str) -> Result<DataType> {
            match arg_type {
                DataType::LargeUtf8 => Ok($large_utf8_type),
                DataType::Utf8 => Ok($utf8_type),
                // this error is internal as `data_types` should have captured this.
                // `{}` is used instead of `{:?}`: Debug-formatting a `&str` would
                // wrap the function name in quotes inside the message.
                _ => Err(DataFusionError::Internal(format!(
                    "The {} function can only accept strings.",
                    name
                ))),
            }
        }
    };
}
333+
334+ make_utf8_to_return_type ! ( utf8_to_str_type, DataType :: LargeUtf8 , DataType :: Utf8 ) ;
335+ make_utf8_to_return_type ! ( utf8_to_int_type, DataType :: Int64 , DataType :: Int32 ) ;
336+ make_utf8_to_return_type ! ( utf8_to_binary_type, DataType :: Binary , DataType :: Binary ) ;
337+
316338/// Returns the datatype of the scalar function
317339pub fn return_type (
318340 fun : & BuiltinScalarFunction ,
@@ -332,260 +354,50 @@ pub fn return_type(
332354 arg_types. len ( ) as i32 ,
333355 ) ) ,
334356 BuiltinScalarFunction :: Ascii => Ok ( DataType :: Int32 ) ,
335- BuiltinScalarFunction :: BitLength => Ok ( match arg_types[ 0 ] {
336- DataType :: LargeUtf8 => DataType :: Int64 ,
337- DataType :: Utf8 => DataType :: Int32 ,
338- _ => {
339- // this error is internal as `data_types` should have captured this.
340- return Err ( DataFusionError :: Internal (
341- "The bit_length function can only accept strings." . to_string ( ) ,
342- ) ) ;
343- }
344- } ) ,
345- BuiltinScalarFunction :: Btrim => Ok ( match arg_types[ 0 ] {
346- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
347- DataType :: Utf8 => DataType :: Utf8 ,
348- _ => {
349- // this error is internal as `data_types` should have captured this.
350- return Err ( DataFusionError :: Internal (
351- "The btrim function can only accept strings." . to_string ( ) ,
352- ) ) ;
353- }
354- } ) ,
355- BuiltinScalarFunction :: CharacterLength => Ok ( match arg_types[ 0 ] {
356- DataType :: LargeUtf8 => DataType :: Int64 ,
357- DataType :: Utf8 => DataType :: Int32 ,
358- _ => {
359- // this error is internal as `data_types` should have captured this.
360- return Err ( DataFusionError :: Internal (
361- "The character_length function can only accept strings." . to_string ( ) ,
362- ) ) ;
363- }
364- } ) ,
357+ BuiltinScalarFunction :: BitLength => utf8_to_int_type ( & arg_types[ 0 ] , "bit_length" ) ,
358+ BuiltinScalarFunction :: Btrim => utf8_to_str_type ( & arg_types[ 0 ] , "btrim" ) ,
359+ BuiltinScalarFunction :: CharacterLength => {
360+ utf8_to_int_type ( & arg_types[ 0 ] , "character_length" )
361+ }
365362 BuiltinScalarFunction :: Chr => Ok ( DataType :: Utf8 ) ,
366363 BuiltinScalarFunction :: Concat => Ok ( DataType :: Utf8 ) ,
367364 BuiltinScalarFunction :: ConcatWithSeparator => Ok ( DataType :: Utf8 ) ,
368365 BuiltinScalarFunction :: DatePart => Ok ( DataType :: Int32 ) ,
369366 BuiltinScalarFunction :: DateTrunc => {
370367 Ok ( DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) )
371368 }
372- BuiltinScalarFunction :: InitCap => Ok ( match arg_types[ 0 ] {
373- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
374- DataType :: Utf8 => DataType :: Utf8 ,
375- _ => {
376- // this error is internal as `data_types` should have captured this.
377- return Err ( DataFusionError :: Internal (
378- "The initcap function can only accept strings." . to_string ( ) ,
379- ) ) ;
380- }
381- } ) ,
382- BuiltinScalarFunction :: Left => Ok ( match arg_types[ 0 ] {
383- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
384- DataType :: Utf8 => DataType :: Utf8 ,
385- _ => {
386- // this error is internal as `data_types` should have captured this.
387- return Err ( DataFusionError :: Internal (
388- "The left function can only accept strings." . to_string ( ) ,
389- ) ) ;
390- }
391- } ) ,
392- BuiltinScalarFunction :: Lower => Ok ( match arg_types[ 0 ] {
393- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
394- DataType :: Utf8 => DataType :: Utf8 ,
395- _ => {
396- // this error is internal as `data_types` should have captured this.
397- return Err ( DataFusionError :: Internal (
398- "The upper function can only accept strings." . to_string ( ) ,
399- ) ) ;
400- }
401- } ) ,
402- BuiltinScalarFunction :: Lpad => Ok ( match arg_types[ 0 ] {
403- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
404- DataType :: Utf8 => DataType :: Utf8 ,
405- _ => {
406- // this error is internal as `data_types` should have captured this.
407- return Err ( DataFusionError :: Internal (
408- "The lpad function can only accept strings." . to_string ( ) ,
409- ) ) ;
410- }
411- } ) ,
412- BuiltinScalarFunction :: Ltrim => Ok ( match arg_types[ 0 ] {
413- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
414- DataType :: Utf8 => DataType :: Utf8 ,
415- _ => {
416- // this error is internal as `data_types` should have captured this.
417- return Err ( DataFusionError :: Internal (
418- "The ltrim function can only accept strings." . to_string ( ) ,
419- ) ) ;
420- }
421- } ) ,
422- BuiltinScalarFunction :: MD5 => Ok ( match arg_types[ 0 ] {
423- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
424- DataType :: Utf8 => DataType :: Utf8 ,
425- _ => {
426- // this error is internal as `data_types` should have captured this.
427- return Err ( DataFusionError :: Internal (
428- "The md5 function can only accept strings." . to_string ( ) ,
429- ) ) ;
430- }
431- } ) ,
369+ BuiltinScalarFunction :: InitCap => utf8_to_str_type ( & arg_types[ 0 ] , "initcap" ) ,
370+ BuiltinScalarFunction :: Left => utf8_to_str_type ( & arg_types[ 0 ] , "left" ) ,
371+ BuiltinScalarFunction :: Lower => utf8_to_str_type ( & arg_types[ 0 ] , "lower" ) ,
372+ BuiltinScalarFunction :: Lpad => utf8_to_str_type ( & arg_types[ 0 ] , "lpad" ) ,
373+ BuiltinScalarFunction :: Ltrim => utf8_to_str_type ( & arg_types[ 0 ] , "ltrim" ) ,
374+ BuiltinScalarFunction :: MD5 => utf8_to_str_type ( & arg_types[ 0 ] , "md5" ) ,
432375 BuiltinScalarFunction :: NullIf => {
433376 // NULLIF has two args and they might get coerced, get a preview of this
434377 let coerced_types = data_types ( arg_types, & signature ( fun) ) ;
435378 coerced_types. map ( |typs| typs[ 0 ] . clone ( ) )
436379 }
437- BuiltinScalarFunction :: OctetLength => Ok ( match arg_types[ 0 ] {
438- DataType :: LargeUtf8 => DataType :: Int64 ,
439- DataType :: Utf8 => DataType :: Int32 ,
440- _ => {
441- // this error is internal as `data_types` should have captured this.
442- return Err ( DataFusionError :: Internal (
443- "The octet_length function can only accept strings." . to_string ( ) ,
444- ) ) ;
445- }
446- } ) ,
380+ BuiltinScalarFunction :: OctetLength => {
381+ utf8_to_int_type ( & arg_types[ 0 ] , "octet_length" )
382+ }
447383 BuiltinScalarFunction :: Random => Ok ( DataType :: Float64 ) ,
448- BuiltinScalarFunction :: RegexpReplace => Ok ( match arg_types[ 0 ] {
449- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
450- DataType :: Utf8 => DataType :: Utf8 ,
451- _ => {
452- // this error is internal as `data_types` should have captured this.
453- return Err ( DataFusionError :: Internal (
454- "The regexp_replace function can only accept strings." . to_string ( ) ,
455- ) ) ;
456- }
457- } ) ,
458- BuiltinScalarFunction :: Repeat => Ok ( match arg_types[ 0 ] {
459- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
460- DataType :: Utf8 => DataType :: Utf8 ,
461- _ => {
462- // this error is internal as `data_types` should have captured this.
463- return Err ( DataFusionError :: Internal (
464- "The repeat function can only accept strings." . to_string ( ) ,
465- ) ) ;
466- }
467- } ) ,
468- BuiltinScalarFunction :: Replace => Ok ( match arg_types[ 0 ] {
469- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
470- DataType :: Utf8 => DataType :: Utf8 ,
471- _ => {
472- // this error is internal as `data_types` should have captured this.
473- return Err ( DataFusionError :: Internal (
474- "The replace function can only accept strings." . to_string ( ) ,
475- ) ) ;
476- }
477- } ) ,
478- BuiltinScalarFunction :: Reverse => Ok ( match arg_types[ 0 ] {
479- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
480- DataType :: Utf8 => DataType :: Utf8 ,
481- _ => {
482- // this error is internal as `data_types` should have captured this.
483- return Err ( DataFusionError :: Internal (
484- "The reverse function can only accept strings." . to_string ( ) ,
485- ) ) ;
486- }
487- } ) ,
488- BuiltinScalarFunction :: Right => Ok ( match arg_types[ 0 ] {
489- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
490- DataType :: Utf8 => DataType :: Utf8 ,
491- _ => {
492- // this error is internal as `data_types` should have captured this.
493- return Err ( DataFusionError :: Internal (
494- "The right function can only accept strings." . to_string ( ) ,
495- ) ) ;
496- }
497- } ) ,
498- BuiltinScalarFunction :: Rpad => Ok ( match arg_types[ 0 ] {
499- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
500- DataType :: Utf8 => DataType :: Utf8 ,
501- _ => {
502- // this error is internal as `data_types` should have captured this.
503- return Err ( DataFusionError :: Internal (
504- "The rpad function can only accept strings." . to_string ( ) ,
505- ) ) ;
506- }
507- } ) ,
508- BuiltinScalarFunction :: Rtrim => Ok ( match arg_types[ 0 ] {
509- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
510- DataType :: Utf8 => DataType :: Utf8 ,
511- _ => {
512- // this error is internal as `data_types` should have captured this.
513- return Err ( DataFusionError :: Internal (
514- "The rtrim function can only accept strings." . to_string ( ) ,
515- ) ) ;
516- }
517- } ) ,
518- BuiltinScalarFunction :: SHA224 => Ok ( match arg_types[ 0 ] {
519- DataType :: LargeUtf8 => DataType :: Binary ,
520- DataType :: Utf8 => DataType :: Binary ,
521- _ => {
522- // this error is internal as `data_types` should have captured this.
523- return Err ( DataFusionError :: Internal (
524- "The sha224 function can only accept strings." . to_string ( ) ,
525- ) ) ;
526- }
527- } ) ,
528- BuiltinScalarFunction :: SHA256 => Ok ( match arg_types[ 0 ] {
529- DataType :: LargeUtf8 => DataType :: Binary ,
530- DataType :: Utf8 => DataType :: Binary ,
531- _ => {
532- // this error is internal as `data_types` should have captured this.
533- return Err ( DataFusionError :: Internal (
534- "The sha256 function can only accept strings." . to_string ( ) ,
535- ) ) ;
536- }
537- } ) ,
538- BuiltinScalarFunction :: SHA384 => Ok ( match arg_types[ 0 ] {
539- DataType :: LargeUtf8 => DataType :: Binary ,
540- DataType :: Utf8 => DataType :: Binary ,
541- _ => {
542- // this error is internal as `data_types` should have captured this.
543- return Err ( DataFusionError :: Internal (
544- "The sha384 function can only accept strings." . to_string ( ) ,
545- ) ) ;
546- }
547- } ) ,
548- BuiltinScalarFunction :: SHA512 => Ok ( match arg_types[ 0 ] {
549- DataType :: LargeUtf8 => DataType :: Binary ,
550- DataType :: Utf8 => DataType :: Binary ,
551- _ => {
552- // this error is internal as `data_types` should have captured this.
553- return Err ( DataFusionError :: Internal (
554- "The sha512 function can only accept strings." . to_string ( ) ,
555- ) ) ;
556- }
557- } ) ,
558- BuiltinScalarFunction :: SplitPart => Ok ( match arg_types[ 0 ] {
559- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
560- DataType :: Utf8 => DataType :: Utf8 ,
561- _ => {
562- // this error is internal as `data_types` should have captured this.
563- return Err ( DataFusionError :: Internal (
564- "The split_part function can only accept strings." . to_string ( ) ,
565- ) ) ;
566- }
567- } ) ,
384+ BuiltinScalarFunction :: RegexpReplace => {
385+ utf8_to_str_type ( & arg_types[ 0 ] , "regex_replace" )
386+ }
387+ BuiltinScalarFunction :: Repeat => utf8_to_str_type ( & arg_types[ 0 ] , "repeat" ) ,
388+ BuiltinScalarFunction :: Replace => utf8_to_str_type ( & arg_types[ 0 ] , "replace" ) ,
389+ BuiltinScalarFunction :: Reverse => utf8_to_str_type ( & arg_types[ 0 ] , "reverse" ) ,
390+ BuiltinScalarFunction :: Right => utf8_to_str_type ( & arg_types[ 0 ] , "right" ) ,
391+ BuiltinScalarFunction :: Rpad => utf8_to_str_type ( & arg_types[ 0 ] , "rpad" ) ,
392+ BuiltinScalarFunction :: Rtrim => utf8_to_str_type ( & arg_types[ 0 ] , "rtrimp" ) ,
393+ BuiltinScalarFunction :: SHA224 => utf8_to_binary_type ( & arg_types[ 0 ] , "sha224" ) ,
394+ BuiltinScalarFunction :: SHA256 => utf8_to_binary_type ( & arg_types[ 0 ] , "sha256" ) ,
395+ BuiltinScalarFunction :: SHA384 => utf8_to_binary_type ( & arg_types[ 0 ] , "sha384" ) ,
396+ BuiltinScalarFunction :: SHA512 => utf8_to_binary_type ( & arg_types[ 0 ] , "sha512" ) ,
397+ BuiltinScalarFunction :: SplitPart => utf8_to_str_type ( & arg_types[ 0 ] , "split_part" ) ,
568398 BuiltinScalarFunction :: StartsWith => Ok ( DataType :: Boolean ) ,
569- BuiltinScalarFunction :: Strpos => Ok ( match arg_types[ 0 ] {
570- DataType :: LargeUtf8 => DataType :: Int64 ,
571- DataType :: Utf8 => DataType :: Int32 ,
572- _ => {
573- // this error is internal as `data_types` should have captured this.
574- return Err ( DataFusionError :: Internal (
575- "The strpos function can only accept strings." . to_string ( ) ,
576- ) ) ;
577- }
578- } ) ,
579- BuiltinScalarFunction :: Substr => Ok ( match arg_types[ 0 ] {
580- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
581- DataType :: Utf8 => DataType :: Utf8 ,
582- _ => {
583- // this error is internal as `data_types` should have captured this.
584- return Err ( DataFusionError :: Internal (
585- "The substr function can only accept strings." . to_string ( ) ,
586- ) ) ;
587- }
588- } ) ,
399+ BuiltinScalarFunction :: Strpos => utf8_to_int_type ( & arg_types[ 0 ] , "strpos" ) ,
400+ BuiltinScalarFunction :: Substr => utf8_to_str_type ( & arg_types[ 0 ] , "substr" ) ,
589401 BuiltinScalarFunction :: ToHex => Ok ( match arg_types[ 0 ] {
590402 DataType :: Int8 | DataType :: Int16 | DataType :: Int32 | DataType :: Int64 => {
591403 DataType :: Utf8
@@ -601,36 +413,9 @@ pub fn return_type(
601413 Ok ( DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) )
602414 }
603415 BuiltinScalarFunction :: Now => Ok ( DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) ) ,
604- BuiltinScalarFunction :: Translate => Ok ( match arg_types[ 0 ] {
605- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
606- DataType :: Utf8 => DataType :: Utf8 ,
607- _ => {
608- // this error is internal as `data_types` should have captured this.
609- return Err ( DataFusionError :: Internal (
610- "The translate function can only accept strings." . to_string ( ) ,
611- ) ) ;
612- }
613- } ) ,
614- BuiltinScalarFunction :: Trim => Ok ( match arg_types[ 0 ] {
615- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
616- DataType :: Utf8 => DataType :: Utf8 ,
617- _ => {
618- // this error is internal as `data_types` should have captured this.
619- return Err ( DataFusionError :: Internal (
620- "The trim function can only accept strings." . to_string ( ) ,
621- ) ) ;
622- }
623- } ) ,
624- BuiltinScalarFunction :: Upper => Ok ( match arg_types[ 0 ] {
625- DataType :: LargeUtf8 => DataType :: LargeUtf8 ,
626- DataType :: Utf8 => DataType :: Utf8 ,
627- _ => {
628- // this error is internal as `data_types` should have captured this.
629- return Err ( DataFusionError :: Internal (
630- "The upper function can only accept strings." . to_string ( ) ,
631- ) ) ;
632- }
633- } ) ,
416+ BuiltinScalarFunction :: Translate => utf8_to_str_type ( & arg_types[ 0 ] , "translate" ) ,
417+ BuiltinScalarFunction :: Trim => utf8_to_str_type ( & arg_types[ 0 ] , "trim" ) ,
418+ BuiltinScalarFunction :: Upper => utf8_to_str_type ( & arg_types[ 0 ] , "upper" ) ,
634419 BuiltinScalarFunction :: RegexpMatch => Ok ( match arg_types[ 0 ] {
635420 DataType :: LargeUtf8 => {
636421 DataType :: List ( Box :: new ( Field :: new ( "item" , DataType :: LargeUtf8 , true ) ) )
0 commit comments