@@ -49,6 +49,7 @@ use crate::analyzer::type_coercion::TypeCoercionRewriter;
49
49
use crate :: simplify_expressions:: guarantees:: GuaranteeRewriter ;
50
50
use crate :: simplify_expressions:: regex:: simplify_regex_expr;
51
51
use crate :: simplify_expressions:: SimplifyInfo ;
52
+ use regex:: Regex ;
52
53
53
54
use super :: inlist_simplifier:: ShortenInListSimplifier ;
54
55
use super :: utils:: * ;
@@ -1470,34 +1471,54 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
1470
1471
} ) => Transformed :: yes ( simplify_regex_expr ( left, op, right) ?) ,
1471
1472
1472
1473
// Rules for Like
1473
- Expr :: Like ( Like {
1474
- expr,
1475
- pattern,
1476
- negated,
1477
- escape_char : _,
1478
- case_insensitive : _,
1479
- } ) if matches ! (
1480
- pattern. as_ref( ) ,
1481
- Expr :: Literal ( ScalarValue :: Utf8 ( Some ( pattern_str) ) ) if pattern_str == "%"
1482
- ) || matches ! (
1483
- pattern. as_ref( ) ,
1484
- Expr :: Literal ( ScalarValue :: LargeUtf8 ( Some ( pattern_str) ) ) if pattern_str == "%"
1485
- ) || matches ! (
1486
- pattern. as_ref( ) ,
1487
- Expr :: Literal ( ScalarValue :: Utf8View ( Some ( pattern_str) ) ) if pattern_str == "%"
1488
- ) =>
1489
- {
1490
- // exp LIKE '%' is
1491
- // - when exp is not NULL, it's true
1492
- // - when exp is NULL, it's NULL
1493
- // exp NOT LIKE '%' is
1494
- // - when exp is not NULL, it's false
1495
- // - when exp is NULL, it's NULL
1496
- Transformed :: yes ( Expr :: Case ( Case {
1497
- expr : Some ( Box :: new ( Expr :: IsNotNull ( expr) ) ) ,
1498
- when_then_expr : vec ! [ ( Box :: new( lit( true ) ) , Box :: new( lit( !negated) ) ) ] ,
1499
- else_expr : None ,
1500
- } ) )
1474
+ Expr :: Like ( like) => {
1475
+ match as_string_scalar ( & like. pattern ) {
1476
+ Some ( ( data_type, pattern_str) ) => {
1477
+ match pattern_str {
1478
+ None => return Ok ( Transformed :: yes ( lit_bool_null ( ) ) ) ,
1479
+ Some ( pattern_str) if pattern_str == "%" => {
1480
+ // exp LIKE '%' is
1481
+ // - when exp is not NULL, it's true
1482
+ // - when exp is NULL, it's NULL
1483
+ // exp NOT LIKE '%' is
1484
+ // - when exp is not NULL, it's false
1485
+ // - when exp is NULL, it's NULL
1486
+ let result_for_non_null = lit ( !like. negated ) ;
1487
+ Transformed :: yes ( if !info. nullable ( & like. expr ) ? {
1488
+ result_for_non_null
1489
+ } else {
1490
+ Expr :: Case ( Case {
1491
+ expr : Some ( Box :: new ( Expr :: IsNotNull ( like. expr ) ) ) ,
1492
+ when_then_expr : vec ! [ (
1493
+ Box :: new( lit( true ) ) ,
1494
+ Box :: new( result_for_non_null) ,
1495
+ ) ] ,
1496
+ else_expr : None ,
1497
+ } )
1498
+ } )
1499
+ }
1500
+ Some ( pattern_str)
1501
+ if pattern_str. contains ( "%%" ) &&
1502
+ // TODO support more complete unescaping
1503
+ ( like. escape_char . is_none ( ) || pattern_str. contains ( like. escape_char . unwrap ( ) ) ) =>
1504
+ {
1505
+ let simplified_pattern = Regex :: new ( "%%+" )
1506
+ . unwrap ( )
1507
+ . replace_all ( & pattern_str, "%" )
1508
+ . to_string ( ) ;
1509
+ Transformed :: yes ( Expr :: Like ( Like {
1510
+ pattern : Box :: new ( to_string_scalar (
1511
+ data_type,
1512
+ Some ( simplified_pattern) ,
1513
+ ) ) ,
1514
+ ..like
1515
+ } ) )
1516
+ }
1517
+ Some ( _pattern_str) => Transformed :: no ( Expr :: Like ( like) ) ,
1518
+ }
1519
+ }
1520
+ None => Transformed :: no ( Expr :: Like ( like) ) ,
1521
+ }
1501
1522
}
1502
1523
1503
1524
// a is not null/unknown --> true (if a is not nullable)
@@ -1696,6 +1717,24 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
1696
1717
}
1697
1718
}
1698
1719
1720
+ fn as_string_scalar ( expr : & Expr ) -> Option < ( DataType , & Option < String > ) > {
1721
+ match expr {
1722
+ Expr :: Literal ( ScalarValue :: Utf8 ( s) ) => Some ( ( DataType :: Utf8 , s) ) ,
1723
+ Expr :: Literal ( ScalarValue :: LargeUtf8 ( s) ) => Some ( ( DataType :: LargeUtf8 , s) ) ,
1724
+ Expr :: Literal ( ScalarValue :: Utf8View ( s) ) => Some ( ( DataType :: Utf8View , s) ) ,
1725
+ _ => None ,
1726
+ }
1727
+ }
1728
+
1729
+ fn to_string_scalar ( data_type : DataType , value : Option < String > ) -> Expr {
1730
+ match data_type {
1731
+ DataType :: Utf8 => Expr :: Literal ( ScalarValue :: Utf8 ( value) ) ,
1732
+ DataType :: LargeUtf8 => Expr :: Literal ( ScalarValue :: LargeUtf8 ( value) ) ,
1733
+ DataType :: Utf8View => Expr :: Literal ( ScalarValue :: Utf8View ( value) ) ,
1734
+ _ => unreachable ! ( ) ,
1735
+ }
1736
+ }
1737
+
1699
1738
fn has_common_conjunction ( lhs : & Expr , rhs : & Expr ) -> bool {
1700
1739
let lhs_set: HashSet < & Expr > = iter_conjunction ( lhs) . collect ( ) ;
1701
1740
iter_conjunction ( rhs) . any ( |e| lhs_set. contains ( & e) && !e. is_volatile ( ) )
@@ -2810,10 +2849,16 @@ mod tests {
2810
2849
) ;
2811
2850
2812
2851
// single character
2813
- assert_change ( regex_match ( col ( "c1" ) , lit ( "x" ) ) , like ( col ( "c1" ) , "%x%" ) ) ;
2852
+ assert_change (
2853
+ regex_match ( col ( "c1" ) , lit ( "x" ) ) ,
2854
+ like ( col ( "c1" ) , lit ( "%x%" ) ) ,
2855
+ ) ;
2814
2856
2815
2857
// single word
2816
- assert_change ( regex_match ( col ( "c1" ) , lit ( "foo" ) ) , like ( col ( "c1" ) , "%foo%" ) ) ;
2858
+ assert_change (
2859
+ regex_match ( col ( "c1" ) , lit ( "foo" ) ) ,
2860
+ like ( col ( "c1" ) , lit ( "%foo%" ) ) ,
2861
+ ) ;
2817
2862
2818
2863
// regular expressions that match an exact literal
2819
2864
assert_change ( regex_match ( col ( "c1" ) , lit ( "^$" ) ) , col ( "c1" ) . eq ( lit ( "" ) ) ) ;
@@ -2900,44 +2945,50 @@ mod tests {
2900
2945
assert_no_change ( regex_match ( col ( "c1" ) , lit ( "$foo^" ) ) ) ;
2901
2946
2902
2947
// regular expressions that match a partial literal
2903
- assert_change ( regex_match ( col ( "c1" ) , lit ( "^foo" ) ) , like ( col ( "c1" ) , "foo%" ) ) ;
2904
- assert_change ( regex_match ( col ( "c1" ) , lit ( "foo$" ) ) , like ( col ( "c1" ) , "%foo" ) ) ;
2948
+ assert_change (
2949
+ regex_match ( col ( "c1" ) , lit ( "^foo" ) ) ,
2950
+ like ( col ( "c1" ) , lit ( "foo%" ) ) ,
2951
+ ) ;
2952
+ assert_change (
2953
+ regex_match ( col ( "c1" ) , lit ( "foo$" ) ) ,
2954
+ like ( col ( "c1" ) , lit ( "%foo" ) ) ,
2955
+ ) ;
2905
2956
assert_change (
2906
2957
regex_match ( col ( "c1" ) , lit ( "^foo|bar$" ) ) ,
2907
- like ( col ( "c1" ) , "foo%" ) . or ( like ( col ( "c1" ) , "%bar" ) ) ,
2958
+ like ( col ( "c1" ) , lit ( "foo%" ) ) . or ( like ( col ( "c1" ) , lit ( "%bar" ) ) ) ,
2908
2959
) ;
2909
2960
2910
2961
// OR-chain
2911
2962
assert_change (
2912
2963
regex_match ( col ( "c1" ) , lit ( "foo|bar|baz" ) ) ,
2913
- like ( col ( "c1" ) , "%foo%" )
2914
- . or ( like ( col ( "c1" ) , "%bar%" ) )
2915
- . or ( like ( col ( "c1" ) , "%baz%" ) ) ,
2964
+ like ( col ( "c1" ) , lit ( "%foo%" ) )
2965
+ . or ( like ( col ( "c1" ) , lit ( "%bar%" ) ) )
2966
+ . or ( like ( col ( "c1" ) , lit ( "%baz%" ) ) ) ,
2916
2967
) ;
2917
2968
assert_change (
2918
2969
regex_match ( col ( "c1" ) , lit ( "foo|x|baz" ) ) ,
2919
- like ( col ( "c1" ) , "%foo%" )
2920
- . or ( like ( col ( "c1" ) , "%x%" ) )
2921
- . or ( like ( col ( "c1" ) , "%baz%" ) ) ,
2970
+ like ( col ( "c1" ) , lit ( "%foo%" ) )
2971
+ . or ( like ( col ( "c1" ) , lit ( "%x%" ) ) )
2972
+ . or ( like ( col ( "c1" ) , lit ( "%baz%" ) ) ) ,
2922
2973
) ;
2923
2974
assert_change (
2924
2975
regex_not_match ( col ( "c1" ) , lit ( "foo|bar|baz" ) ) ,
2925
- not_like ( col ( "c1" ) , "%foo%" )
2926
- . and ( not_like ( col ( "c1" ) , "%bar%" ) )
2927
- . and ( not_like ( col ( "c1" ) , "%baz%" ) ) ,
2976
+ not_like ( col ( "c1" ) , lit ( "%foo%" ) )
2977
+ . and ( not_like ( col ( "c1" ) , lit ( "%bar%" ) ) )
2978
+ . and ( not_like ( col ( "c1" ) , lit ( "%baz%" ) ) ) ,
2928
2979
) ;
2929
2980
// both anchored expressions (translated to equality) and unanchored
2930
2981
assert_change (
2931
2982
regex_match ( col ( "c1" ) , lit ( "foo|^x$|baz" ) ) ,
2932
- like ( col ( "c1" ) , "%foo%" )
2983
+ like ( col ( "c1" ) , lit ( "%foo%" ) )
2933
2984
. or ( col ( "c1" ) . eq ( lit ( "x" ) ) )
2934
- . or ( like ( col ( "c1" ) , "%baz%" ) ) ,
2985
+ . or ( like ( col ( "c1" ) , lit ( "%baz%" ) ) ) ,
2935
2986
) ;
2936
2987
assert_change (
2937
2988
regex_not_match ( col ( "c1" ) , lit ( "foo|^bar$|baz" ) ) ,
2938
- not_like ( col ( "c1" ) , "%foo%" )
2989
+ not_like ( col ( "c1" ) , lit ( "%foo%" ) )
2939
2990
. and ( col ( "c1" ) . not_eq ( lit ( "bar" ) ) )
2940
- . and ( not_like ( col ( "c1" ) , "%baz%" ) ) ,
2991
+ . and ( not_like ( col ( "c1" ) , lit ( "%baz%" ) ) ) ,
2941
2992
) ;
2942
2993
// Too many patterns (MAX_REGEX_ALTERNATIONS_EXPANSION)
2943
2994
assert_no_change ( regex_match ( col ( "c1" ) , lit ( "foo|bar|baz|blarg|bozo|etc" ) ) ) ;
@@ -2987,41 +3038,41 @@ mod tests {
2987
3038
} )
2988
3039
}
2989
3040
2990
- fn like ( expr : Expr , pattern : & str ) -> Expr {
3041
+ fn like ( expr : Expr , pattern : impl Into < Expr > ) -> Expr {
2991
3042
Expr :: Like ( Like {
2992
3043
negated : false ,
2993
3044
expr : Box :: new ( expr) ,
2994
- pattern : Box :: new ( lit ( pattern) ) ,
3045
+ pattern : Box :: new ( pattern. into ( ) ) ,
2995
3046
escape_char : None ,
2996
3047
case_insensitive : false ,
2997
3048
} )
2998
3049
}
2999
3050
3000
- fn not_like ( expr : Expr , pattern : & str ) -> Expr {
3051
+ fn not_like ( expr : Expr , pattern : impl Into < Expr > ) -> Expr {
3001
3052
Expr :: Like ( Like {
3002
3053
negated : true ,
3003
3054
expr : Box :: new ( expr) ,
3004
- pattern : Box :: new ( lit ( pattern) ) ,
3055
+ pattern : Box :: new ( pattern. into ( ) ) ,
3005
3056
escape_char : None ,
3006
3057
case_insensitive : false ,
3007
3058
} )
3008
3059
}
3009
3060
3010
- fn ilike ( expr : Expr , pattern : & str ) -> Expr {
3061
+ fn ilike ( expr : Expr , pattern : impl Into < Expr > ) -> Expr {
3011
3062
Expr :: Like ( Like {
3012
3063
negated : false ,
3013
3064
expr : Box :: new ( expr) ,
3014
- pattern : Box :: new ( lit ( pattern) ) ,
3065
+ pattern : Box :: new ( pattern. into ( ) ) ,
3015
3066
escape_char : None ,
3016
3067
case_insensitive : true ,
3017
3068
} )
3018
3069
}
3019
3070
3020
- fn not_ilike ( expr : Expr , pattern : & str ) -> Expr {
3071
+ fn not_ilike ( expr : Expr , pattern : impl Into < Expr > ) -> Expr {
3021
3072
Expr :: Like ( Like {
3022
3073
negated : true ,
3023
3074
expr : Box :: new ( expr) ,
3024
- pattern : Box :: new ( lit ( pattern) ) ,
3075
+ pattern : Box :: new ( pattern. into ( ) ) ,
3025
3076
escape_char : None ,
3026
3077
case_insensitive : true ,
3027
3078
} )
@@ -3633,31 +3684,112 @@ mod tests {
3633
3684
3634
3685
#[test]
fn test_like_and_ilke() {
    // Checks the simplification of all four variants (LIKE, NOT LIKE, ILIKE,
    // NOT ILIKE) for NULL patterns, match-everything patterns and NULL inputs.
    let null = lit(ScalarValue::Utf8(None));

    // expr [NOT] [I]LIKE NULL  -->  NULL
    assert_eq!(simplify(like(col("c1"), null.clone())), lit_bool_null());
    assert_eq!(simplify(not_like(col("c1"), null.clone())), lit_bool_null());
    assert_eq!(simplify(ilike(col("c1"), null.clone())), lit_bool_null());
    assert_eq!(simplify(not_ilike(col("c1"), null.clone())), lit_bool_null());

    // Patterns that match every non-NULL string.
    let match_all = ["%", "%%"];

    // expr [NOT] [I]LIKE '%' / '%%' on column `c1`: the result is wrapped
    // in a NULL check on the input.
    for &pattern in &match_all {
        assert_eq!(
            simplify(like(col("c1"), lit(pattern))),
            if_not_null(col("c1"), true)
        );
        assert_eq!(
            simplify(not_like(col("c1"), lit(pattern))),
            if_not_null(col("c1"), false)
        );
        assert_eq!(
            simplify(ilike(col("c1"), lit(pattern))),
            if_not_null(col("c1"), true)
        );
        assert_eq!(
            simplify(not_ilike(col("c1"), lit(pattern))),
            if_not_null(col("c1"), false)
        );
    }

    // not_null_expr [NOT] [I]LIKE '%' / '%%': the result is a plain boolean
    // constant (`c1_non_null` is presumably declared non-nullable in the
    // test schema, which is not visible here).
    for &pattern in &match_all {
        assert_eq!(simplify(like(col("c1_non_null"), lit(pattern))), lit(true));
        assert_eq!(
            simplify(not_like(col("c1_non_null"), lit(pattern))),
            lit(false)
        );
        assert_eq!(simplify(ilike(col("c1_non_null"), lit(pattern))), lit(true));
        assert_eq!(
            simplify(not_ilike(col("c1_non_null"), lit(pattern))),
            lit(false)
        );
    }

    // null_constant [NOT] [I]LIKE '%' / '%%' / 'a%'  -->  NULL
    for &pattern in &["%", "%%", "a%"] {
        assert_eq!(simplify(like(null.clone(), lit(pattern))), lit_bool_null());
        assert_eq!(
            simplify(not_like(null.clone(), lit(pattern))),
            lit_bool_null()
        );
        assert_eq!(simplify(ilike(null.clone(), lit(pattern))), lit_bool_null());
        assert_eq!(
            simplify(not_ilike(null.clone(), lit(pattern))),
            lit_bool_null()
        );
    }
}
3663
3795
0 commit comments