enso-org · mergify · Oct 15, 2022 · Oct 7, 2022 · Oct 10, 2022 · Oct 10, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -210,6 +210,7 @@
 - [Extended `Filter_Condition` with `Is_Empty`, `Not_Empty`, `Like` and
   `Not_Like`.][3775]
 - [Implemented `Table.replace_text` for in-memory table.][3793]
+- [Extended `Filter_Condition` with `Is_In` and `Not_In`.][3790]
 
 [debug-shortcuts]:
   https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@@ -337,6 +338,7 @@
 [3770]: https://github.com/enso-org/enso/pull/3770
 [3775]: https://github.com/enso-org/enso/pull/3775
 [3793]: https://github.com/enso-org/enso/pull/3793
+[3790]: https://github.com/enso-org/enso/pull/3790
 
 #### Enso Compiler
 

@@ -1190,7 +1190,6 @@ lazy val parser = (project in file("lib/scala/parser"))
       s"-Djava.library.path=$root/target/rust/debug"
     },
     libraryDependencies ++= Seq(
-      "com.storm-enroute" %% "scalameter" % scalameterVersion % "bench",
       "org.scalatest"    %%% "scalatest"  % scalatestVersion  % Test
     ),
     testFrameworks := List(

@@ -105,6 +105,12 @@ type Filter_Condition
          See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926
     Not_Like pattern:Text
 
+    ## Is the value contained in `values`?
+    Is_In values:Vector
+
+    ## Is the value not contained in `values`?
+    Not_In values:Vector
+
     ## Converts a `Filter_Condition` condition into a predicate taking an
        element and returning a value indicating whether the element should be
        accepted by the filter.
@@ -142,6 +148,10 @@ type Filter_Condition
         Not_Like sql_pattern ->
             regex = sql_like_to_regex sql_pattern
             elem -> regex.matches elem . not
+        ## TODO: once we have proper hashing we could create a hashmap and
+           answer quicker; currently we need to do a full scan for each element.
+        Is_In values -> values.contains
+        Not_In values -> elem -> values.contains elem . not
 
 ## PRIVATE
 sql_like_to_regex sql_pattern =

@@ -13,7 +13,7 @@ import project.Internal.IR.Internal_Column.Internal_Column
 
 from project.Data.Table import Table, freshen_columns
 
-from project.Errors import Unsupported_Database_Operation_Error
+from project.Errors import Unsupported_Database_Operation_Error, Unsupported_Database_Operation_Error_Data
 
 type Column
 
@@ -130,7 +130,7 @@ type Column
         prepare_operand operand operand_type = case operand of
             other_column : Column ->
                 if Helpers.check_integrity self other_column then other_column.expression else
-                    Error.throw <| Unsupported_Database_Operation_Error "Cannot use columns coming from different contexts in one expression without a join."
+                    Error.throw <| Unsupported_Database_Operation_Error_Data "Cannot use columns coming from different contexts in one expression without a join."
             constant ->
                 actual_operand_type = operand_type.if_nothing self.sql_type
                 Expression.Constant actual_operand_type constant
@@ -394,6 +394,32 @@ type Column
     / : Column | Any -> Column
     / self other = self.make_binary_op "/" other
 
+    ## Element-wise modulus.
+
+       Arguments:
+       - other: The value to modulo `self` against. If `other` is a column, the
+         modulus is performed pairwise between corresponding elements of `self`
+         and `other`.
+
+       Returns a column with the results of taking the modulus of this
+       column's elements against `other`.
+
+       > Example
+         Modulus of two columns against each other.
+
+             import Standard.Examples
+
+             example_mod = Examples.integer_column % Examples.decimal_column
+
+       > Example
+         Modulus of a column with a number.
+
+             import Standard.Examples
+
+             example_mod = Examples.integer_column % 3
+    % : Column | Any -> Column
+    % self other = self.make_binary_op "%" other
+
     ## UNSTABLE
 
        Element-wise boolean conjunction.
@@ -433,20 +459,20 @@ type Column
        Returns a column of booleans, with `True` items at the positions where
        this column contains a `Nothing`.
     is_missing : Column
-    is_missing self = self.make_unary_op "ISNULL" new_type=SQL_Type.boolean
+    is_missing self = self.make_unary_op "IS_NULL" new_type=SQL_Type.boolean
 
     ## PRIVATE
        Returns a column of booleans, with `True` items at the positions where
        this column contains an empty string or `Nothing`.
     is_empty : Column
-    is_empty self = self.make_unary_op "ISEMPTY" new_type=SQL_Type.boolean
+    is_empty self = self.make_unary_op "IS_EMPTY" new_type=SQL_Type.boolean
 
     ## UNSTABLE
 
        Returns a new column where missing values have been replaced with the
        provided default.
     fill_missing : Any -> Column
-    fill_missing self default = self.make_binary_op "FILLNULL" default
+    fill_missing self default = self.make_binary_op "FILL_NULL" default
 
     ## UNSTABLE
 
@@ -495,7 +521,7 @@ type Column
     take self range=(First 1) =
         _ = range
         msg = "`Column.take` is not yet implemented."
-        Error.throw (Unsupported_Database_Operation_Error msg)
+        Error.throw (Unsupported_Database_Operation_Error_Data msg)
 
     ## UNSTABLE
        Creates a new Column from the input with the specified range of rows
@@ -507,7 +533,7 @@ type Column
     drop self range=(First 1) =
         _ = range
         msg = "`Column.drop` is not yet implemented."
-        Error.throw (Unsupported_Database_Operation_Error msg)
+        Error.throw (Unsupported_Database_Operation_Error_Data msg)
 
     ## UNSTABLE
 
@@ -551,10 +577,63 @@ type Column
     contains : Column | Text -> Column
     contains self other = self.make_binary_op "contains" other new_type=SQL_Type.boolean
 
-    ## PRIVATE
-       Checks for each element of the column if it matches an SQL-like pattern.
+    ## Checks for each element of the column if it matches an SQL-like pattern.
+
+       Arguments:
+       - pattern: The pattern to match `self` against. If it is a column, the
+         operation is performed pairwise between corresponding elements of
+         `self` and that column. The pattern is an SQL-like pattern, where
+         `%` matches any sequence of characters and `_` matches any single
+         character.
+
+       > Example
+         Check if elements of a column start with 'F' and end with a dot.
+
+             import Standard.Examples
+
+             example_like = Examples.text_column_1.like "F%."
     like : Column | Text -> Column
-    like self other = self.make_binary_op "LIKE" other new_type=SQL_Type.boolean
+    like self pattern = self.make_binary_op "LIKE" pattern new_type=SQL_Type.boolean
+
+    ## Checks for each element of the column if it is contained within the
+       provided vector.
+
+       Arguments:
+       - vector: A vector of elements. The resulting column will contain true at
+         the positions where the corresponding element of `self` is contained
+         in `vector`.
+
+       > Example
+         Check if elements of a column are contained in a provided vector.
+
+             import Standard.Examples
+
+             example_is_in = Examples.integer_column.is_in [1, 2, 5]
+    is_in self vector =
+        ## This is slightly hacky - we don't provide operand types as we want to
+           allow any type to get through and currently we do not have a mapping
+           from Enso types to SQL types (it may be available in the future). So
+           we just rely on Nothing resolving to the current column type. That
+           type may not always match the operands, but the current
+           implementation uses this type only for two purposes: generated SQL
+           visualization (so the color will be consistent with the column type
+           and not the value type - that can be confusing, we probably want to
+           fix it later) and setting up the query - but at the set up this only
+           applies to adding nulls - setting any other object does not check the
+           type at this level anyway.
+        partitioned = vector.partition .is_nothing
+        nulls = partitioned.first
+        non_nulls = partitioned.second
+        ## Since SQL `NULL IN (NULL)` yields `NULL`, we need to handle this case
+           separately. So we handle all non-null values using `IS_IN` and then
+           `OR` that with a null check (if the vector contained any nulls to
+           begin with). The implementation also ensures that even
+           `NULL IN (...)` is coalesced to False, so that negation works as
+           expected.
+        is_in_not_null = self.make_op "IS_IN" operands=non_nulls new_type=SQL_Type.boolean
+        case nulls.not_empty of
+            True -> is_in_not_null || self.is_missing
+            False -> is_in_not_null
 
     ## PRIVATE
     as_internal : Internal_Column

@@ -11,7 +11,7 @@ import project.Internal.IR.Nulls_Order.Nulls_Order
 import project.Internal.IR.Query.Query
 from project.Data.SQL import code
 
-from project.Errors import Unsupported_Database_Operation_Error
+from project.Errors import Unsupported_Database_Operation_Error_Data
 
 type Internal_Dialect
 
@@ -169,14 +169,15 @@ base_dialect =
     unary = name -> [name, make_unary_op name]
     fun = name -> [name, make_function name]
 
-    arith = [bin "+", bin "-", bin "*", bin "/"]
+    arith = [bin "+", bin "-", bin "*", bin "/", bin "%"]
     logic = [bin "AND", bin "OR", unary "NOT"]
     compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]]
     agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
     counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
-    text = [["ISEMPTY", make_is_empty], bin "LIKE"]
-    nulls = [["ISNULL", make_right_unary_op "IS NULL"], ["FILLNULL", make_function "COALESCE"]]
-    base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls)
+    text = [["IS_EMPTY", make_is_empty], bin "LIKE"]
+    nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
+    contains = [["IS_IN", make_is_in]]
+    base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls + contains)
     Internal_Dialect.Value base_map wrap_in_quotes
 
 ## PRIVATE
@@ -188,7 +189,7 @@ make_is_empty arguments = case arguments.length of
         is_empty = (arg ++ " = ''").paren
         (is_null ++ " OR " ++ is_empty).paren
     _ ->
-        Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation ISEMPTY")
+        Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation IS_EMPTY")
 
 ## PRIVATE
 make_between : Vector Builder -> Builder
@@ -201,6 +202,21 @@ make_between arguments = case arguments.length of
     _ ->
         Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation BETWEEN")
 
+## PRIVATE
+make_is_in : Vector Builder -> Builder
+make_is_in arguments = case arguments.length of
+    0 -> Error.throw <| Illegal_State_Error_Data ("The operation IS_IN requires at least one argument.")
+    ## Only the self argument: nothing is in an empty list, so short-circuit
+       to false. `IN ()` would be more meaningful, but it is a syntax error.
+    1 -> code '2=1' . paren
+    _ ->
+        expr = arguments.first
+        list = arguments.tail
+        is_in = expr ++ " IN (" ++ (SQL.join ", " list) ++ ")"
+        ## We ensure that even `NULL IN (...)` is coalesced to False, so that
+           negation will work as expected.
+        code "COALESCE(" ++ is_in ++ ", 2=1)"
+
 ## PRIVATE
 
    Builds code for an expression.
@@ -214,7 +230,7 @@ generate_expression dialect expr = case expr of
         dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name
     Expression.Constant sql_type value -> SQL.interpolation sql_type value
     Expression.Operation kind arguments ->
-        op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error kind)
+        op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error_Data kind)
         parsed_args = arguments.map (generate_expression dialect)
         op parsed_args
     _ : Order_Descriptor -> generate_order dialect expr
@@ -337,7 +353,7 @@ generate_query dialect query = case query of
         code "SELECT * " ++ generate_select_context dialect ctx
     Query.Insert table_name pairs ->
         generate_insert_query dialect table_name pairs
-    _ -> Error.throw <| Unsupported_Database_Operation_Error "Unsupported query type."
+    _ -> Error.throw <| Unsupported_Database_Operation_Error_Data "Unsupported query type."
 
 ## PRIVATE
    Arguments:

@@ -13,7 +13,7 @@ import project.Internal.IR.Order_Descriptor.Order_Descriptor
 import project.Internal.IR.Nulls_Order.Nulls_Order
 
 from project.Data.SQL import code
-from project.Errors import Unsupported_Database_Operation_Error
+from project.Errors import Unsupported_Database_Operation_Error_Data
 
 ## PRIVATE
 
@@ -232,15 +232,15 @@ make_order_descriptor internal_column sort_direction text_ordering =
     case internal_column.sql_type.is_likely_text of
         True ->
             ## In the future we can modify this error to suggest using a custom defined collation.
-            if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation_Error "Natural ordering is currently not supported. You may need to materialize the Table to perform this operation.") else
+            if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation_Error_Data "Natural ordering is currently not supported. You may need to materialize the Table to perform this operation.") else
                 case text_ordering.case_sensitivity of
                     Nothing ->
                         Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation=Nothing
                     Case_Sensitivity.Sensitive ->
                         Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation="ucs_basic"
                     Case_Sensitivity.Insensitive locale -> case locale == Locale.default of
                         False ->
-                            Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
+                            Error.throw (Unsupported_Database_Operation_Error_Data "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
                         True ->
                             upper = Expression.Operation "UPPER" [internal_column.expression]
                             folded_expression = Expression.Operation "LOWER" [upper]