ray-project · bveeramani · Oct 17, 2025 · Oct 17, 2025 · Oct 17, 2025 · Oct 17, 2025
@@ -88,9 +88,9 @@ def visit_Compare(self, node: ast.Compare) -> ds.Expression:
 
         op = node.ops[0]
         if isinstance(op, ast.In):
-            return left_expr.is_in(comparators[0])
+            return pc.is_in(left_expr, comparators[0])
         elif isinstance(op, ast.NotIn):
-            return ~left_expr.is_in(comparators[0])
+            return ~pc.is_in(left_expr, comparators[0])
         elif isinstance(op, ast.Eq):
             return left_expr == comparators[0]
         elif isinstance(op, ast.NotEq):
@@ -233,7 +233,7 @@ def visit_Call(self, node: ast.Call) -> ds.Expression:
                 nan_is_null=nan_is_null
             ),
             "is_valid": lambda arg: arg.is_valid(),
-            "is_in": lambda arg1, arg2: arg1.is_in(arg2),
+            "is_in": lambda arg1, arg2: pc.is_in(arg1, arg2),
         }
 
         if func_name in function_map:

@@ -3,10 +3,12 @@
 import pyarrow as pa
 import pyarrow.parquet as pq
 import pytest
+from pkg_resources import parse_version
 
 from ray.data._internal.planner.plan_expression.expression_evaluator import (
     ExpressionEvaluator,
 )
+from ray.data.tests.conftest import get_pyarrow_version
 
 
 @pytest.fixture(scope="module")
@@ -292,6 +294,10 @@ def sample_data(tmpdir_factory):
 ]
 
 
+@pytest.mark.skipif(
+    get_pyarrow_version() < parse_version("20.0.0"),
+    reason="test_filter requires PyArrow >= 20.0.0",
+)
 @pytest.mark.parametrize("expression, expected_data", expressions_and_expected_data)
 def test_filter(sample_data, expression, expected_data):
     """Test the filter functionality of the ExpressionEvaluator."""
@@ -329,6 +335,10 @@ def test_filter_equal_negative_number():
     assert result_df == expected
 
 
+@pytest.mark.skipif(
+    get_pyarrow_version() < parse_version("20.0.0"),
+    reason="test_filter_bad_expression requires PyArrow >= 20.0.0",
+)
 def test_filter_bad_expression(sample_data):
     with pytest.raises(ValueError, match="Invalid syntax in the expression"):
         ExpressionEvaluator.get_filters(expression="bad filter")
@@ -338,3 +348,9 @@ def test_filter_bad_expression(sample_data):
     sample_data_path, _ = sample_data
     with pytest.raises(pa.ArrowInvalid):
         pq.read_table(sample_data_path, filters=filters)
+
+
+if __name__ == "__main__":  # only when this file is executed directly, not imported
+    import sys
+
+    sys.exit(pytest.main(["-v", __file__]))  # run pytest on this file; exit with its return value
diff --git a/semgrep.yml b/semgrep.yml
@@ -36,7 +36,6 @@ rules:
         # FIXME: These tests weren't run in CI, and now they're failing.
         - "python/ray/data/tests/test_arrow_serialization.py"
         - "python/ray/data/tests/test_block.py"
-        - "python/ray/data/tests/test_expression_evaluator.py"
         - "python/ray/data/tests/test_hash_shuffle.py"
         - "python/ray/data/tests/test_operator_fusion.py"
     languages: