rapidsai · rapids-bot · Sep 25, 2024 · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024
@@ -21,7 +21,7 @@ python -m pytest \
        -m "" \
        -p cudf_polars.testing.plugin \
        -v \
-       --tb=short \
+       --tb=native \
        ${DESELECTED_TESTS} \
        "$@" \
        py-polars/tests
@@ -33,8 +33,7 @@ python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl
 rapids-logger "Install cudf_polars"
 python -m pip install $(echo ./dist/cudf_polars*.whl)
 
-# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
-TAG="py-1.7.0"
+TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
 rapids-logger "Clone polars to ${TAG}"
 git clone https://github.com/pola-rs/polars.git --branch ${TAG} --depth 1
 

@@ -39,17 +39,14 @@ if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
       | tee ./constraints.txt
 fi
 
-# echo to expand wildcard before adding `[extra]` requires for pip
+# echo to expand the wildcard before appending the `[test]` extra for pip
 python -m pip install \
     -v \
     --constraint ./constraints.txt \
     "$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
     "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
     "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
 
-rapids-logger "Pin to 1.7.0 Temporarily"
-python -m pip install polars==1.7.0
-
 rapids-logger "Run cudf_polars tests"
 
 function set_exitcode()

@@ -663,7 +663,7 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - polars>=1.6
+          - polars>=1.8,<1.9
   run_dask_cudf:
     common:
       - output_types: [conda, requirements, pyproject]

@@ -10,13 +10,15 @@
 
 from __future__ import annotations
 
-# Check we have a supported polars version
-import cudf_polars.utils.versions as v
 from cudf_polars._version import __git_commit__, __version__
 from cudf_polars.callback import execute_with_cudf
 from cudf_polars.dsl.translate import translate_ir
 
-del v
+# Check we have a supported polars version
+from cudf_polars.utils.versions import _ensure_polars_version
+
+_ensure_polars_version()
+del _ensure_polars_version
 
 __all__: list[str] = [
     "execute_with_cudf",

@@ -93,14 +93,6 @@ def _(
         cloud_options = None
     else:
         reader_options, cloud_options = map(json.loads, options)
-    if (
-        typ == "csv"
-        and visitor.version()[0] == 1
-        and reader_options["schema"] is not None
-    ):
-        reader_options["schema"] = {
-            "fields": reader_options["schema"]["inner"]
-        }  # pragma: no cover; CI tests 1.7
     file_options = node.file_options
     with_columns = file_options.with_columns
     n_rows = file_options.n_rows

@@ -164,9 +164,11 @@ def assert_collect_raises(
         cudf-polars.
         Useful for controlling optimization settings.
     polars_except
-        Exception or exceptions polars CPU is expected to raise.
+        Exception or exceptions polars CPU is expected to raise. If
+        an empty tuple ``()``, CPU is not expected to raise an exception.
     cudf_except
-        Exception or exceptions polars GPU is expected to raise.
+        Exception or exceptions polars GPU is expected to raise. If
+        an empty tuple ``()``, GPU is not expected to raise an exception.
     collect_kwargs
         Common keyword arguments to pass to collect for both polars CPU and
         cudf-polars.
@@ -203,7 +205,8 @@ def assert_collect_raises(
             f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}"
         ) from e
     else:
-        raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")
+        if polars_except != ():
+            raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")
 
     engine = GPUEngine(raise_on_fail=True)
     try:
@@ -212,7 +215,8 @@ def assert_collect_raises(
         pass
     except Exception as e:
         raise AssertionError(
-            f"GPU execution RAISED {type(e)}, EXPECTED {polars_except}"
+            f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}"
         ) from e
     else:
-        raise AssertionError(f"GPU execution DID NOT RAISE {polars_except}")
+        if cudf_except != ():
+            raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}")
@@ -49,11 +49,15 @@ def pytest_configure(config: pytest.Config):
     "tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU",
     "tests/unit/io/test_lazy_count_star.py::test_count_compressed_csv_18057": "Need to determine if file is compressed",
     "tests/unit/io/test_lazy_csv.py::test_scan_csv_slice_offset_zero": "Integer overflow in sliced read",
+    "tests/unit/io/test_lazy_parquet.py::test_dsl2ir_cached_metadata[False]": "cudf-polars doesn't use metadata read by rust preprocessing",
     "tests/unit/io/test_lazy_parquet.py::test_parquet_is_in_statistics": "Debug output on stderr doesn't match",
     "tests/unit/io/test_lazy_parquet.py::test_parquet_statistics": "Debug output on stderr doesn't match",
     "tests/unit/io/test_lazy_parquet.py::test_parquet_different_schema[False]": "Needs cudf#16394",
     "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_mismatch_panic_17067[False]": "Needs cudf#16394",
     "tests/unit/io/test_lazy_parquet.py::test_parquet_slice_pushdown_non_zero_offset[False]": "Thrift data not handled correctly/slice pushdown wrong?",
+    "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read[False]": "Incomplete handling of projected reads with mismatching schemas, cudf#16394",
+    "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_dtype_mismatch[False]": "Different exception raised, but correctly raises an exception",
+    "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_missing_cols_from_first[False]": "Different exception raised, but correctly raises an exception",
     "tests/unit/io/test_parquet.py::test_read_parquet_only_loads_selected_columns_15098": "Memory usage won't be correct due to GPU",
     "tests/unit/io/test_scan.py::test_scan[single-csv-async]": "Debug output on stderr doesn't match",
     "tests/unit/io/test_scan.py::test_scan_with_limit[single-csv-async]": "Debug output on stderr doesn't match",

@@ -12,11 +12,11 @@
 
 POLARS_VERSION = parse(__version__)
 
-POLARS_VERSION_GE_16 = POLARS_VERSION >= parse("1.6")
-POLARS_VERSION_GT_16 = POLARS_VERSION > parse("1.6")
-POLARS_VERSION_LT_16 = POLARS_VERSION < parse("1.6")
-
-if POLARS_VERSION_LT_16:
-    raise ImportError(
-        "cudf_polars requires py-polars v1.6 or greater."
-    )  # pragma: no cover
+POLARS_VERSION_LT_18 = POLARS_VERSION < parse("1.8")
+
+
+def _ensure_polars_version():
+    """
+    Raise if the detected polars version is too old.
+
+    Consults the module-level flag ``POLARS_VERSION_LT_18``, which is
+    true when ``POLARS_VERSION`` compares below ``1.8``. Only this
+    lower bound is checked here.
+
+    Raises
+    ------
+    ImportError
+        If ``POLARS_VERSION_LT_18`` is true.
+    """
+    if POLARS_VERSION_LT_18:
+        raise ImportError(
+            "cudf_polars requires py-polars v1.8 or greater."
+        )  # pragma: no cover
@@ -19,7 +19,7 @@ authors = [
 license = { text = "Apache 2.0" }
 requires-python = ">=3.10"
 dependencies = [
-    "polars>=1.6",
+    "polars>=1.8,<1.9",
     "pylibcudf==24.10.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [

@@ -168,7 +168,11 @@ def test_groupby_nan_minmax_raises(op):
     "expr",
     [
         pl.lit(1).alias("value"),
-        pl.lit([[4, 5, 6]]).alias("value"),
+        pytest.param(
+            pl.lit([[4, 5, 6]]).alias("value"),
+            marks=pytest.mark.xfail(reason="Need to expose OtherScalar in rust IR"),
+        ),
+        pl.Series("value", [[4, 5, 6]], dtype=pl.List(pl.Int32)),
         pl.col("float") * (1 - pl.col("int")),
         [pl.lit(2).alias("value"), pl.col("float") * 2],
     ],

@@ -7,8 +7,6 @@
 
 import polars as pl
 
-from cudf_polars.containers import DataFrame
-from cudf_polars.dsl.ir import Select
 from cudf_polars.testing.asserts import (
     assert_collect_raises,
     assert_gpu_result_equal,
@@ -38,14 +36,24 @@ class E(Exception):
         assert_ir_translation_raises(unsupported, E)
 
 
-def test_collect_assert_raises(monkeypatch):
+def test_collect_assert_raises():
     df = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
 
-    with pytest.raises(AssertionError):
-        # This should raise, because polars CPU can run this query
+    with pytest.raises(AssertionError, match="CPU execution DID NOT RAISE"):
+        # This should raise, because polars CPU can run this query,
+        # but we expect an error.
         assert_collect_raises(
             df,
             polars_except=pl.exceptions.InvalidOperationError,
+            cudf_except=(),
+        )
+
+    with pytest.raises(AssertionError, match="GPU execution DID NOT RAISE"):
+        # This should raise, because polars GPU can run this query,
+        # but we expect an error.
+        assert_collect_raises(
+            df,
+            polars_except=(),
             cudf_except=pl.exceptions.InvalidOperationError,
         )
 
@@ -60,31 +68,18 @@ def test_collect_assert_raises(monkeypatch):
         cudf_except=pl.exceptions.InvalidOperationError,
     )
 
-    with pytest.raises(AssertionError):
+    with pytest.raises(AssertionError, match="GPU execution RAISED"):
         # This should raise because the expected GPU error is wrong
         assert_collect_raises(
             q,
             polars_except=pl.exceptions.InvalidOperationError,
             cudf_except=NotImplementedError,
         )
 
-    with pytest.raises(AssertionError):
+    with pytest.raises(AssertionError, match="CPU execution RAISED"):
         # This should raise because the expected CPU error is wrong
         assert_collect_raises(
             q,
             polars_except=NotImplementedError,
             cudf_except=pl.exceptions.InvalidOperationError,
         )
-
-    with monkeypatch.context() as m:
-        m.setattr(Select, "evaluate", lambda self, cache: DataFrame([]))
-        # This query should fail, but we monkeypatch a bad
-        # implementation of Select which "succeeds" to check that our
-        # assertion notices this case.
-        q = df.select(pl.col("a") + pl.Series([1, 2]))
-        with pytest.raises(AssertionError):
-            assert_collect_raises(
-                q,
-                polars_except=pl.exceptions.ComputeError,
-                cudf_except=pl.exceptions.ComputeError,
-            )