Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pin polars for 24.10 and update polars test suite xfail list #16886

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/run_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ python -m pytest \
-m "" \
-p cudf_polars.testing.plugin \
-v \
--tb=short \
--tb=native \
${DESELECTED_TESTS} \
"$@" \
py-polars/tests
3 changes: 1 addition & 2 deletions ci/test_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl
rapids-logger "Install cudf_polars"
python -m pip install $(echo ./dist/cudf_polars*.whl)

# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
TAG="py-1.7.0"
TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
rapids-logger "Clone polars to ${TAG}"
git clone https://github.com/pola-rs/polars.git --branch ${TAG} --depth 1

Expand Down
5 changes: 1 addition & 4 deletions ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,14 @@ if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
# echo to expand wildcard before adding `[test]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"

rapids-logger "Pin to 1.7.0 Temporarily"
python -m pip install polars==1.7.0

rapids-logger "Run cudf_polars tests"

function set_exitcode()
Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- polars>=1.6
- polars>=1.8,<1.9
run_dask_cudf:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
8 changes: 5 additions & 3 deletions python/cudf_polars/cudf_polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@

from __future__ import annotations

# Check we have a supported polars version
import cudf_polars.utils.versions as v
from cudf_polars._version import __git_commit__, __version__
from cudf_polars.callback import execute_with_cudf
from cudf_polars.dsl.translate import translate_ir

del v
# Check we have a supported polars version
from cudf_polars.utils.versions import _ensure_polars_version

_ensure_polars_version()
del _ensure_polars_version

__all__: list[str] = [
"execute_with_cudf",
Expand Down
8 changes: 0 additions & 8 deletions python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,6 @@ def _(
cloud_options = None
else:
reader_options, cloud_options = map(json.loads, options)
if (
typ == "csv"
and visitor.version()[0] == 1
and reader_options["schema"] is not None
):
reader_options["schema"] = {
"fields": reader_options["schema"]["inner"]
} # pragma: no cover; CI tests 1.7
file_options = node.file_options
with_columns = file_options.with_columns
n_rows = file_options.n_rows
Expand Down
14 changes: 9 additions & 5 deletions python/cudf_polars/cudf_polars/testing/asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,11 @@ def assert_collect_raises(
cudf-polars.
Useful for controlling optimization settings.
polars_except
Exception or exceptions polars CPU is expected to raise.
Exception or exceptions polars CPU is expected to raise. If
an empty tuple ``()``, CPU is not expected to raise an exception.
cudf_except
Exception or exceptions polars GPU is expected to raise.
Exception or exceptions polars GPU is expected to raise. If
an empty tuple ``()``, GPU is not expected to raise an exception.
collect_kwargs
Common keyword arguments to pass to collect for both polars CPU and
cudf-polars.
Expand Down Expand Up @@ -203,7 +205,8 @@ def assert_collect_raises(
f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}"
) from e
else:
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")
if polars_except != ():
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")

engine = GPUEngine(raise_on_fail=True)
try:
Expand All @@ -212,7 +215,8 @@ def assert_collect_raises(
pass
except Exception as e:
raise AssertionError(
f"GPU execution RAISED {type(e)}, EXPECTED {polars_except}"
f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}"
) from e
else:
raise AssertionError(f"GPU execution DID NOT RAISE {polars_except}")
if cudf_except != ():
raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}")
4 changes: 4 additions & 0 deletions python/cudf_polars/cudf_polars/testing/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,15 @@ def pytest_configure(config: pytest.Config):
"tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU",
"tests/unit/io/test_lazy_count_star.py::test_count_compressed_csv_18057": "Need to determine if file is compressed",
"tests/unit/io/test_lazy_csv.py::test_scan_csv_slice_offset_zero": "Integer overflow in sliced read",
"tests/unit/io/test_lazy_parquet.py::test_dsl2ir_cached_metadata[False]": "cudf-polars doesn't use metadata read by rust preprocessing",
"tests/unit/io/test_lazy_parquet.py::test_parquet_is_in_statistics": "Debug output on stderr doesn't match",
"tests/unit/io/test_lazy_parquet.py::test_parquet_statistics": "Debug output on stderr doesn't match",
"tests/unit/io/test_lazy_parquet.py::test_parquet_different_schema[False]": "Needs cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_schema_mismatch_panic_17067[False]": "Needs cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_slice_pushdown_non_zero_offset[False]": "Thrift data not handled correctly/slice pushdown wrong?",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read[False]": "Incomplete handling of projected reads with mismatching schemas, cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_dtype_mismatch[False]": "Different exception raised, but correctly raises an exception",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_missing_cols_from_first[False]": "Different exception raised, but correctly raises an exception",
"tests/unit/io/test_parquet.py::test_read_parquet_only_loads_selected_columns_15098": "Memory usage won't be correct due to GPU",
"tests/unit/io/test_scan.py::test_scan[single-csv-async]": "Debug output on stderr doesn't match",
"tests/unit/io/test_scan.py::test_scan_with_limit[single-csv-async]": "Debug output on stderr doesn't match",
Expand Down
16 changes: 8 additions & 8 deletions python/cudf_polars/cudf_polars/utils/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@

POLARS_VERSION = parse(__version__)

POLARS_VERSION_GE_16 = POLARS_VERSION >= parse("1.6")
POLARS_VERSION_GT_16 = POLARS_VERSION > parse("1.6")
POLARS_VERSION_LT_16 = POLARS_VERSION < parse("1.6")

if POLARS_VERSION_LT_16:
raise ImportError(
"cudf_polars requires py-polars v1.6 or greater."
) # pragma: no cover
POLARS_VERSION_LT_18 = POLARS_VERSION < parse("1.8")


def _ensure_polars_version():
    """
    Refuse to proceed when the detected polars version is too old.

    Raises
    ------
    ImportError
        If ``POLARS_VERSION_LT_18`` is true, i.e. the parsed polars
        version compares lower than 1.8.
    """
    # Guard clause: a supported version needs no further action.
    if not POLARS_VERSION_LT_18:
        return
    raise ImportError(
        "cudf_polars requires py-polars v1.8 or greater."
    )  # pragma: no cover
2 changes: 1 addition & 1 deletion python/cudf_polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ authors = [
license = { text = "Apache 2.0" }
requires-python = ">=3.10"
dependencies = [
"polars>=1.6",
"polars>=1.8,<1.9",
"pylibcudf==24.10.*,>=0.0.0a0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
classifiers = [
Expand Down
6 changes: 5 additions & 1 deletion python/cudf_polars/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,11 @@ def test_groupby_nan_minmax_raises(op):
"expr",
[
pl.lit(1).alias("value"),
pl.lit([[4, 5, 6]]).alias("value"),
pytest.param(
pl.lit([[4, 5, 6]]).alias("value"),
marks=pytest.mark.xfail(reason="Need to expose OtherScalar in rust IR"),
),
pl.Series("value", [[4, 5, 6]], dtype=pl.List(pl.Int32)),
pl.col("float") * (1 - pl.col("int")),
[pl.lit(2).alias("value"), pl.col("float") * 2],
],
Expand Down
35 changes: 15 additions & 20 deletions python/cudf_polars/tests/testing/test_asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@

import polars as pl

from cudf_polars.containers import DataFrame
from cudf_polars.dsl.ir import Select
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
Expand Down Expand Up @@ -38,14 +36,24 @@ class E(Exception):
assert_ir_translation_raises(unsupported, E)


def test_collect_assert_raises(monkeypatch):
def test_collect_assert_raises():
df = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

with pytest.raises(AssertionError):
# This should raise, because polars CPU can run this query
with pytest.raises(AssertionError, match="CPU execution DID NOT RAISE"):
# This should raise, because polars CPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=(),
)

with pytest.raises(AssertionError, match="GPU execution DID NOT RAISE"):
# This should raise, because polars GPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=(),
cudf_except=pl.exceptions.InvalidOperationError,
)

Expand All @@ -60,31 +68,18 @@ def test_collect_assert_raises(monkeypatch):
cudf_except=pl.exceptions.InvalidOperationError,
)

with pytest.raises(AssertionError):
with pytest.raises(AssertionError, match="GPU execution RAISED"):
# This should raise because the expected GPU error is wrong
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=NotImplementedError,
)

with pytest.raises(AssertionError):
with pytest.raises(AssertionError, match="CPU execution RAISED"):
# This should raise because the expected CPU error is wrong
assert_collect_raises(
q,
polars_except=NotImplementedError,
cudf_except=pl.exceptions.InvalidOperationError,
)

with monkeypatch.context() as m:
m.setattr(Select, "evaluate", lambda self, cache: DataFrame([]))
# This query should fail, but we monkeypatch a bad
# implementation of Select which "succeeds" to check that our
# assertion notices this case.
q = df.select(pl.col("a") + pl.Series([1, 2]))
with pytest.raises(AssertionError):
assert_collect_raises(
q,
polars_except=pl.exceptions.ComputeError,
cudf_except=pl.exceptions.ComputeError,
)
Loading