diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 53f674be5642..e4bcd9cbc549 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -3665,7 +3665,7 @@ def write_delta( target: str | Path | deltalake.DeltaTable, *, mode: Literal["error", "append", "overwrite", "ignore"] = ..., - overwrite_schema: bool = ..., + overwrite_schema: bool | None = ..., storage_options: dict[str, str] | None = ..., delta_write_options: dict[str, Any] | None = ..., ) -> None: ... @@ -3676,7 +3676,7 @@ def write_delta( target: str | Path | deltalake.DeltaTable, *, mode: Literal["merge"], - overwrite_schema: bool = ..., + overwrite_schema: bool | None = ..., storage_options: dict[str, str] | None = ..., delta_merge_options: dict[str, Any], ) -> deltalake.table.TableMerger: ... @@ -3686,7 +3686,7 @@ def write_delta( target: str | Path | deltalake.DeltaTable, *, mode: Literal["error", "append", "overwrite", "ignore", "merge"] = "error", - overwrite_schema: bool = False, + overwrite_schema: bool | None = None, storage_options: dict[str, str] | None = None, delta_write_options: dict[str, Any] | None = None, delta_merge_options: dict[str, Any] | None = None, @@ -3709,6 +3709,10 @@ def write_delta( with the existing data. overwrite_schema If True, allows updating the schema of the table. + + .. deprecated:: 0.20.14 + Use the parameter `delta_write_options` instead and pass + `{"schema_mode": "overwrite"}`. storage_options Extra options for the storage backends supported by `deltalake`. For cloud storages, this may include configurations for authentication etc. @@ -3764,12 +3768,14 @@ def write_delta( >>> df.write_delta(table_path, mode="append") # doctest: +SKIP Overwrite a Delta Lake table as a new version. - If the schemas of the new and old data are the same, setting - `overwrite_schema` is not required. + If the schemas of the new and old data are the same, specifying the + `schema_mode` is not required. >>> existing_table_path = "/path/to/delta-table/" >>> df.write_delta( - ... existing_table_path, mode="overwrite", overwrite_schema=True + ... existing_table_path, + ... mode="overwrite", + ... delta_write_options={"schema_mode": "overwrite"}, ... ) # doctest: +SKIP Write a DataFrame as a Delta Lake table to a cloud object store like S3. @@ -3799,9 +3805,6 @@ def write_delta( For all `TableMerger` methods, check the deltalake docs `here `__. - Schema evolution is not yet supported in by the `deltalake` package, therefore - `overwrite_schema` will not have any effect on a merge operation. - >>> df = pl.DataFrame( ... { ... "foo": [1, 2, 3, 4, 5], @@ -3825,6 +3828,13 @@ def write_delta( ... .execute() ... ) # doctest: +SKIP """ + if overwrite_schema is not None: + issue_deprecation_warning( + "The parameter `overwrite_schema` for `write_delta` is deprecated." + ' Use the parameter `delta_write_options` instead and pass `{"schema_mode": "overwrite"}`.', + version="0.20.14", + ) + from polars.io.delta import ( _check_for_unsupported_types, _check_if_delta_available, @@ -3857,13 +3867,15 @@ def write_delta( if delta_write_options is None: delta_write_options = {} + if overwrite_schema: + delta_write_options["schema_mode"] = "overwrite" + schema = delta_write_options.pop("schema", None) write_deltalake( table_or_uri=target, data=data, schema=schema, mode=mode, - overwrite_schema=overwrite_schema, storage_options=storage_options, large_dtypes=True, **delta_write_options, diff --git a/py-polars/tests/unit/io/test_delta.py b/py-polars/tests/unit/io/test_delta.py index 46f097863a98..423c3b4eaa72 100644 --- a/py-polars/tests/unit/io/test_delta.py +++ b/py-polars/tests/unit/io/test_delta.py @@ -120,11 +120,15 @@ def test_write_delta(df: pl.DataFrame, tmp_path: Path) -> None: v1.write_delta(tmp_path) # Case: Overwrite with new version (version 1) - v1.write_delta(tmp_path, mode="overwrite", overwrite_schema=True) + v1.write_delta( + tmp_path, mode="overwrite", delta_write_options={"schema_mode": "overwrite"} + ) # Case: Error if schema contains unsupported columns with pytest.raises(TypeError): - df.write_delta(tmp_path, mode="overwrite", overwrite_schema=True) + df.write_delta( + tmp_path, mode="overwrite", delta_write_options={"schema_mode": "overwrite"} + ) partitioned_tbl_uri = (tmp_path / ".." / "partitioned_table").resolve() @@ -184,6 +188,17 @@ def test_write_delta(df: pl.DataFrame, tmp_path: Path) -> None: df_supported.write_delta(partitioned_tbl_uri, mode="overwrite") +@pytest.mark.write_disk() +def test_write_delta_overwrite_schema_deprecated( + df: pl.DataFrame, tmp_path: Path +) -> None: + df = df.select(pl.col(pl.Int64)) + with pytest.deprecated_call(): + df.write_delta(tmp_path, overwrite_schema=True) + result = pl.read_delta(str(tmp_path)) + assert_frame_equal(df, result) + + @pytest.mark.write_disk() @pytest.mark.parametrize( "series", @@ -360,18 +375,19 @@ def test_write_delta_with_schema_10540(tmp_path: Path) -> None: def test_write_delta_with_tz_in_df(expr: pl.Expr, tmp_path: Path) -> None: df = pl.select(expr) - pa_schema = pa.schema([("datetime", pa.timestamp("us"))]) + expected_dtype = pl.Datetime("us", "UTC") + expected = pl.select(expr.cast(expected_dtype)) df.write_delta(tmp_path, mode="append") # write second time because delta-rs also casts timestamp with tz to timestamp no tz df.write_delta(tmp_path, mode="append") + # Check schema of DeltaTable object tbl = DeltaTable(tmp_path) - assert pa_schema == tbl.schema().to_pyarrow() + assert tbl.schema().to_pyarrow() == expected.to_arrow().schema + # Check result result = pl.read_delta(str(tmp_path), version=0) - - expected = df.cast(pl.Datetime) assert_frame_equal(result, expected)