Skip to content

Commit

Permalink
depr(python): Deprecate overwrite_schema parameter for `DataFrame.w…
Browse files Browse the repository at this point in the history
…rite_delta` (#14879)
  • Loading branch information
stinodego authored Mar 6, 2024
1 parent 33b2286 commit 6a181f2
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 16 deletions.
32 changes: 22 additions & 10 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3665,7 +3665,7 @@ def write_delta(
target: str | Path | deltalake.DeltaTable,
*,
mode: Literal["error", "append", "overwrite", "ignore"] = ...,
overwrite_schema: bool = ...,
overwrite_schema: bool | None = ...,
storage_options: dict[str, str] | None = ...,
delta_write_options: dict[str, Any] | None = ...,
) -> None: ...
Expand All @@ -3676,7 +3676,7 @@ def write_delta(
target: str | Path | deltalake.DeltaTable,
*,
mode: Literal["merge"],
overwrite_schema: bool = ...,
overwrite_schema: bool | None = ...,
storage_options: dict[str, str] | None = ...,
delta_merge_options: dict[str, Any],
) -> deltalake.table.TableMerger: ...
Expand All @@ -3686,7 +3686,7 @@ def write_delta(
target: str | Path | deltalake.DeltaTable,
*,
mode: Literal["error", "append", "overwrite", "ignore", "merge"] = "error",
overwrite_schema: bool = False,
overwrite_schema: bool | None = None,
storage_options: dict[str, str] | None = None,
delta_write_options: dict[str, Any] | None = None,
delta_merge_options: dict[str, Any] | None = None,
Expand All @@ -3709,6 +3709,10 @@ def write_delta(
with the existing data.
overwrite_schema
If True, allows updating the schema of the table.
.. deprecated:: 0.20.14
Use the parameter `delta_write_options` instead and pass
`{"schema_mode": "overwrite"}`.
storage_options
Extra options for the storage backends supported by `deltalake`.
For cloud storages, this may include configurations for authentication etc.
Expand Down Expand Up @@ -3764,12 +3768,14 @@ def write_delta(
>>> df.write_delta(table_path, mode="append") # doctest: +SKIP
Overwrite a Delta Lake table as a new version.
If the schemas of the new and old data are the same, setting
`overwrite_schema` is not required.
If the schemas of the new and old data are the same, specifying the
`schema_mode` is not required.
>>> existing_table_path = "/path/to/delta-table/"
>>> df.write_delta(
... existing_table_path, mode="overwrite", overwrite_schema=True
... existing_table_path,
... mode="overwrite",
... delta_write_options={"schema_mode": "overwrite"},
... ) # doctest: +SKIP
Write a DataFrame as a Delta Lake table to a cloud object store like S3.
Expand Down Expand Up @@ -3799,9 +3805,6 @@ def write_delta(
For all `TableMerger` methods, check the deltalake docs
`here <https://delta-io.github.io/delta-rs/api/delta_table/delta_table_merger/>`__.
Schema evolution is not yet supported in by the `deltalake` package, therefore
`overwrite_schema` will not have any effect on a merge operation.
>>> df = pl.DataFrame(
... {
... "foo": [1, 2, 3, 4, 5],
Expand All @@ -3825,6 +3828,13 @@ def write_delta(
... .execute()
... ) # doctest: +SKIP
"""
if overwrite_schema is not None:
issue_deprecation_warning(
"The parameter `overwrite_schema` for `write_delta` is deprecated."
' Use the parameter `delta_write_options` instead and pass `{"schema_mode": "overwrite"}`.',
version="0.20.14",
)

from polars.io.delta import (
_check_for_unsupported_types,
_check_if_delta_available,
Expand Down Expand Up @@ -3857,13 +3867,15 @@ def write_delta(
if delta_write_options is None:
delta_write_options = {}

if overwrite_schema:
delta_write_options["schema_mode"] = "overwrite"

schema = delta_write_options.pop("schema", None)
write_deltalake(
table_or_uri=target,
data=data,
schema=schema,
mode=mode,
overwrite_schema=overwrite_schema,
storage_options=storage_options,
large_dtypes=True,
**delta_write_options,
Expand Down
28 changes: 22 additions & 6 deletions py-polars/tests/unit/io/test_delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,15 @@ def test_write_delta(df: pl.DataFrame, tmp_path: Path) -> None:
v1.write_delta(tmp_path)

# Case: Overwrite with new version (version 1)
v1.write_delta(tmp_path, mode="overwrite", overwrite_schema=True)
v1.write_delta(
tmp_path, mode="overwrite", delta_write_options={"schema_mode": "overwrite"}
)

# Case: Error if schema contains unsupported columns
with pytest.raises(TypeError):
df.write_delta(tmp_path, mode="overwrite", overwrite_schema=True)
df.write_delta(
tmp_path, mode="overwrite", delta_write_options={"schema_mode": "overwrite"}
)

partitioned_tbl_uri = (tmp_path / ".." / "partitioned_table").resolve()

Expand Down Expand Up @@ -184,6 +188,17 @@ def test_write_delta(df: pl.DataFrame, tmp_path: Path) -> None:
df_supported.write_delta(partitioned_tbl_uri, mode="overwrite")


@pytest.mark.write_disk()
def test_write_delta_overwrite_schema_deprecated(
df: pl.DataFrame, tmp_path: Path
) -> None:
df = df.select(pl.col(pl.Int64))
with pytest.deprecated_call():
df.write_delta(tmp_path, overwrite_schema=True)
result = pl.read_delta(str(tmp_path))
assert_frame_equal(df, result)


@pytest.mark.write_disk()
@pytest.mark.parametrize(
"series",
Expand Down Expand Up @@ -360,18 +375,19 @@ def test_write_delta_with_schema_10540(tmp_path: Path) -> None:
def test_write_delta_with_tz_in_df(expr: pl.Expr, tmp_path: Path) -> None:
df = pl.select(expr)

pa_schema = pa.schema([("datetime", pa.timestamp("us"))])
expected_dtype = pl.Datetime("us", "UTC")
expected = pl.select(expr.cast(expected_dtype))

df.write_delta(tmp_path, mode="append")
# write second time because delta-rs also casts timestamp with tz to timestamp no tz
df.write_delta(tmp_path, mode="append")

# Check schema of DeltaTable object
tbl = DeltaTable(tmp_path)
assert pa_schema == tbl.schema().to_pyarrow()
assert tbl.schema().to_pyarrow() == expected.to_arrow().schema

# Check result
result = pl.read_delta(str(tmp_path), version=0)

expected = df.cast(pl.Datetime)
assert_frame_equal(result, expected)


Expand Down

0 comments on commit 6a181f2

Please sign in to comment.