Skip to content

Commit 2e140b9

Browse files
author
Tiansu Yu
committed
fix (issue-1079): allow update_column to set doc as ''
1 parent e2ed2eb commit 2e140b9

File tree

3 files changed

+30
-6
lines changed

3 files changed

+30
-6
lines changed

pyiceberg/table/__init__.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2492,21 +2492,22 @@ def update_column(
24922492
except ResolveError as e:
24932493
raise ValidationError(f"Cannot change column type: {full_name}: {field.field_type} -> {field_type}") from e
24942494

2495+
# if other updates for the same field exist in one transaction:
24952496
if updated := self._updates.get(field.field_id):
24962497
self._updates[field.field_id] = NestedField(
24972498
field_id=updated.field_id,
24982499
name=updated.name,
24992500
field_type=field_type or updated.field_type,
2500-
doc=doc or updated.doc,
2501-
required=updated.required,
2501+
doc=doc if doc is not None else updated.doc,
2502+
required=required or updated.required,
25022503
)
25032504
else:
25042505
self._updates[field.field_id] = NestedField(
25052506
field_id=field.field_id,
25062507
name=field.name,
25072508
field_type=field_type or field.field_type,
2508-
doc=doc or field.doc,
2509-
required=field.required,
2509+
doc=doc if doc is not None else field.doc,
2510+
required=required or field.required,
25102511
)
25112512

25122513
if required is not None:
@@ -2878,7 +2879,7 @@ def _update_column(self, field: NestedField, existing_field: NestedField) -> Non
28782879
if field.field_type.is_primitive and field.field_type != existing_field.field_type:
28792880
self.update_schema.update_column(full_name, field_type=field.field_type)
28802881

2881-
if field.doc is not None and not field.doc != existing_field.doc:
2882+
if field.doc is not None and field.doc != existing_field.doc:
28822883
self.update_schema.update_column(full_name, doc=field.doc)
28832884

28842885
def _find_field_type(self, field_id: int) -> IcebergType:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# under the License.
1717
[tool.poetry]
1818
name = "pyiceberg"
19-
version = "0.7.1"
19+
version = "0.7.2"
2020
readme = "README.md"
2121
homepage = "https://py.iceberg.apache.org/"
2222
repository = "https://github.com/apache/iceberg-python"

tests/table/test_init.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,29 @@ def test_add_column(table_v2: Table) -> None:
512512
assert apply_schema.highest_field_id == 4
513513

514514

515+
def test_update_column_doc(table_v1: Table, table_v2: Table) -> None:
516+
COMMENT2 = "comment2"
517+
for table in [table_v1, table_v2]:
518+
original_schema = table.schema()
519+
# update existing doc to a new doc
520+
assert original_schema.find_field("y").doc == "comment"
521+
new_schema = table.transaction().update_schema().update_column("y", doc=COMMENT2)._apply()
522+
assert new_schema.find_field("y").doc == COMMENT2, "failed to update existing field doc"
523+
524+
# update existing doc to an empty string
525+
assert new_schema.find_field("y").doc == COMMENT2
526+
new_schema2 = table.transaction().update_schema().update_column("y", doc="")._apply()
527+
assert new_schema2.find_field("y").doc == "", "failed to remove existing field doc"
528+
529+
# assert the above two updates also work with union_by_name
530+
assert (
531+
table.update_schema().union_by_name(new_schema)._apply() == new_schema
532+
), "failed to update existing field doc with union_by_name"
533+
assert (
534+
table.update_schema().union_by_name(new_schema2)._apply() == new_schema2
535+
), "failed to remove existing field doc with union_by_name"
536+
537+
515538
def test_add_primitive_type_column(table_v2: Table) -> None:
516539
primitive_type: Dict[str, PrimitiveType] = {
517540
"boolean": BooleanType(),

0 commit comments

Comments
 (0)