-
Notifications
You must be signed in to change notification settings - Fork 341
fix: add metadata_properties to _construct_parameters when update hive table #2013
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
ff149e8
230d088
7e754d5
d5a26e0
985d77d
16f562e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -211,11 +211,18 @@ def _construct_hive_storage_descriptor( | |
DEFAULT_PROPERTIES = {TableProperties.PARQUET_COMPRESSION: TableProperties.PARQUET_COMPRESSION_DEFAULT} | ||
|
||
|
||
def _construct_parameters(metadata_location: str, previous_metadata_location: Optional[str] = None) -> Dict[str, Any]: | ||
def _construct_parameters( | ||
metadata_location: str, previous_metadata_location: Optional[str] = None, metadata_properties: Optional[Properties] = None | ||
) -> Dict[str, Any]: | ||
properties = {PROP_EXTERNAL: "TRUE", PROP_TABLE_TYPE: "ICEBERG", PROP_METADATA_LOCATION: metadata_location} | ||
if previous_metadata_location: | ||
properties[PROP_PREVIOUS_METADATA_LOCATION] = previous_metadata_location | ||
|
||
if metadata_properties: | ||
for key, value in metadata_properties.items(): | ||
if key not in properties: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 this is fine, it helps with not re-setting |
||
properties[key] = str(value) | ||
|
||
return properties | ||
|
||
|
||
|
@@ -360,7 +367,7 @@ def _convert_iceberg_into_hive(self, table: Table) -> HiveTable: | |
property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT), | ||
), | ||
tableType=EXTERNAL_TABLE, | ||
parameters=_construct_parameters(table.metadata_location), | ||
parameters=_construct_parameters(metadata_location=table.metadata_location, metadata_properties=table.properties), | ||
) | ||
|
||
def _create_hive_table(self, open_client: Client, hive_table: HiveTable) -> None: | ||
|
@@ -541,6 +548,7 @@ def commit_table( | |
hive_table.parameters = _construct_parameters( | ||
metadata_location=updated_staged_table.metadata_location, | ||
previous_metadata_location=current_table.metadata_location, | ||
metadata_properties=updated_staged_table.properties, | ||
kevinjqliu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
open_client.alter_table_with_environment_context( | ||
dbname=database_name, | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -48,7 +48,7 @@ | |||||||||||||||||||||||
pyarrow_to_schema, | ||||||||||||||||||||||||
) | ||||||||||||||||||||||||
from pyiceberg.schema import Schema | ||||||||||||||||||||||||
from pyiceberg.table import Table | ||||||||||||||||||||||||
from pyiceberg.table import Table, update_table_metadata | ||||||||||||||||||||||||
from pyiceberg.types import ( | ||||||||||||||||||||||||
BinaryType, | ||||||||||||||||||||||||
BooleanType, | ||||||||||||||||||||||||
|
@@ -59,6 +59,7 @@ | |||||||||||||||||||||||
TimestampType, | ||||||||||||||||||||||||
) | ||||||||||||||||||||||||
from pyiceberg.utils.concurrent import ExecutorFactory | ||||||||||||||||||||||||
from pyiceberg.table.update import SetPropertiesUpdate, RemovePropertiesUpdate | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
DEFAULT_PROPERTIES = {"write.parquet.compression-codec": "zstd"} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
|
@@ -111,6 +112,23 @@ def test_table_properties(catalog: Catalog) -> None: | |||||||||||||||||||||||
table.transaction().set_properties(property_name=None).commit_transaction() | ||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was wondering why the rest of these tests pass since we're not setting the properties in the HMS. Turns out the table properties are saved in the table metadata using its This is not what the table metadata's properties field should be used for,
This is a side effect of iceberg-python/pyiceberg/table/__init__.py Lines 1131 to 1134 in a67c559
iceberg-python/pyiceberg/catalog/hive.py Lines 338 to 344 in a67c559
We should fix this behavior and read/write properties using the HMS's table parameters. We can fix this separately from the current issue. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I opened #2064 to track this There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This PR will save the properties in both the HMS's table parameter and table metadata's properties field There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for your review and suggestion. Yes, this also confused me when I developed this PR. Then I found: So to make sure it did write to the HMS properties, I need to create a new
instead of just test like:
I think I can also help with #2064. |
||||||||||||||||||||||||
assert "None type is not a supported value in properties: property_name" in str(exc_info.value) | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
if isinstance(catalog, HiveCatalog): | ||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a great test! could you move this into its own test function? with just hive catalog
|
||||||||||||||||||||||||
table.transaction().set_properties({"abc": "def", "p1": "123"}).commit_transaction() | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
hive_client: _HiveClient = _HiveClient(catalog.properties["uri"]) | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
with hive_client as open_client: | ||||||||||||||||||||||||
hive_table = open_client.get_table(*TABLE_NAME) | ||||||||||||||||||||||||
assert hive_table.parameters.get("abc") == "def" | ||||||||||||||||||||||||
assert hive_table.parameters.get("p1") == "123" | ||||||||||||||||||||||||
assert hive_table.parameters.get("not_exist_parameter") is None | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
table.transaction().remove_properties("abc").commit_transaction() | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
with hive_client as open_client: | ||||||||||||||||||||||||
hive_table = open_client.get_table(*TABLE_NAME) | ||||||||||||||||||||||||
assert hive_table.parameters.get("abc") is None | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
|
||||||||||||||||||||||||
@pytest.mark.integration | ||||||||||||||||||||||||
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) | ||||||||||||||||||||||||
|
Uh oh!
There was an error while loading. Please reload this page.