Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion task-sdk/src/airflow/sdk/definitions/asset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import attrs

from airflow.sdk.api.datamodels._generated import AssetProfile
from airflow.sdk.definitions.asset.normalizer import normalize_asset_metadata
from airflow.serialization.dag_dependency import DagDependency

if TYPE_CHECKING:
Expand Down Expand Up @@ -225,7 +226,7 @@ def _set_extra_default(extra: dict | None) -> dict:
"""
if extra is None:
return {}
return extra
return normalize_asset_metadata(extra)


class BaseAsset:
Expand Down
10 changes: 9 additions & 1 deletion task-sdk/src/airflow/sdk/definitions/asset/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,24 @@

import attrs

from airflow.sdk.definitions.asset.normalizer import normalize_asset_metadata

if TYPE_CHECKING:
from airflow.sdk.definitions.asset import Asset, AssetAlias

__all__ = ["Metadata"]


def _normalize_extra(extra: dict[str, Any] | None) -> dict[str, Any]:
if extra is None:
return {}
return normalize_asset_metadata(extra)


@attrs.define(init=True)
class Metadata:
    """
    Metadata to attach to an AssetEvent.

    ``extra`` is declared exactly once, with ``_normalize_extra`` as its attrs
    converter, so the supplied payload (or ``None``) is normalized before it is
    stored on the instance.
    """

    # Asset this metadata is attached to.
    asset: Asset
    # Free-form payload; defaults to an empty dict and is passed through
    # ``_normalize_extra`` by attrs.
    extra: dict[str, Any] = attrs.field(factory=dict, converter=_normalize_extra)
    # Optional alias associated with the asset.
    alias: AssetAlias | None = None
54 changes: 54 additions & 0 deletions task-sdk/src/airflow/sdk/definitions/asset/normalizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from typing import Any


def normalize_asset_metadata(metadata_dict: dict[str, Any]) -> dict[str, Any]:
    """
    Return a copy of ``metadata_dict`` with NumPy values replaced by JSON-friendly builtins.

    The structure is walked recursively:

    * dicts are rebuilt with the same keys and normalized values;
    * lists and tuples become lists of normalized items;
    * NumPy integer / floating / bool scalars become ``int`` / ``float`` / ``bool``;
    * NumPy arrays become (nested) lists whose items are normalized in turn, so
      complex or object-dtype arrays do not leak non-JSON values;
    * complex numbers (NumPy or builtin) become ``{"real": ..., "imag": ...}``;
    * every other value is returned unchanged.

    NumPy is optional and is never imported by this function.

    :param metadata_dict: The metadata mapping to normalize; it is not mutated.
    :return: A new dictionary holding the normalized values.
    """
    import sys

    # A value can only be a NumPy object if NumPy has already been imported
    # somewhere in this process, so consult ``sys.modules`` instead of paying
    # for (or repeatedly failing) an import on every call.
    np = sys.modules.get("numpy")

    def _normalize_value(value: Any) -> Any:
        if isinstance(value, dict):
            return {k: _normalize_value(v) for k, v in value.items()}
        if isinstance(value, (list, tuple)):
            return [_normalize_value(item) for item in value]
        # Covers builtin complex, np.complex128 (a complex subclass) and the
        # builtin complex items that ndarray.tolist() produces.
        if isinstance(value, complex):
            return {"real": float(value.real), "imag": float(value.imag)}
        if np is not None:
            if isinstance(value, np.ndarray):
                # tolist() may still contain complex numbers or arbitrary
                # objects (object dtype), so normalize its result as well.
                return _normalize_value(value.tolist())
            if isinstance(value, np.bool_):
                return bool(value)
            if isinstance(value, np.integer):
                return int(value)
            if isinstance(value, np.floating):
                return float(value)
            if isinstance(value, np.complexfloating):
                return {"real": float(value.real), "imag": float(value.imag)}
        return value

    return _normalize_value(metadata_dict)
57 changes: 57 additions & 0 deletions task-sdk/tests/task_sdk/definitions/test_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from collections.abc import Callable
from unittest import mock

import numpy as np
import pytest

from airflow.providers.standard.operators.empty import EmptyOperator
Expand All @@ -38,6 +39,7 @@
_get_normalized_scheme,
_sanitize_uri,
)
from airflow.sdk.definitions.asset.metadata import Metadata
from airflow.sdk.definitions.dag import DAG
from airflow.sdk.io import ObjectStoragePath
from airflow.serialization.serialized_objects import SerializedDAG
Expand Down Expand Up @@ -480,3 +482,58 @@ def test_only_posarg(self, subcls, group, arg):
assert obj.name == arg
assert obj.uri == arg
assert obj.group == group


class TestAssetMetadataNormalization:
    """Check that NumPy values in ``extra`` are converted to builtin Python values."""

    # Shared by both tests so the Asset and Metadata code paths are exercised
    # with exactly the same inputs.
    NUMPY_CASES = [
        ({"np_int": np.int64(5)}, {"np_int": 5}),
        ({"np_float": np.float64(3.14)}, {"np_float": 3.14}),
        ({"np_bool": np.bool_(True)}, {"np_bool": True}),
        ({"np_array": np.array([1, 2, 3])}, {"np_array": [1, 2, 3]}),
        (
            {"np_complex": np.complex128(1 + 2j)},
            {"np_complex": {"real": 1.0, "imag": 2.0}},
        ),
        (
            {
                "nested": {
                    "np_int": np.int64(5),
                    "list": [np.float64(3.14), np.array([1, 2])],
                }
            },
            {"nested": {"np_int": 5, "list": [3.14, [1, 2]]}},
        ),
    ]

    def _assert_builtin_types(self, value):
        """
        Assert that ``value`` consists only of exact builtin types.

        Equality alone cannot detect a missing conversion because NumPy scalars
        compare equal to their builtin counterparts (``np.int64(5) == 5``), so
        the concrete type of every leaf is checked as well.
        """
        if type(value) is dict:
            for item in value.values():
                self._assert_builtin_types(item)
        elif type(value) is list:
            for item in value:
                self._assert_builtin_types(item)
        else:
            assert type(value) in (int, float, bool, str, type(None)), (
                f"unexpected non-builtin value {value!r} of type {type(value)!r}"
            )

    @pytest.mark.parametrize("metadata, expected", NUMPY_CASES)
    def test_asset_with_numpy_metadata(self, metadata, expected):
        asset = Asset("test_asset", extra=metadata)
        assert asset.extra == expected
        self._assert_builtin_types(asset.extra)

    @pytest.mark.parametrize("metadata, expected", NUMPY_CASES)
    def test_metadata_with_numpy_types(self, metadata, expected):
        asset = Asset("test_asset")
        meta = Metadata(asset=asset, extra=metadata)
        assert meta.extra == expected
        self._assert_builtin_types(meta.extra)