Skip to content

Commit

Permalink
Internal change.
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 561693602
  • Loading branch information
marcenacp authored and The TensorFlow Datasets Authors committed Aug 31, 2023
1 parent 41ae3cf commit b65a808
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 3 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -191,11 +191,17 @@ def _convert_value(hf_value: Any, feature: feature_lib.FeatureConnector) -> Any:
}
elif isinstance(feature, feature_lib.FeaturesDict):
if isinstance(hf_value, dict):
return hf_value
return {k: _convert_value(v, feature[k]) for k, v in hf_value.items()}
raise ValueError(f"The feature is {feature}, but the value is: {hf_value}")
elif isinstance(feature, feature_lib.Sequence):
if isinstance(hf_value, (dict, list)):
return hf_value
if isinstance(hf_value, dict):
# Should be a dict of lists:
return {
k: [_convert_value(el, feature.feature[k]) for el in v]
for k, v in hf_value.items()
}
if isinstance(hf_value, list):
return [_convert_value(v, feature.feature) for v in hf_value]
else:
return [hf_value]
elif isinstance(feature, feature_lib.Audio):
Expand All @@ -214,6 +220,8 @@ def _convert_value(hf_value: Any, feature: feature_lib.FeatureConnector) -> Any:
buffer = io.BytesIO()
hf_value.save(fp=buffer, format=_IMAGE_ENCODING_FORMAT)
return buffer.getvalue()
elif isinstance(feature, feature_lib.Tensor):
return hf_value
raise ValueError(
f"Type {type(hf_value)} of value {hf_value} "
f"for feature {type(feature)} is not supported."
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -149,6 +149,21 @@ def test_convert_value_sequence():
)


def test_convert_value_empty_sequence():
    """Check `_convert_value` on a list holding a None entry.

    The value is converted against a `Sequence` of `np.str_`; the expected
    output has `b""` in place of the None entry and keeps the string entry
    unchanged.
    """
    string_sequence = feature_lib.Sequence(feature=np.str_)
    converted = huggingface_dataset_builder._convert_value(
        [None, "string"], string_sequence
    )
    assert converted == [b"", "string"]


def test_convert_value_sequence_of_dict():
    """Check `_convert_value` on a dict of lists for a `Sequence` of a dict.

    The expected output keeps the dict-of-lists layout, with `b""` in place
    of every None entry and the string entry left unchanged.
    """
    hf_value = {"someint": [None, "string", None]}
    expected = {"someint": [b"", "string", b""]}
    sequence_feature = feature_lib.Sequence(
        {"someint": feature_lib.Scalar(dtype=np.str_)}
    )
    converted = huggingface_dataset_builder._convert_value(
        hf_value, sequence_feature
    )
    assert converted == expected


def test_convert_value_image():
image_feature = feature_lib.Image()
image = lazy_imports_lib.lazy_imports.PIL_Image.new(mode="RGB", size=(4, 4))
Expand Down

0 comments on commit b65a808

Please sign in to comment.