Skip to content

Commit

Permalink
Explicitly convert event_name to strings in Impact.from_hdf5 (#894)
Browse files Browse the repository at this point in the history
* Try converting non-string impact event names from H5
* Only write H5 file if Impact.event_name is strings
* Fix bug where only the first event name was checked
* Update CHANGELOG.md

---------

Co-authored-by: emanuel-schmid <schmide@ethz.ch>
  • Loading branch information
peanutfun and emanuel-schmid authored Jul 8, 2024
1 parent 24f2af2 commit f50e148
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ CLIMADA tutorials. [#872](https://github.com/CLIMADA-project/climada_python/pull
- Improved error messages produced by `ImpactCalc.impact()` in case impact function in the exposures is not found in impf_set [#863](https://github.com/CLIMADA-project/climada_python/pull/863)
- Update the Holland et al. 2010 TC windfield model and introduce `model_kwargs` parameter to adjust model parameters [#846](https://github.com/CLIMADA-project/climada_python/pull/846)
- Changed module structure: `climada.hazard.Hazard` has been split into the modules `base`, `io` and `plot` [#871](https://github.com/CLIMADA-project/climada_python/pull/871)
- `Impact.from_hdf5` now calls `str` on `event_name` data that is not strings, and issue a warning then [#894](https://github.com/CLIMADA-project/climada_python/pull/894)
- `Impact.write_hdf5` now throws an error if `event_name` is does not contain strings exclusively [#894](https://github.com/CLIMADA-project/climada_python/pull/894)

### Fixed

Expand Down
26 changes: 18 additions & 8 deletions climada/engine/impact.py
Original file line number Diff line number Diff line change
Expand Up @@ -937,11 +937,6 @@ def write_hdf5(self, file_path: Union[str, Path], dense_imp_mat: bool=False):
The impact matrix can be stored in a sparse or dense format.
Notes
-----
This writer does not support attributes with variable types. Please make sure
that ``event_name`` is a list of equally-typed values, e.g., all ``str``.
Parameters
----------
file_path : str or Path
Expand All @@ -950,6 +945,11 @@ def write_hdf5(self, file_path: Union[str, Path], dense_imp_mat: bool=False):
If ``True``, write the impact matrix as dense matrix that can be more easily
interpreted by common H5 file readers but takes up (vastly) more space.
Defaults to ``False``.
Raises
------
TypeError
If :py:attr:`event_name` does not contain strings exclusively.
"""
# Define writers for all types (will be filled later)
type_writers = dict()
Expand Down Expand Up @@ -983,7 +983,7 @@ def write(group: h5py.Group, name: str, value: Any):

def _str_type_helper(values: Collection):
"""Return string datatype if we assume 'values' contains strings"""
if isinstance(next(iter(values)), str):
if all((isinstance(val, str) for val in values)):
return h5py.string_dtype()
return None

Expand Down Expand Up @@ -1037,6 +1037,8 @@ def write_csr(group, name, value):
# Now write all attributes
# NOTE: Remove leading underscore to write '_tot_value' as regular attribute
for name, value in self.__dict__.items():
if name == "event_name" and _str_type_helper(value) is None:
raise TypeError("'event_name' must be a list of strings")
write(file, name.lstrip("_"), value)

def write_sparse_csr(self, file_name):
Expand Down Expand Up @@ -1240,10 +1242,18 @@ def from_hdf5(cls, file_path: Union[str, Path]):
).intersection(file.keys())
kwargs.update({attr: file[attr][:] for attr in array_attrs})

# Special handling for 'event_name' because it's a list of strings
# Special handling for 'event_name' because it should be a list of strings
if "event_name" in file:
# pylint: disable=no-member
kwargs["event_name"] = list(file["event_name"].asstr()[:])
try:
event_name = file["event_name"].asstr()[:]
except TypeError:
LOGGER.warning(
"'event_name' is not stored as strings. Trying to decode "
"values with 'str()' instead."
)
event_name = map(str, file["event_name"][:])
kwargs["event_name"] = list(event_name)

# Create the impact object
return cls(**kwargs)
Expand Down
19 changes: 15 additions & 4 deletions climada/engine/test/test_impact.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,8 @@ def test_select_event_identity_pass(self):
ent.exposures.assign_centroids(hazard)

# Compute the impact over the whole exposures
imp = ImpactCalc(ent.exposures, ent.impact_funcs, hazard).impact(save_mat=True, assign_centroids=False)
imp = ImpactCalc(ent.exposures, ent.impact_funcs, hazard).impact(
save_mat=True, assign_centroids=False)

sel_imp = imp.select(event_ids=imp.event_id,
event_names=imp.event_name,
Expand Down Expand Up @@ -1019,10 +1020,11 @@ def test_write_hdf5_without_imp_mat(self):

def test_write_hdf5_type_fail(self):
"""Test that writing attributes with varying types results in an error"""
self.impact.event_name = [1, "a", 1.0, "b", "c", "d"]
with self.assertRaises(TypeError) as cm:
self.impact.event_name = ["a", 1, 1.0, "b", "c", "d"]
with self.assertRaisesRegex(
TypeError, "'event_name' must be a list of strings"
):
self.impact.write_hdf5(self.filepath)
self.assertIn("No conversion path for dtype", str(cm.exception))

def test_cycle_hdf5(self):
"""Test writing and reading the same object"""
Expand Down Expand Up @@ -1120,6 +1122,15 @@ def test_read_hdf5_full(self):
impact = Impact.from_hdf5(self.filepath)
npt.assert_array_equal(impact.imp_mat.toarray(), [[0, 1, 2], [3, 0, 0]])

# Check with non-string event_name
event_name = [1.2, 2]
with h5py.File(self.filepath, "r+") as file:
del file["event_name"]
file.create_dataset("event_name", data=event_name)
with self.assertLogs("climada.engine.impact", "WARNING") as cm:
impact = Impact.from_hdf5(self.filepath)
self.assertIn("'event_name' is not stored as strings", cm.output[0])
self.assertListEqual(impact.event_name, ["1.2", "2.0"])

# Execute Tests
if __name__ == "__main__":
Expand Down

0 comments on commit f50e148

Please sign in to comment.