From f50e14844bb38f9c2a59234f896bfdbc0a9afbf5 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 8 Jul 2024 11:48:20 +0200 Subject: [PATCH] Explicitly convert `event_name` to strings in `Impact.from_hdf5` (#894) * Try converting non-string impact event names from H5 * Only write H5 file if Impact.event_name is strings * Fix bug where only the first event name was checked * Update CHANGELOG.md --------- Co-authored-by: emanuel-schmid --- CHANGELOG.md | 2 ++ climada/engine/impact.py | 26 ++++++++++++++++++-------- climada/engine/test/test_impact.py | 19 +++++++++++++++---- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 203f7230d5..c76e7ba2af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ CLIMADA tutorials. [#872](https://github.com/CLIMADA-project/climada_python/pull - Improved error messages produced by `ImpactCalc.impact()` in case impact function in the exposures is not found in impf_set [#863](https://github.com/CLIMADA-project/climada_python/pull/863) - Update the Holland et al. 2010 TC windfield model and introduce `model_kwargs` parameter to adjust model parameters [#846](https://github.com/CLIMADA-project/climada_python/pull/846) - Changed module structure: `climada.hazard.Hazard` has been split into the modules `base`, `io` and `plot` [#871](https://github.com/CLIMADA-project/climada_python/pull/871) +- `Impact.from_hdf5` now calls `str` on `event_name` data that is not strings, and issue a warning then [#894](https://github.com/CLIMADA-project/climada_python/pull/894) +- `Impact.write_hdf5` now throws an error if `event_name` is does not contain strings exclusively [#894](https://github.com/CLIMADA-project/climada_python/pull/894) ### Fixed diff --git a/climada/engine/impact.py b/climada/engine/impact.py index e9afda5903..97bbd6d90f 100644 --- a/climada/engine/impact.py +++ b/climada/engine/impact.py @@ -937,11 +937,6 @@ def write_hdf5(self, file_path: Union[str, Path], dense_imp_mat: bool=False): The impact matrix can be stored in a sparse or dense format. - Notes - ----- - This writer does not support attributes with variable types. Please make sure - that ``event_name`` is a list of equally-typed values, e.g., all ``str``. - Parameters ---------- file_path : str or Path @@ -950,6 +945,11 @@ def write_hdf5(self, file_path: Union[str, Path], dense_imp_mat: bool=False): If ``True``, write the impact matrix as dense matrix that can be more easily interpreted by common H5 file readers but takes up (vastly) more space. Defaults to ``False``. + + Raises + ------ + TypeError + If :py:attr:`event_name` does not contain strings exclusively. """ # Define writers for all types (will be filled later) type_writers = dict() @@ -983,7 +983,7 @@ def write(group: h5py.Group, name: str, value: Any): def _str_type_helper(values: Collection): """Return string datatype if we assume 'values' contains strings""" - if isinstance(next(iter(values)), str): + if all((isinstance(val, str) for val in values)): return h5py.string_dtype() return None @@ -1037,6 +1037,8 @@ def write_csr(group, name, value): # Now write all attributes # NOTE: Remove leading underscore to write '_tot_value' as regular attribute for name, value in self.__dict__.items(): + if name == "event_name" and _str_type_helper(value) is None: + raise TypeError("'event_name' must be a list of strings") write(file, name.lstrip("_"), value) def write_sparse_csr(self, file_name): @@ -1240,10 +1242,18 @@ def from_hdf5(cls, file_path: Union[str, Path]): ).intersection(file.keys()) kwargs.update({attr: file[attr][:] for attr in array_attrs}) - # Special handling for 'event_name' because it's a list of strings + # Special handling for 'event_name' because it should be a list of strings if "event_name" in file: # pylint: disable=no-member - kwargs["event_name"] = list(file["event_name"].asstr()[:]) + try: + event_name = file["event_name"].asstr()[:] + except TypeError: + LOGGER.warning( + "'event_name' is not stored as strings. Trying to decode " + "values with 'str()' instead." + ) + event_name = map(str, file["event_name"][:]) + kwargs["event_name"] = list(event_name) # Create the impact object return cls(**kwargs) diff --git a/climada/engine/test/test_impact.py b/climada/engine/test/test_impact.py index a0b458ca55..0e70e993b0 100644 --- a/climada/engine/test/test_impact.py +++ b/climada/engine/test/test_impact.py @@ -779,7 +779,8 @@ def test_select_event_identity_pass(self): ent.exposures.assign_centroids(hazard) # Compute the impact over the whole exposures - imp = ImpactCalc(ent.exposures, ent.impact_funcs, hazard).impact(save_mat=True, assign_centroids=False) + imp = ImpactCalc(ent.exposures, ent.impact_funcs, hazard).impact( + save_mat=True, assign_centroids=False) sel_imp = imp.select(event_ids=imp.event_id, event_names=imp.event_name, @@ -1019,10 +1020,11 @@ def test_write_hdf5_without_imp_mat(self): def test_write_hdf5_type_fail(self): """Test that writing attributes with varying types results in an error""" - self.impact.event_name = [1, "a", 1.0, "b", "c", "d"] - with self.assertRaises(TypeError) as cm: + self.impact.event_name = ["a", 1, 1.0, "b", "c", "d"] + with self.assertRaisesRegex( + TypeError, "'event_name' must be a list of strings" + ): self.impact.write_hdf5(self.filepath) - self.assertIn("No conversion path for dtype", str(cm.exception)) def test_cycle_hdf5(self): """Test writing and reading the same object""" @@ -1120,6 +1122,15 @@ def test_read_hdf5_full(self): impact = Impact.from_hdf5(self.filepath) npt.assert_array_equal(impact.imp_mat.toarray(), [[0, 1, 2], [3, 0, 0]]) + # Check with non-string event_name + event_name = [1.2, 2] + with h5py.File(self.filepath, "r+") as file: + del file["event_name"] + file.create_dataset("event_name", data=event_name) + with self.assertLogs("climada.engine.impact", "WARNING") as cm: + impact = Impact.from_hdf5(self.filepath) + self.assertIn("'event_name' is not stored as strings", cm.output[0]) + self.assertListEqual(impact.event_name, ["1.2", "2.0"]) # Execute Tests if __name__ == "__main__":