Skip to content

Commit 4f3b716

Browse files
Issue #1132 performance hfb (#1157)
Fixes #1132 # Description This PR adds the following: - Increase performance for writing the HFB files, by using new xugrid create_snap_to_grid_dataframe. - Fixes error when multiple barriers in a single dataset would be snapped to same edge. These lines are now aggregated. - Add some basic infrastructure for user acceptance test for writing LHM, the path to the LHM projectfile should be set in a .env as LHM_PRJ. This can later be extended to TeamCity. This reduces the time spent writing the HFB packages from 34 minutes to 12.5 minutes on my local machine. I've profiled the causes of the remaining bottlenecks to writing. I'll post a follow-up issue for that. # Checklist - [x] Links to correct issue - [x] Update changelog, if changes affect users - [x] PR title starts with ``Issue #nr``, e.g. ``Issue #737`` - [x] Unit tests were added - [ ] **If feature added**: Added/extended example
1 parent a230bc9 commit 4f3b716

File tree

8 files changed

+356
-84
lines changed

8 files changed

+356
-84
lines changed

docs/api/changelog.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ Fixed
1515
- Multiple ``HorizontalFlowBarrier`` objects attached to
1616
:class:`imod.mf6.GroundwaterFlowModel` are merged into a single horizontal
1717
flow barrier for MODFLOW 6
18-
18+
- Bug where error would be thrown when barriers in a ``HorizontalFlowBarrier``
19+
would be snapped to the same cell edge. These are now summed.
1920

2021
Changed
2122
~~~~~~~

imod/mf6/hfb.py

Lines changed: 69 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import cftime
1010
import numpy as np
11+
import numpy.typing as npt
1112
import pandas as pd
1213
import xarray as xr
1314
import xugrid as xu
@@ -27,6 +28,7 @@
2728
)
2829
from imod.schemata import EmptyIndexesSchema, MaxNUniqueValuesSchema
2930
from imod.typing import GeoDataFrameType, GridDataArray, LineStringType
31+
from imod.typing.grid import as_ugrid_dataarray
3032
from imod.util.imports import MissingOptionalModule
3133

3234
if TYPE_CHECKING:
@@ -374,6 +376,53 @@ def _prepare_barrier_dataset_for_mf6_adapter(dataset: xr.Dataset) -> xr.Dataset:
374376
return dataset
375377

376378

379+
def _snap_to_grid_and_aggregate(
380+
barrier_dataframe: GeoDataFrameType, grid2d: xu.Ugrid2d, vardict_agg: dict[str, str]
381+
) -> tuple[xu.UgridDataset, npt.NDArray]:
382+
"""
383+
Snap barrier dataframe to grid and aggregate multiple lines with a list of
384+
methods per variable.
385+
386+
Parameters
387+
----------
388+
barrier_dataframe: geopandas.GeoDataFrame
389+
GeoDataFrame with barriers, should have variable "line_index".
390+
grid2d: xugrid.Ugrid2d
391+
Grid to snap lines to
392+
vardict_agg: dict
393+
Mapping of variable name to aggregation method
394+
395+
Returns
396+
-------
397+
snapping_dataset: xugrid.UgridDataset
398+
Dataset with all variables snapped and aggregated to cell edges
399+
edge_index: numpy.array
400+
1D array with indices of cell edges that lines were snapped to
401+
"""
402+
snapping_df = xu.create_snap_to_grid_dataframe(
403+
barrier_dataframe, grid2d, max_snap_distance=0.5
404+
)
405+
# Map other variables to snapping_df with line indices
406+
line_index = snapping_df["line_index"]
407+
vars_to_snap = list(vardict_agg.keys())
408+
if "line_index" in vars_to_snap:
409+
vars_to_snap.remove("line_index") # snapping_df already has line_index
410+
for varname in vars_to_snap:
411+
snapping_df[varname] = barrier_dataframe[varname].iloc[line_index].to_numpy()
412+
# Aggregate to grid edges
413+
gb_edge = snapping_df.groupby("edge_index")
414+
# Initialize dataset and dataarray with the right shape and dims
415+
snapped_dataset = xu.UgridDataset(grids=[grid2d])
416+
new = xr.DataArray(np.full(grid2d.n_edge, np.nan), dims=[grid2d.edge_dimension])
417+
edge_index = np.array(list(gb_edge.indices.keys()))
418+
# Aggregate with different methods per variable
419+
for varname, method in vardict_agg.items():
420+
snapped_dataset[varname] = new.copy()
421+
snapped_dataset[varname].data[edge_index] = gb_edge[varname].aggregate(method)
422+
423+
return snapped_dataset, edge_index
424+
425+
377426
class BarrierType(Enum):
378427
HydraulicCharacteristic = 0
379428
Multiplier = 1
@@ -464,10 +513,9 @@ def _to_connected_cells_dataset(
464513
"""
465514
top, bottom = broadcast_to_full_domain(idomain, top, bottom)
466515
k = idomain * k
516+
# Enforce unstructured
467517
unstructured_grid, top, bottom, k = (
468-
self.__to_unstructured(idomain, top, bottom, k)
469-
if isinstance(idomain, xr.DataArray)
470-
else [idomain, top, bottom, k]
518+
as_ugrid_dataarray(grid) for grid in [idomain, top, bottom, k]
471519
)
472520
snapped_dataset, edge_index = self._snap_to_grid(idomain)
473521
edge_index = self.__remove_invalid_edges(unstructured_grid, edge_index)
@@ -736,61 +784,36 @@ def mask(self, _) -> Package:
736784
"""
737785
return deepcopy(self)
738786

739-
@staticmethod
740-
def __to_unstructured(
741-
idomain: xr.DataArray, top: xr.DataArray, bottom: xr.DataArray, k: xr.DataArray
742-
) -> Tuple[
743-
xu.UgridDataArray, xu.UgridDataArray, xu.UgridDataArray, xu.UgridDataArray
744-
]:
745-
unstruct = xu.UgridDataArray.from_structured(idomain)
746-
top = xu.UgridDataArray.from_structured(top)
747-
bottom = xu.UgridDataArray.from_structured(bottom)
748-
k = xu.UgridDataArray.from_structured(k)
749-
750-
return unstruct, top, bottom, k
751-
752787
def _snap_to_grid(
753788
self, idomain: GridDataArray
754789
) -> Tuple[xu.UgridDataset, np.ndarray]:
755-
if "layer" in self._get_vertical_variables():
756-
variable_names = [self._get_variable_name(), "geometry", "layer"]
790+
variable_name = self._get_variable_name()
791+
has_layer = "layer" in self._get_vertical_variables()
792+
# Create geodataframe with barriers
793+
if has_layer:
794+
varnames_for_df = [variable_name, "geometry", "layer"]
757795
else:
758-
variable_names = [self._get_variable_name(), "geometry"]
796+
varnames_for_df = [variable_name, "geometry"]
759797
barrier_dataframe = gpd.GeoDataFrame(
760-
self.dataset[variable_names].to_dataframe()
798+
self.dataset[varnames_for_df].to_dataframe()
761799
)
762-
763-
if "layer" not in self._get_vertical_variables():
800+
# Convert vertical polygon to linestring
801+
if not has_layer:
764802
lower, _ = _extract_hfb_bounds_from_zpolygons(barrier_dataframe)
765803
linestring = _create_zlinestring_from_bound_df(lower)
766804
barrier_dataframe["geometry"] = linestring["geometry"]
767-
805+
# Clip barriers outside domain
768806
barrier_dataframe = clip_line_gdf_by_grid(
769807
barrier_dataframe, idomain.sel(layer=1)
770808
)
771-
772-
# Work around issue where xu.snap_to_grid cannot handle snapping a
773-
# dataset with multiple lines appropriately. This can be later replaced
774-
# to a single call to xu.snap_to_grid if this bug is fixed:
775-
# <link_to_github_issue_here>
776-
snapped_dataset_rows: List[xr.Dataset] = []
777-
for index in barrier_dataframe.index:
778-
# Index with list to return pd.DataFrame instead of pd.Series for
779-
# xu.snap_to_grid.
780-
row_df = barrier_dataframe.loc[[index]]
781-
snapped_dataset_row, _ = typing.cast(
782-
xu.UgridDataset,
783-
xu.snap_to_grid(row_df, grid=idomain, max_snap_distance=0.5),
784-
)
785-
snapped_dataset_row["line_index"] += index[0] # Set to original line index.
786-
snapped_dataset_rows.append(snapped_dataset_row)
787-
snapped_dataset = xu.merge(snapped_dataset_rows)
788-
789-
edge_index = np.argwhere(
790-
snapped_dataset[self._get_variable_name()].notnull().values
791-
).ravel()
792-
793-
return snapped_dataset, edge_index
809+
# Prepare variable names and methods for aggregation
810+
vardict_agg = {"line_index": "first", variable_name: "sum"}
811+
if has_layer:
812+
vardict_agg["layer"] = "first"
813+
# Create grid from structured
814+
grid2d = as_ugrid_dataarray(idomain.sel(layer=1)).grid
815+
816+
return _snap_to_grid_and_aggregate(barrier_dataframe, grid2d, vardict_agg)
794817

795818
@staticmethod
796819
def __remove_invalid_edges(
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
"""
2+
LHM tests, these are pytest-marked with 'user_acceptance'.
3+
4+
These require the LHM model to be available on the local drive. The tests looks
5+
for the path to the projectfile needs to be included in a .env file, with the
6+
environmental variable "LHM_PRJ" with the path to the projectfile.
7+
"""
8+
9+
import os
10+
import sys
11+
12+
import pandas as pd
13+
import pytest
14+
15+
import imod
16+
from imod.formats.prj.prj import open_projectfile_data
17+
from imod.logging.config import LoggerType
18+
from imod.logging.loglevel import LogLevel
19+
from imod.mf6.oc import OutputControl
20+
from imod.mf6.regrid.regrid_schemes import (
21+
DiscretizationRegridMethod,
22+
NodePropertyFlowRegridMethod,
23+
StorageCoefficientRegridMethod,
24+
)
25+
from imod.mf6.simulation import Modflow6Simulation
26+
from imod.mf6.utilities.mf6hfb import merge_hfb_packages
27+
from imod.mf6.write_context import WriteContext
28+
from imod.prepare.topsystem.default_allocation_methods import (
29+
SimulationAllocationOptions,
30+
SimulationDistributingOptions,
31+
)
32+
33+
34+
# In function, not a fixture, to allow logging of the import.
35+
def LHM_imod5_data():
36+
lhm_prjfile = os.environ["LHM_PRJ"]
37+
data = open_projectfile_data(lhm_prjfile)
38+
39+
imod5_data = data[0]
40+
period_data = data[1]
41+
default_simulation_allocation_options = SimulationAllocationOptions
42+
default_simulation_distributing_options = SimulationDistributingOptions
43+
44+
regridding_option = {}
45+
regridding_option["npf"] = NodePropertyFlowRegridMethod()
46+
regridding_option["dis"] = DiscretizationRegridMethod()
47+
regridding_option["sto"] = StorageCoefficientRegridMethod()
48+
times = pd.date_range(start="1/1/2018", end="12/1/2018", freq="ME")
49+
50+
simulation = Modflow6Simulation.from_imod5_data(
51+
imod5_data,
52+
period_data,
53+
default_simulation_allocation_options,
54+
default_simulation_distributing_options,
55+
times,
56+
regridding_option,
57+
)
58+
simulation["imported_model"]["oc"] = OutputControl(
59+
save_head="last", save_budget="last"
60+
)
61+
return simulation
62+
63+
64+
@pytest.mark.user_acceptance
65+
def test_mf6_LHM_write_HFB(tmp_path):
66+
logfile_path = tmp_path / "logfile.txt"
67+
with open(logfile_path, "w") as sys.stdout:
68+
imod.logging.configure(
69+
LoggerType.PYTHON,
70+
log_level=LogLevel.DEBUG,
71+
add_default_file_handler=False,
72+
add_default_stream_handler=True,
73+
)
74+
simulation = LHM_imod5_data()
75+
model = simulation["imported_model"]
76+
77+
mf6_hfb_ls = []
78+
for key, pkg in model.items():
79+
if issubclass(type(pkg), imod.mf6.HorizontalFlowBarrierBase):
80+
mf6_hfb_ls.append(pkg)
81+
pkg.dataset.load()
82+
83+
top, bottom, idomain = model._Modflow6Model__get_domain_geometry()
84+
k = model._Modflow6Model__get_k()
85+
86+
mf6_hfb = merge_hfb_packages(mf6_hfb_ls, idomain, top, bottom, k)
87+
88+
times = pd.date_range(start="1/1/2018", end="12/1/2018", freq="ME")
89+
90+
out_dir = tmp_path / "LHM"
91+
out_dir.mkdir(parents=True, exist_ok=True)
92+
write_context = WriteContext(out_dir, use_binary=True, use_absolute_paths=False)
93+
94+
mf6_hfb.write("hfb", times, write_context)

imod/tests/test_mf6/test_mf6_hfb.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
_extract_mean_hfb_bounds_from_dataframe,
2323
_make_linestring_from_polygon,
2424
_prepare_barrier_dataset_for_mf6_adapter,
25+
_snap_to_grid_and_aggregate,
2526
to_connected_cells_dataset,
2627
)
2728
from imod.mf6.ims import SolutionPresetSimple
@@ -697,6 +698,77 @@ def test_hfb_from_imod5(imod5_dataset, tmp_path):
697698
assert list(np.unique(hfb_package["layer"].values)) == [7]
698699

699700

701+
@pytest.mark.usefixtures("structured_flow_model")
702+
def test_snap_to_grid_and_aggregate(structured_flow_model):
703+
idomain = structured_flow_model["dis"]["idomain"]
704+
grid2d = xu.Ugrid2d.from_structured(idomain)
705+
706+
barrier_y = [11.0, 5.0, -1.0]
707+
barrier_x = [5.0, 5.0, 5.0]
708+
line = linestrings(barrier_x, barrier_y)
709+
layer = [1, 1, 1]
710+
711+
geometry_triple = gpd.GeoDataFrame(
712+
geometry=[line, line, line],
713+
data={
714+
"resistance": [400.0, 400.0, 400.0],
715+
"layer": layer,
716+
"line_index": [0, 1, 2],
717+
},
718+
)
719+
geometry_triple = geometry_triple.set_index("line_index")
720+
721+
vardict_agg = {"resistance": "sum", "layer": "first"}
722+
723+
snapped_dataset, edge_index = _snap_to_grid_and_aggregate(
724+
geometry_triple, grid2d, vardict_agg
725+
)
726+
727+
argwhere_summed_expected = np.array([7, 20, 33, 46, 59, 72], dtype=np.int64)
728+
argwhere_summed_actual = np.nonzero((snapped_dataset["resistance"] == 1200).values)[
729+
0
730+
]
731+
732+
np.testing.assert_array_equal(argwhere_summed_actual, argwhere_summed_expected)
733+
np.testing.assert_array_equal(edge_index, argwhere_summed_expected)
734+
735+
736+
@pytest.mark.usefixtures("structured_flow_model")
737+
def test_combine_linestrings(structured_flow_model):
738+
dis = structured_flow_model["dis"]
739+
top, bottom, idomain = (
740+
dis["top"],
741+
dis["bottom"],
742+
dis["idomain"],
743+
)
744+
k = xr.ones_like(idomain)
745+
746+
barrier_y = [11.0, 5.0, -1.0]
747+
barrier_x = [5.0, 5.0, 5.0]
748+
line = linestrings(barrier_x, barrier_y)
749+
750+
geometry_single = gpd.GeoDataFrame(
751+
geometry=[line],
752+
data={
753+
"resistance": [1200.0],
754+
"layer": [1],
755+
},
756+
)
757+
geometry_triple = gpd.GeoDataFrame(
758+
geometry=[line, line, line],
759+
data={
760+
"resistance": [400.0, 400.0, 400.0],
761+
"layer": [1, 1, 1],
762+
},
763+
)
764+
hfb_single = SingleLayerHorizontalFlowBarrierResistance(geometry_single)
765+
hfb_triple = SingleLayerHorizontalFlowBarrierResistance(geometry_triple)
766+
mf6_hfb_single = hfb_single.to_mf6_pkg(idomain, top, bottom, k)
767+
mf6_hfb_triple = hfb_triple.to_mf6_pkg(idomain, top, bottom, k)
768+
769+
xr.testing.assert_equal(mf6_hfb_single.dataset, mf6_hfb_triple.dataset)
770+
771+
700772
@pytest.mark.usefixtures("structured_flow_model")
701773
def test_run_multiple_hfbs(tmp_path, structured_flow_model):
702774
# Single layered model

0 commit comments

Comments
 (0)