Skip to content

Commit df307c9

Browse files
Issue #1548 fix performance ipf many wells (#1552)
Fixes #1548 # Description - Fix issue with poor performance ``GridAgnosticWell.from_imod5_data`` when there are >10K wells in an IPF by moving an index explicitly beforehand. This requires one loop over all well groups, after which we can just use pandas/xarray functionality to do required data transformations. - Properly type annotate helper functions in imod.mf6.wel that accept a sequence of pandas groupby objects. # Checklist <!--- Before requesting review, please go through this checklist: --> - [x] Links to correct issue - [x] Update changelog, if changes affect users - [x] PR title starts with ``Issue #nr``, e.g. ``Issue #737`` - [ ] Unit tests were added - [ ] **If feature added**: Added/extended example
1 parent 67d33bc commit df307c9

File tree

4 files changed

+25
-29
lines changed

4 files changed

+25
-29
lines changed

docs/api/changelog.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ Fixed
5252
unstructured discretization.
5353
- Fixed bug in :func:`imod.formats.prj.open_projectfile_data` which caused an
5454
error when a periods keyword was used having an upper case.
55+
- Poor performance of :meth:`imod.mf6.Well.from_imod5_data` and
56+
:meth:`imod.mf6.LayeredWell.from_imod5_data` when the ``imod5_data`` contained
57+
a well system with a large number of wells (>10k).
5558
- :meth:`imod.mf6.River.from_imod5_data`,
5659
:meth:`imod.mf6.Drainage.from_imod5_data`,
5760
:meth:`imod.mf6.GeneralHeadBoundary.from_imod5_data` can now deal with

imod/mf6/wel.py

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import warnings
77
from collections.abc import Iterable
88
from datetime import datetime
9-
from typing import Any, Callable, Optional, Self, Tuple, Union, cast
9+
from typing import Any, Callable, Optional, Self, Sequence, Tuple, Union, cast
1010

1111
import cftime
1212
import numpy as np
@@ -16,7 +16,6 @@
1616
import xugrid as xu
1717

1818
import imod
19-
import imod.mf6.utilities
2019
from imod.common.interfaces.ipointdatapackage import IPointDataPackage
2120
from imod.common.utilities.grid import broadcast_to_full_domain
2221
from imod.common.utilities.layer import create_layered_top
@@ -82,36 +81,25 @@ def mask_2D(package: GridAgnosticWell, domain_2d: GridDataArray) -> GridAgnostic
8281

8382

8483
def _df_groups_to_da_rates(
85-
unique_well_groups: pd.api.typing.DataFrameGroupBy,
84+
unique_well_groups: Sequence[pd.api.typing.DataFrameGroupBy],
8685
) -> xr.DataArray:
8786
# Convert dataframes all groups to DataArrays
88-
is_steady_state = "time" not in unique_well_groups[0].columns
89-
if is_steady_state:
90-
da_groups = [
91-
xr.DataArray(df_group["rate"].sum()) for df_group in unique_well_groups
92-
]
87+
columns = list(unique_well_groups[0].columns)
88+
columns.remove("rate")
89+
is_transient = "time" in columns
90+
gb_and_summed = pd.concat(unique_well_groups).groupby(columns).sum()
91+
if is_transient:
92+
index_names = ["time", "index"]
9393
else:
94-
da_groups = [
95-
xr.DataArray(
96-
df_group["rate"], dims=("time"), coords={"time": df_group["time"]}
97-
)
98-
for df_group in unique_well_groups
99-
]
100-
# Groupby time and sum to aggregate wells with the exact same x, y, and
101-
# filter top/bottom.
102-
da_groups = [da_group.groupby("time").sum() for da_group in da_groups]
103-
# Assign index coordinates
104-
da_groups = [
105-
da_group.expand_dims(dim="index").assign_coords(index=[i])
106-
for i, da_group in enumerate(da_groups)
107-
]
108-
# Concatenate datarrays along index dimension
109-
return xr.concat(da_groups, dim="index")
94+
index_names = ["index"]
95+
# Unset multi-index, then set index to index_names
96+
df_temp = gb_and_summed.reset_index().set_index(index_names)
97+
return df_temp["rate"].to_xarray()
11098

11199

112100
def _prepare_well_rates_from_groups(
113101
pkg_data: dict,
114-
unique_well_groups: pd.api.typing.DataFrameGroupBy,
102+
unique_well_groups: Sequence[pd.api.typing.DataFrameGroupBy],
115103
start_times: StressPeriodTimesType,
116104
) -> xr.DataArray:
117105
"""
@@ -690,8 +678,12 @@ def from_imod5_data(
690678
# Associated wells need additional grouping by id
691679
if pkg_data["has_associated"]:
692680
colnames_group.append("id")
693-
wel_index, unique_well_groups = zip(*df.groupby(colnames_group))
694-
681+
wel_index, well_groups_untagged = zip(*df.groupby(colnames_group))
682+
# Explicitly assign an index to each group, so that the
683+
# DataArray of rates can be created with a unique index.
684+
unique_well_groups = [
685+
group.assign(index=i) for i, group in enumerate(well_groups_untagged)
686+
]
695687
# Unpack wel indices by zipping
696688
varnames = [("x", float), ("y", float)] + cls._depth_colnames
697689
index_values = zip(*wel_index)

imod/tests/test_mf6/test_utilities/test_resampling.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def initialize_timeseries(times: list[datetime], rates: list[float]) -> pd.DataF
1717
timeseries["id"] = "ID"
1818
timeseries["filt_top"] = 20
1919
timeseries["filt_bot"] = 10
20+
timeseries["index"] = 0
2021

2122
return timeseries
2223

@@ -195,7 +196,7 @@ def test_mean_timeseries():
195196
dummy_times = [datetime(1989, 1, 1)]
196197
expected_rates = np.mean(rates)
197198
expected_timeseries = initialize_timeseries(dummy_times, expected_rates)
198-
col_order = ["x", "y", "id", "filt_top", "filt_bot", "rate"]
199+
col_order = ["x", "y", "id", "filt_top", "filt_bot", "index", "rate"]
199200
expected_timeseries = expected_timeseries[col_order]
200201

201202
pd.testing.assert_frame_equal(

imod/util/expand_repetitions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def resample_timeseries(
104104
# The entries before the start of the well timeseries do not have data yet,
105105
# so we fill them in here. Keep rate to zero and pad the location columns with
106106
# the first entry.
107-
location_columns = ["x", "y", "id", "filt_top", "filt_bot"]
107+
location_columns = ["x", "y", "id", "filt_top", "filt_bot", "index"]
108108
time_before_start_input = (
109109
intermediate_df["time"].values < well_rate["time"].values[0]
110110
)

0 commit comments

Comments
 (0)