Skip to content

Issue #1548 fix performance ipf many wells #1552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/api/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ Fixed
unstructured discretization.
- Fixed bug in :func:`imod.formats.prj.open_projectfile_data` which caused an
error when a periods keyword was used having an upper case.
- Poor performance of :meth:`imod.mf6.Well.from_imod5_data` and
:meth:`imod.mf6.LayeredWell.from_imod5_data` when the ``imod5_data`` contained
a well system with a large number of wells (>10k).
- :meth:`imod.mf6.River.from_imod5_data`,
:meth:`imod.mf6.Drainage.from_imod5_data`,
:meth:`imod.mf6.GeneralHeadBoundary.from_imod5_data` can now deal with
Expand Down
46 changes: 19 additions & 27 deletions imod/mf6/wel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import warnings
from collections.abc import Iterable
from datetime import datetime
from typing import Any, Callable, Optional, Self, Tuple, Union, cast
from typing import Any, Callable, Optional, Self, Sequence, Tuple, Union, cast

import cftime
import numpy as np
Expand All @@ -16,7 +16,6 @@
import xugrid as xu

import imod
import imod.mf6.utilities
from imod.common.interfaces.ipointdatapackage import IPointDataPackage
from imod.common.utilities.grid import broadcast_to_full_domain
from imod.common.utilities.layer import create_layered_top
Expand Down Expand Up @@ -82,36 +81,25 @@ def mask_2D(package: GridAgnosticWell, domain_2d: GridDataArray) -> GridAgnostic


def _df_groups_to_da_rates(
unique_well_groups: pd.api.typing.DataFrameGroupBy,
unique_well_groups: Sequence[pd.api.typing.DataFrameGroupBy],
) -> xr.DataArray:
    # Convert the dataframes of all groups to DataArrays
is_steady_state = "time" not in unique_well_groups[0].columns
if is_steady_state:
da_groups = [
xr.DataArray(df_group["rate"].sum()) for df_group in unique_well_groups
]
columns = list(unique_well_groups[0].columns)
columns.remove("rate")
is_transient = "time" in columns
gb_and_summed = pd.concat(unique_well_groups).groupby(columns).sum()
if is_transient:
index_names = ["time", "index"]
else:
da_groups = [
xr.DataArray(
df_group["rate"], dims=("time"), coords={"time": df_group["time"]}
)
for df_group in unique_well_groups
]
# Groupby time and sum to aggregate wells with the exact same x, y, and
# filter top/bottom.
da_groups = [da_group.groupby("time").sum() for da_group in da_groups]
# Assign index coordinates
da_groups = [
da_group.expand_dims(dim="index").assign_coords(index=[i])
for i, da_group in enumerate(da_groups)
]
# Concatenate datarrays along index dimension
return xr.concat(da_groups, dim="index")
index_names = ["index"]
# Unset multi-index, then set index to index_names
df_temp = gb_and_summed.reset_index().set_index(index_names)
return df_temp["rate"].to_xarray()


def _prepare_well_rates_from_groups(
pkg_data: dict,
unique_well_groups: pd.api.typing.DataFrameGroupBy,
unique_well_groups: Sequence[pd.api.typing.DataFrameGroupBy],
start_times: StressPeriodTimesType,
) -> xr.DataArray:
"""
Expand Down Expand Up @@ -690,8 +678,12 @@ def from_imod5_data(
# Associated wells need additional grouping by id
if pkg_data["has_associated"]:
colnames_group.append("id")
wel_index, unique_well_groups = zip(*df.groupby(colnames_group))

wel_index, well_groups_untagged = zip(*df.groupby(colnames_group))
# Explicitly assign an index to each group, so that the
# DataArray of rates can be created with a unique index.
unique_well_groups = [
group.assign(index=i) for i, group in enumerate(well_groups_untagged)
]
# Unpack wel indices by zipping
varnames = [("x", float), ("y", float)] + cls._depth_colnames
index_values = zip(*wel_index)
Expand Down
3 changes: 2 additions & 1 deletion imod/tests/test_mf6/test_utilities/test_resampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def initialize_timeseries(times: list[datetime], rates: list[float]) -> pd.DataF
timeseries["id"] = "ID"
timeseries["filt_top"] = 20
timeseries["filt_bot"] = 10
timeseries["index"] = 0

return timeseries

Expand Down Expand Up @@ -195,7 +196,7 @@ def test_mean_timeseries():
dummy_times = [datetime(1989, 1, 1)]
expected_rates = np.mean(rates)
expected_timeseries = initialize_timeseries(dummy_times, expected_rates)
col_order = ["x", "y", "id", "filt_top", "filt_bot", "rate"]
col_order = ["x", "y", "id", "filt_top", "filt_bot", "index", "rate"]
expected_timeseries = expected_timeseries[col_order]

pd.testing.assert_frame_equal(
Expand Down
2 changes: 1 addition & 1 deletion imod/util/expand_repetitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def resample_timeseries(
# The entries before the start of the well timeseries do not have data yet,
# so we fill them in here. Keep rate to zero and pad the location columns with
# the first entry.
location_columns = ["x", "y", "id", "filt_top", "filt_bot"]
location_columns = ["x", "y", "id", "filt_top", "filt_bot", "index"]
time_before_start_input = (
intermediate_df["time"].values < well_rate["time"].values[0]
)
Expand Down