Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
1b58b40
issue #439
Mar 19, 2024
3cdc024
Resolved the requested points in the pull request review.
Mar 25, 2024
dae71a0
Update verde/tests/test_utils.py
Phssilva Apr 11, 2024
b547d40
Update verde/tests/test_utils.py
Phssilva Apr 11, 2024
cfd2b31
Update verde/tests/test_utils.py
Phssilva Apr 11, 2024
40e9290
Update verde/utils.py
Phssilva Apr 11, 2024
05fe6c7
Update verde/utils.py
Phssilva Apr 11, 2024
cb812cc
Update verde/utils.py
Phssilva Apr 11, 2024
e985876
Merge branch 'main' into issue#439
mdtanker Mar 17, 2026
2c0e967
update fill_nans to accept any number of variables and preserve coord…
mdtanker Mar 17, 2026
8cda09b
Merge branch 'main' into issue#439
mdtanker Mar 17, 2026
c39dbc0
use np.testing
mdtanker Mar 17, 2026
925a020
return dataset with 1 variable if passed
mdtanker Mar 17, 2026
2e42f0d
fix mistakes in fill_na and tests
mdtanker Mar 18, 2026
28cd648
add test for k=3
mdtanker Mar 18, 2026
6911c68
fill values directly to preserve metadata
mdtanker Mar 18, 2026
7b22774
all using any verde interpolator for filling nans
mdtanker Mar 18, 2026
f18f1d3
reduce rtol for assert_allclose
mdtanker Mar 18, 2026
6d1e25a
add Trend option to fill_nans
mdtanker Mar 18, 2026
788c59f
add test that interpolator isn't already fitted
mdtanker Mar 18, 2026
3e75fac
reset rtol
mdtanker Mar 18, 2026
943be7d
use relative imports in tests
mdtanker Mar 20, 2026
7f0602d
use relative imports in utils
mdtanker Mar 20, 2026
07208a6
rename fill_nans to fill_missing
mdtanker Mar 20, 2026
5531be7
default of 5 nearest neighbors
mdtanker Mar 20, 2026
da0abfc
remove isfitted check for interpolators
mdtanker Mar 20, 2026
a448611
warn for any case of remaining NaNs, and suggest alternative interpol…
mdtanker Mar 20, 2026
77ea980
fix relative imports
mdtanker Mar 20, 2026
af7e2d5
reorder imports
mdtanker Mar 20, 2026
341dc42
use direct import to fix circular ImportError
mdtanker Mar 20, 2026
5035eed
return to absolute import
mdtanker Mar 20, 2026
91420e3
only predict at nans
mdtanker Mar 20, 2026
b1b8848
add `SplineCV` to potential interpolators
mdtanker Mar 20, 2026
662bc60
move `fill_missing` to new module `fill`
mdtanker Mar 20, 2026
8a3be2e
skip variable if no nans
mdtanker Mar 20, 2026
1036cea
add `maxdist` parameter to `fill_missing`
mdtanker Mar 20, 2026
4e8054f
Merge branch 'main' into issue#439
mdtanker May 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ Utilities
grid_to_table
make_xarray_grid
median_distance
fill_missing

Input/Output
------------
Expand Down
1 change: 1 addition & 0 deletions verde/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
scatter_points,
)
from .distances import median_distance
from .fill import fill_missing
from .io import load_surfer
from .mask import convexhull_mask, distance_mask
from .model_selection import (
Expand Down
133 changes: 133 additions & 0 deletions verde/fill.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Copyright (c) 2017 The Verde Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
Functions for filling missing values (NaNs) in grids.
"""
import copy
import warnings

import numpy as np
import xarray as xr

from .base.utils import n_1d_arrays
from .neighbors import KNeighbors
from .utils import grid_to_table, kdtree


def fill_missing(
    grid,
    interpolator=None,
    maxdist=None,
):
    """
    Fill missing values in a grid with a choice of interpolation method.

    Every data variable in the supplied grid that contains NaNs is processed
    independently: a copy of the interpolator is fitted on the valid (non-NaN)
    points of that variable and then used to predict values only at the NaN
    locations. Variables without NaNs are skipped. To speed up the
    interpolation, you can choose to only use points within ``maxdist`` of the
    missing values when fitting the interpolator.

    Interpolation methods include nearest neighbor, linear, trend, cubic,
    or splines.

    Parameters
    ----------
    grid : :class:`xarray.DataArray` | :class:`xarray.Dataset`
        A 2D grid with one or more data variables, some of which may have
        missing values (NaNs). The function works on a copy of this object.
    interpolator : object | None
        The verde interpolator instance to use for filling missing values.
        Can be one of the following :class:`verde.KNeighbors`,
        :class:`verde.Linear`, :class:`verde.Cubic`, :class:`verde.Spline`,
        :class:`verde.SplineCV`, :class:`verde.Trend`. If None (default),
        :class:`verde.KNeighbors` using the nearest 5 neighbors is used. The
        instance is deep-copied before fitting, once per variable.
    maxdist : float | None
        Only use data within this distance to the nearest missing data when
        fitting the interpolator. For expensive interpolators, such as
        :class:`verde.SplineCV`, this can significantly speed up the function.
        If None (default), all valid points are used.

    Returns
    -------
    filled_grid : :class:`xarray.DataArray` | :class:`xarray.Dataset`
        A 2D grid with the NaN values filled for each variable. The type
        matches the type of the input ``grid``.

    Raises
    ------
    ValueError
        If ``maxdist`` is so small that no valid data point lies within that
        distance of a missing value.

    Warns
    -----
    UserWarning
        If NaNs are still present in a variable after interpolation.
    """
    grid = grid.copy()

    if interpolator is None:
        interpolator = KNeighbors(k=5)

    # Work on a Dataset internally so one loop handles both input types
    is_dataarray = isinstance(grid, xr.DataArray)
    ds = grid.to_dataset() if is_dataarray else grid

    # Iterate over a snapshot of the names: for Dataset input ``ds`` and
    # ``grid`` are the same object and its variables are reassigned below
    for var_name in list(ds.data_vars):
        table = grid_to_table(ds[var_name])

        # Nothing to fill for this variable, leave it unchanged
        is_missing = table[var_name].isna()
        if not is_missing.any():
            continue

        # The first two columns of the table are treated as the coordinates
        # everywhere below, so take the index column names from the table
        # itself rather than from the (possibly longer or reordered) list of
        # coordinates attached to the grid
        coord_columns = list(table.columns[:2])

        valid = table[~is_missing]
        missing = table[is_missing]
        # Coordinates are passed as (second column, first column)
        missing_coords = (missing.iloc[:, 1], missing.iloc[:, 0])

        # Only use nearby non-NaN points for fitting
        if maxdist is not None:
            tree = kdtree(missing_coords)
            valid_coords = (valid.iloc[:, 1], valid.iloc[:, 0])
            distances, _ = tree.query(
                np.transpose(n_1d_arrays(valid_coords, 2)), k=1
            )
            # Keep valid points closer than maxdist to their nearest NaN
            valid = valid[np.ravel(distances) <= maxdist]
            # Explicit exception instead of ``assert`` so that the check is
            # not removed when Python runs with optimizations enabled (-O)
            if valid.empty:
                raise ValueError("maxdist resulted in no points, increase the value")

        # Fit a fresh copy so the caller's interpolator is never modified and
        # no state leaks between variables
        interp = copy.deepcopy(interpolator)
        interp.fit((valid.iloc[:, 1], valid.iloc[:, 0]), valid[var_name])

        # Predict only at NaNs and write the values back into the table
        predicted = interp.predict(missing_coords)
        table.loc[missing.index, var_name] = predicted

        # Convert the filled table back to a DataArray
        filled_da = table.set_index(coord_columns).to_xarray()[var_name]

        # Warn if still NaNs due to no extrapolation allowed for
        # `Cubic` and `Linear` interpolators
        if filled_da.isnull().any():
            msg = (
                "NaNs are still present in this grid! This may be due "
                f"to the choice of interpolator {type(interp)}, "
                "some of which don't allow extrapolation. To fill the "
                "remaining values run `fill_missing()` again with an "
                "interpolator which allows extrpolation. We recommend "
                "`vd.KNeighbors` if you have a large grid (>~10,000 points) "
                "or `vd.Spline` or `vd.SplineCV` if you have a a smaller grid "
                "or require smoother results."
            )
            warnings.warn(msg, UserWarning, stacklevel=2)

        # Fill values into the existing object (instead of returning
        # ``filled_da``) so the original values are kept wherever they were
        # not NaN
        if is_dataarray:
            grid = grid.where(grid.notnull(), filled_da)
        else:
            grid[var_name] = grid[var_name].where(
                grid[var_name].notnull(), filled_da
            )

    return grid
Loading
Loading