Skip to content

Commit 75ed33f

Browse files
committed
fix: proper fillna management for vector features
1 parent 65c45f6 commit 75ed33f

File tree

3 files changed

+44
-12
lines changed

3 files changed

+44
-12
lines changed

focalpy/core.py

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ def _compute_features(
4646
feature_dfs,
4747
axis="rows",
4848
)
49-
.fillna(0)
5049
.set_index("buffer_dist", append=True)
5150
.sort_index()
5251
)
@@ -59,6 +58,7 @@ def compute_vector_features(
5958
buffer_dists: float | Sequence[float],
6059
*gb_reduce_args: Sequence,
6160
gb_reduce_method: str = "agg",
61+
fillna: utils.FillnaType = None,
6262
**gb_reduce_kwargs: utils.KwargsType,
6363
) -> pd.DataFrame:
6464
"""Compute multi-scale vector aggregation features.
@@ -78,6 +78,10 @@ def compute_vector_features(
7878
The group-by reduce-like method to apply to the data. This can be any method
7979
available on the `pandas.core.groupby.DataFrameGroupBy` object, e.g.,
8080
"sum", "mean", "median", "min", "max", or "agg".
81+
fillna : numeric, mapping, bool, optional
82+
Value to use to fill NaN values in the resulting features DataFrame, passed to
83+
`pandas.DataFrame.fillna`. If `False`, no filling is performed. If `None`, the
84+
default value set in `settings.VECTOR_FEATURES_FILLNA` is used.
8185
**gb_reduce_kwargs : mapping, optional
8286
Keyword arguments to pass to the group-by reduce-like method.
8387
@@ -104,16 +108,22 @@ def compute_vector_features(
104108
sites = sites.geometry
105109

106110
def _gb_reduce(buffers):
107-
return getattr(
108-
buffers.to_frame(name="geometry")
109-
.sjoin(gdf)
110-
# remove right index resulting column in the sjoin data frame
111-
# see https://github.com/geopandas/geopandas/issues/498
112-
.drop(columns=["geometry", gdf.index.name], errors="ignore")
113-
.reset_index(sites.index.name)
114-
.groupby(by=sites.index.name),
115-
gb_reduce_method,
116-
)(*gb_reduce_args, **gb_reduce_kwargs)
111+
return (
112+
getattr(
113+
buffers.to_frame(name="geometry")
114+
.sjoin(gdf)
115+
# remove right index resulting column in the sjoin data frame
116+
# see https://github.com/geopandas/geopandas/issues/498
117+
.drop(columns=["geometry", gdf.index.name], errors="ignore")
118+
.reset_index(sites.index.name)
119+
.groupby(by=sites.index.name),
120+
gb_reduce_method,
121+
)(*gb_reduce_args, **gb_reduce_kwargs)
122+
# ACHTUNG: use `reindex` to ensure that all sites with no overlapping
123+
# geometries are included too (which will have NaN values that we can
124+
# subsequently manage with `fillna`)
125+
.reindex(sites.index)
126+
)
117127

118128
if gb_reduce_method != "agg":
119129

@@ -151,6 +161,11 @@ def _gb_reduce_to_frame(buffers):
151161
columns=lambda col: f"{col}_{gb_reduce_func_arg}"
152162
)
153163

164+
if fillna is None:
165+
fillna = settings.VECTOR_FEATURES_FILLNA
166+
if fillna == 0 or fillna:
167+
vector_features_df = vector_features_df.fillna(fillna)
168+
154169
return vector_features_df
155170

156171

@@ -161,6 +176,7 @@ def compute_raster_features(
161176
buffer_dists: float | Sequence[float],
162177
*,
163178
affine: affine.Affine | None = None,
179+
fillna: utils.FillnaType = None,
164180
**zonal_stats_kwargs: utils.KwargsType,
165181
):
166182
"""Compute multi-scale raster statistics features.
@@ -176,6 +192,10 @@ def compute_raster_features(
176192
The buffer distances to compute features, in the same units as the raster CRS.
177193
affine: `affine.Affine`, optional
178194
Affine transform. Ignored if `raster` is a path-like object.
195+
fillna : numeric, mapping, bool, optional
196+
Value to use to fill NaN values in the resulting features DataFrame, passed to
197+
`pandas.DataFrame.fillna`. If `False`, no filling is performed. If `None`, the
198+
default value set in `settings.RASTER_FEATURES_FILLNA` is used.
179199
**zonal_stats_kwargs : mapping, optional
180200
Keyword arguments to pass to `rasterstats.zonal_stats`.
181201
@@ -191,7 +211,7 @@ def _zonal_stats(buffers, *args, **kwargs):
191211
rasterstats.zonal_stats(buffers, *args, **kwargs), index=buffers.index
192212
)
193213

194-
return _compute_features(
214+
raster_features_df = _compute_features(
195215
raster,
196216
sites,
197217
buffer_dists,
@@ -202,6 +222,10 @@ def _zonal_stats(buffers, *args, **kwargs):
202222
**zonal_stats_kwargs,
203223
)
204224

225+
if fillna is None:
226+
fillna = settings.RASTER_FEATURES_FILLNA
227+
if fillna == 0 or fillna:
228+
raster_features_df = raster_features_df.fillna(fillna)
205229

206230
def _compute_features_df(
207231
sites,
@@ -242,6 +266,7 @@ def _prefix_rename_dict(feature):
242266
for feature_col, buffer_dist in features_df.columns.values
243267
]
244268
return features_df
269+
return raster_features_df
245270

246271

247272
def _fit_transform(X, transformer, **transformer_kwargs):

focalpy/settings.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
import statsmodels.api as sm
44
from sklearn import decomposition, preprocessing
55

6+
# compute features
7+
# TODO: dict-like fillna for different statistics?
8+
VECTOR_FEATURES_FILLNA = 0
9+
RASTER_FEATURES_FILLNA = 0
10+
611
# focal analysis
712
FEATURE_PREPROCESSOR = preprocessing.StandardScaler
813
FEATURE_DECOMPOSER = decomposition.PCA

focalpy/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@
99
PathType = str | os.PathLike
1010
# type hint for keyword arguments
1111
KwargsType = Mapping | None
12+
# type hint for fillna arguments
13+
FillnaType = float | Mapping | bool | None

0 commit comments

Comments
 (0)