Skip to content

Commit

Permalink
Updated documentation and corrected output names and function names.
Browse files Browse the repository at this point in the history
  • Loading branch information
petebunting committed Jun 27, 2024
1 parent 0ff1291 commit 85b3c61
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 22 deletions.
30 changes: 15 additions & 15 deletions python/rsgislib/tools/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,7 @@ def decompose_bias_variance(y_true, y_pred):
return mse, bias_squared, variance, noise


def calc_variogram(
def calc_semivariogram(
pts_coords: numpy.array,
data_vals: numpy.array,
out_data_file: str = None,
Expand All @@ -1086,17 +1086,17 @@ def calc_variogram(
normalize_vals: bool = False,
):
"""
A function which calculates variogram for the data provided using the
A function which calculates semi-variance for the data provided using the
skgstat module (https://scikit-gstat.readthedocs.io/)
:param pts_coords: the x/y coordinates of the points for which the variogram is
:param pts_coords: the x/y coordinates of the points for which the semi-variance is
calculated. Shape must be [n, 2] where n is the number of points
:param data_vals: the data values of the points for which the variogram is
:param data_vals: the data values of the points for which the semi-variance is
calculated.
:param out_data_file: Optionally output a CSV file with the lag_bins, variance
:param out_data_file: Optionally output a CSV file with the distance, semi-variance
and count. Default is None, but if a file path is provided the
output will be produced.
:param out_plot_file: Optionally output a plot file of the lag_bins, variance
:param out_plot_file: Optionally output a plot file of the distance, semi-variance
and count. Default is None, but if a file path is provided the
output will be produced.
:param max_lag: Can specify the maximum lag distance directly by giving a value
Expand Down Expand Up @@ -1134,8 +1134,8 @@ def calc_variogram(
n_count = numpy.fromiter((g.size for g in vario_obj.lag_classes()), dtype=int)

vario_data = {
"lag_bins": variogram_data[0],
"variance": variogram_data[1],
"distance": variogram_data[0],
"semivariance": variogram_data[1],
"count": n_count,
}

Expand All @@ -1150,28 +1150,28 @@ def calc_variogram(
ax2 = plt.subplot2grid((5, 1), (0, 0), sharex=ax1)
fig.subplots_adjust(hspace=0)

ax1.scatter(x=vario_out_df["lag_bins"], y=vario_out_df["variance"])
ax1.set_xlabel("Lag bins")
ax1.set_ylabel("Variance")
ax1.scatter(x=vario_out_df["distance"], y=vario_out_df["semivariance"])
ax1.set_xlabel("Distance")
ax1.set_ylabel("Semi-Variance")

if normalize_vals:
ax1.set_xlim([0, 1.05])
ax1.set_ylim([0, 1.05])

ax1.grid(False)
ax1.vlines(
vario_out_df["lag_bins"],
vario_out_df["distance"],
*ax1.axes.get_ybound(),
colors=(0.85, 0.85, 0.85),
linestyles="dashed"
)

# set the sum of hist bar widths to 70% of the x-axis space
w = (numpy.max(vario_out_df["lag_bins"]) * 0.7) / len(vario_out_df["count"])
w = (numpy.max(vario_out_df["distance"]) * 0.7) / len(vario_out_df["count"])

# plot bar chart with count of number of pairs
ax2.bar(
vario_out_df["lag_bins"],
vario_out_df["distance"],
vario_out_df["count"],
width=w,
align="center",
Expand All @@ -1183,7 +1183,7 @@ def calc_variogram(

ax2.grid(False)
ax2.vlines(
vario_out_df["lag_bins"],
vario_out_df["distance"],
*ax2.axes.get_ybound(),
colors=(0.85, 0.85, 0.85),
linestyles="dashed"
Expand Down
14 changes: 7 additions & 7 deletions python/rsgislib/vectorstats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import rsgislib.vectorutils


def calc_empirical_variogram(
def calc_empirical_semivariogram(
vec_file: str,
vec_lyr: str,
vals_col: str,
Expand All @@ -20,23 +20,23 @@ def calc_empirical_variogram(
normalize_vals: bool = False,
):
"""
A function which calculates variogram for the vector layer provided using the
A function which calculates semi-variance for the vector layer provided using the
skgstat module (https://scikit-gstat.readthedocs.io/). The layer needs to
use point geometries and be projected so that the points have X and Y coordinates.
:param vec_file: Input vector layer file
:param vec_lyr: Input vector layer name
:param vals_col: Column name with values to calculate variogram
:param vals_col: Column name with values to calculate semi-variance
:param pts_sel_col: Optionally (Default: None) column name used to subset the
points within the vector layer for which the variogram is
points within the vector layer for which the semi-variance is
calculated (e.g., just points within a particular angular
range)
:param pts_sel_val: Optionally (Default: None) value to subset the points within
the vector layer using the pts_sel_col.
:param out_data_file: Optionally output a CSV file with the lag_bins, variance
:param out_data_file: Optionally output a CSV file with the distance, semi-variance
and count. Default is None, but if a file path is provided the
output will be produced.
:param out_plot_file: Optionally output a plot file of the lag_bins, variance
:param out_plot_file: Optionally output a plot file of the distance, semi-variance
and count. Default is None, but if a file path is provided the
output will be produced.
:param max_lag: Can specify the maximum lag distance directly by giving a value
Expand Down Expand Up @@ -80,7 +80,7 @@ def calc_empirical_variogram(

pts_coords = numpy.stack([x, y]).T

vario_out_df = rsgislib.tools.stats.calc_variogram(
vario_out_df = rsgislib.tools.stats.calc_semivariogram(
pts_coords=pts_coords,
data_vals=vals,
out_data_file=out_data_file,
Expand Down

0 comments on commit 85b3c61

Please sign in to comment.