Merge remote-tracking branch 'origin/xarray/main' into xarray/convert-dimension
mats-knmi committed Jul 30, 2024
2 parents df23451 + 51ddfe9 commit ee4bbed
Showing 22 changed files with 859 additions and 156 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/psf/black
rev: 24.3.0
rev: 24.4.2
hooks:
- id: black
language_version: python3
2 changes: 1 addition & 1 deletion examples/anvil_nowcast.py
@@ -99,7 +99,7 @@
np.array([0.5]), metadata, threshold=0.1, zerovalue=-15.0
)
forecast_sprog = sprog.forecast(
rainrate_field_db[-3:], velocity, 3, n_cascade_levels=8, R_thr=rainrate_thr[0]
rainrate_field_db[-3:], velocity, 3, n_cascade_levels=6, R_thr=rainrate_thr[0]
)
forecast_sprog, _ = transformation.dB_transform(
forecast_sprog, threshold=-10.0, inverse=True
11 changes: 11 additions & 0 deletions examples/thunderstorm_detection_and_tracking.py
@@ -90,6 +90,17 @@
# Properties of one of the identified cells:
print(cells_id.iloc[0])

###############################################################################
# Optionally, one can also ask to consider splits and merges of thunderstorm cells.
# A cell at time t is considered to split if it will overlap more than 10% with more than
# one cell at time t+1. Conversely, a cell is considered to be a merger if more
# than one cell from time t will overlap more than 10% with it.

cells_id, labels = tstorm_detect.detection(
input_image, time=time, output_splits_merges=True
)
print(cells_id.iloc[0])

###############################################################################
# Example of thunderstorm tracking over a timeseries
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8 changes: 4 additions & 4 deletions pysteps/blending/clim.py
@@ -22,7 +22,7 @@
import numpy as np


def get_default_skill(n_cascade_levels=8, n_models=1):
def get_default_skill(n_cascade_levels=6, n_models=1):
"""
Get the default climatological skill values as given in :cite:`BPS2006`.
Take subset of n_cascade_levels or add entries with small values (1e-4) if
@@ -31,7 +31,7 @@ def get_default_skill(n_cascade_levels=8, n_models=1):
Parameters
----------
n_cascade_levels: int, optional
Number of cascade levels. Defaults to 8.
Number of cascade levels. Defaults to 6, see issue #385 on GitHub.
n_models: int, optional
Number of NWP models. Defaults to 1.
@@ -157,7 +157,7 @@ def save_skill(

def calc_clim_skill(
outdir_path,
n_cascade_levels=8,
n_cascade_levels=6,
n_models=1,
window_length=30,
):
@@ -168,7 +168,7 @@
Parameters
----------
n_cascade_levels: int, optional
Number of cascade levels.
Number of cascade levels. Defaults to 6, see issue #385 on GitHub.
outdir_path: string
Path to folder where the historical skill values are stored. Defaults to
path_workdir from rcparams.
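For context, a minimal usage sketch (not part of this diff) of the updated default; the printed shape is an assumption based on the docstring's model/cascade-level layout:

from pysteps.blending import clim

# Climatological skill now defaults to 6 cascade levels per NWP model.
skill = clim.get_default_skill()
print(skill.shape)  # assumed to be (n_models, n_cascade_levels) = (1, 6)

# The previous behaviour can still be requested explicitly.
skill_8_levels = clim.get_default_skill(n_cascade_levels=8)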
182 changes: 126 additions & 56 deletions pysteps/blending/steps.py

Large diffs are not rendered by default.

97 changes: 97 additions & 0 deletions pysteps/blending/utils.py
@@ -16,6 +16,7 @@
compute_store_nwp_motion
load_NWP
check_norain
compute_smooth_dilated_mask
"""

import datetime
@@ -35,6 +36,13 @@
except ImportError:
NETCDF4_IMPORTED = False

try:
import cv2

CV2_IMPORTED = True
except ImportError:
CV2_IMPORTED = False


def stack_cascades(R_d, donorm=True):
"""Stack the given cascades into a larger array.
@@ -557,3 +565,92 @@ def check_norain(precip_arr, precip_thr=None, norain_thr=0.0):
f"Rain fraction is: {str(rain_pixels.size / precip_arr.size)}, while minimum fraction is {str(norain_thr)}"
)
return norain


def compute_smooth_dilated_mask(
original_mask,
max_padding_size_in_px=0,
gaussian_kernel_size=9,
inverted=False,
non_linear_growth_kernel_sizes=False,
):
"""
Compute a smooth dilated mask using Gaussian blur and dilation with varying kernel sizes.
Parameters
----------
original_mask : array_like
Two-dimensional boolean array containing the input mask.
max_padding_size_in_px : int, optional
The maximum size of the padding in pixels. Default is 0.
gaussian_kernel_size : int, optional
Size of the Gaussian kernel used for blurring; this should be an odd number. This option ensures
that the nan fields are large enough to start the smoothing. Without it, the method would also be
applied to local nan values inside the radar domain. Default is 9, which is generally a
recommended value to work with.
inverted : bool, optional
Typically, the smoothed mask works from the outside of the radar domain inward, using the
max_padding_size_in_px. If set to True, it works from the edge of the radar domain outward
(generally not recommended). Default is False.
non_linear_growth_kernel_sizes : bool, optional
If True, use non-linear growth for kernel sizes. Default is False.
Returns
-------
final_mask : array_like
The smooth dilated mask normalized to the range [0,1].
"""
if not CV2_IMPORTED:
raise MissingOptionalDependency(
"CV2 package is required to transform the mask into a smoot mask."
" Please install it using `pip install opencv-python`."
)

if max_padding_size_in_px < 0:
raise ValueError("max_padding_size_in_px must be greater than or equal to 0.")

# Check that gaussian_kernel_size is an odd number
assert gaussian_kernel_size % 2

# Convert the original mask to uint8 numpy array and invert if needed
array_2d = np.array(original_mask, dtype=np.uint8)
if inverted:
array_2d = np.bitwise_not(array_2d)

# Rescale the 2D array values to 0-255 (black or white)
rescaled_array = array_2d * 255

# Apply Gaussian blur to the rescaled array
blurred_image = cv2.GaussianBlur(
rescaled_array, (gaussian_kernel_size, gaussian_kernel_size), 0
)

# Apply binary threshold to negate the blurring effect
_, binary_image = cv2.threshold(blurred_image, 128, 255, cv2.THRESH_BINARY)

# Define kernel sizes
if non_linear_growth_kernel_sizes:
lin_space = np.linspace(0, np.sqrt(max_padding_size_in_px), 10)
non_lin_space = np.power(lin_space, 2)
kernel_sizes = list(set(non_lin_space.astype(np.uint8)))
else:
kernel_sizes = np.linspace(0, max_padding_size_in_px, 10, dtype=np.uint8)

# Process each kernel size
final_mask = np.zeros_like(binary_image, dtype=np.float64)
for kernel_size in kernel_sizes:
if kernel_size == 0:
dilated_image = binary_image
else:
kernel = cv2.getStructuringElement(
cv2.MORPH_ELLIPSE, (kernel_size, kernel_size)
)
dilated_image = cv2.dilate(binary_image, kernel)

# Convert the dilated image to a binary array
_, binary_array = cv2.threshold(dilated_image, 128, 1, cv2.THRESH_BINARY)
final_mask += binary_array

final_mask = final_mask / final_mask.max()

return final_mask
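A minimal usage sketch for the new helper (not part of this diff), assuming pysteps at this commit and opencv-python are installed; the circular domain mask below is purely illustrative:

import numpy as np
from pysteps.blending.utils import compute_smooth_dilated_mask

# Illustrative boolean mask: True outside a synthetic circular "radar domain".
yy, xx = np.mgrid[0:200, 0:200]
outside_domain = (xx - 100) ** 2 + (yy - 100) ** 2 > 80**2

# Dilate and smooth the mask over at most 40 px of padding.
smooth_mask = compute_smooth_dilated_mask(
    outside_domain,
    max_padding_size_in_px=40,
    gaussian_kernel_size=9,
)
print(smooth_mask.min(), smooth_mask.max())  # normalized to [0, 1]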
18 changes: 18 additions & 0 deletions pysteps/converters.py
@@ -61,6 +61,24 @@ def _convert_proj4_to_grid_mapping(proj4str):
v1 = d["lat_1"] if "lat_1" in d else float(0)
v2 = d["lat_2"] if "lat_2" in d else float(0)
params["standard_parallel"] = (float(v1), float(v2))
elif d["proj"] == "lcc":
grid_mapping_var_name = "lcc"
grid_mapping_name = "lambert_conformal_conic"
params["false_easting"] = float(d["x_0"]) if "x_0" in d else float(0)
params["false_northing"] = float(d["y_0"]) if "y_0" in d else float(0)
v = d["lon_0"] if "lon_0" in d else float(0)
params["longitude_of_central_meridian"] = float(v)
v = d["lat_0"] if "lat_0" in d else float(0)
params["latitude_of_projection_origin"] = float(v)
v1 = d["lat_1"] if "lat_1" in d else float(0)
v2 = d["lat_2"] if "lat_2" in d else float(0)
params["standard_parallel"] = (float(v1), float(v2))
v = d["ellps"] if "ellps" in d else ""
if len(v):
params["reference_ellipsoid_name"] = v
v = d["towgs84"] if "towgs84" in d else ""
if len(v):
params["towgs84"] = v
else:
print("unknown projection", d["proj"])
return None, None, None
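For illustration (not part of this diff), a hypothetical Lambert conformal conic PROJ.4 string of the kind the new branch handles, with the CF grid-mapping attributes it would produce noted in comments:

# Hypothetical projection parameters, chosen only to illustrate the mapping.
proj4str = (
    "+proj=lcc +lat_1=49.83 +lat_2=51.17 +lat_0=50.8 +lon_0=4.37 "
    "+x_0=150000 +y_0=5400000 +ellps=GRS80"
)
# The new "lcc" branch maps this to grid_mapping_name "lambert_conformal_conic"
# with, among others:
#   longitude_of_central_meridian = 4.37
#   latitude_of_projection_origin = 50.8
#   standard_parallel = (49.83, 51.17)
#   false_easting = 150000.0, false_northing = 5400000.0
#   reference_ellipsoid_name = "GRS80"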
59 changes: 55 additions & 4 deletions pysteps/feature/tstorm.py
@@ -58,6 +58,7 @@ def detection(
minmax=41,
mindis=10,
output_feat=False,
output_splits_merges=False,
time="000000000",
):
"""
@@ -93,6 +94,10 @@
smaller distance will be merged. The default is 10 km.
output_feat: bool, optional
Set to True to return only the cell coordinates.
output_splits_merges: bool, optional
Set to True to return additional columns in the dataframe that describe the
splitting and merging of cells. Note that these columns are initialized with None
and are only filled in during tracking.
time: string, optional
Date and time as string. Used to label time in the resulting dataframe.
The default is '000000000'.
@@ -166,7 +171,15 @@

areas, lines = breakup(input_image, np.nanmin(input_image.flatten()), maxima_dis)

cells_id, labels = get_profile(areas, binary, input_image, loc_max, time, minref)
cells_id, labels = get_profile(
areas,
binary,
input_image,
loc_max,
time,
minref,
output_splits_merges=output_splits_merges,
)

if max_num_features is not None:
idx = np.argsort(cells_id.area.to_numpy())[::-1]
@@ -225,10 +238,12 @@ def longdistance(loc_max, mindis):
return new_max


def get_profile(areas, binary, ref, loc_max, time, minref):
def get_profile(areas, binary, ref, loc_max, time, minref, output_splits_merges=False):
"""
This function returns the identified cells in a dataframe including their x,y
locations, location of their maxima, maximum reflectivity and contours.
Optionally, the dataframe can include columns for storing information regarding
splitting and merging of cells.
"""
cells = areas * binary
cell_labels = cells[loc_max]
@@ -255,11 +270,47 @@ def get_profile(areas, binary, ref, loc_max, time, minref):
"area": len(x),
}
)
if output_splits_merges:
cells_id[-1].update(
{
"splitted": None,
"split_IDs": None,
"merged": None,
"merged_IDs": None,
"results_from_split": None,
"will_merge": None,
}
)
labels[cells == cell_labels[n]] = this_id

columns = [
"ID",
"time",
"x",
"y",
"cen_x",
"cen_y",
"max_ref",
"cont",
"area",
]
if output_splits_merges:
columns.extend(
[
"splitted",
"split_IDs",
"merged",
"merged_IDs",
"results_from_split",
"will_merge",
]
)
cells_id = pd.DataFrame(
data=cells_id,
index=range(len(cell_labels)),
columns=["ID", "time", "x", "y", "cen_x", "cen_y", "max_ref", "cont", "area"],
columns=columns,
)

if output_splits_merges:
cells_id["split_IDs"] = cells_id["split_IDs"].astype("object")
cells_id["merged_IDs"] = cells_id["merged_IDs"].astype("object")
return cells_id, labels
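A minimal sketch (not part of this diff) of requesting the new split/merge bookkeeping from the public detection function; input_image and the timestamp are hypothetical placeholders:

from pysteps.feature import tstorm as tstorm_detect

# input_image: a 2D reflectivity field in dBZ (hypothetical placeholder).
cells_id, labels = tstorm_detect.detection(
    input_image, time="201607112100", output_splits_merges=True
)
# The extra columns start as None and are filled in by the tracking step.
print(cells_id[["ID", "splitted", "split_IDs", "merged", "merged_IDs"]])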
18 changes: 18 additions & 0 deletions pysteps/io/exporters.py
@@ -919,6 +919,24 @@ def _convert_proj4_to_grid_mapping(proj4str):
v1 = d["lat_1"] if "lat_1" in d else float(0)
v2 = d["lat_2"] if "lat_2" in d else float(0)
params["standard_parallel"] = (float(v1), float(v2))
elif d["proj"] == "lcc":
grid_mapping_var_name = "lcc"
grid_mapping_name = "lambert_conformal_conic"
params["false_easting"] = float(d["x_0"]) if "x_0" in d else float(0)
params["false_northing"] = float(d["y_0"]) if "y_0" in d else float(0)
v = d["lon_0"] if "lon_0" in d else float(0)
params["longitude_of_central_meridian"] = float(v)
v = d["lat_0"] if "lat_0" in d else float(0)
params["latitude_of_projection_origin"] = float(v)
v1 = d["lat_1"] if "lat_1" in d else float(0)
v2 = d["lat_2"] if "lat_2" in d else float(0)
params["standard_parallel"] = (float(v1), float(v2))
v = d["ellps"] if "ellps" in d else ""
if len(v):
params["reference_ellipsoid_name"] = v
v = d["towgs84"] if "towgs84" in d else ""
if len(v):
params["towgs84"] = v
else:
print("unknown projection", d["proj"])
return None, None, None
5 changes: 3 additions & 2 deletions pysteps/nowcasts/anvil.py
@@ -40,7 +40,7 @@ def forecast(
velocity,
timesteps,
rainrate=None,
n_cascade_levels=8,
n_cascade_levels=6,
extrap_method="semilagrangian",
ar_order=2,
ar_window_radius=50,
@@ -87,7 +87,8 @@
field. If set to None, no R(VIL) conversion is done and the outputs
are in the same units as the inputs.
n_cascade_levels: int, optional
The number of cascade levels to use.
The number of cascade levels to use. Defaults to 6, see issue #385
on GitHub.
extrap_method: str, optional
Name of the extrapolation method to use. See the documentation of
pysteps.extrapolation.interface.
3 changes: 2 additions & 1 deletion pysteps/nowcasts/sprog.py
@@ -71,7 +71,8 @@ def forecast(
precip_thr: float, required
The threshold value for minimum observable precipitation intensity.
n_cascade_levels: int, optional
The number of cascade levels to use.
The number of cascade levels to use. Defaults to 6, see issue #385
on GitHub.
extrap_method: str, optional
Name of the extrapolation method to use. See the documentation of
pysteps.extrapolation.interface.
3 changes: 2 additions & 1 deletion pysteps/nowcasts/sseps.py
@@ -104,7 +104,8 @@ def forecast(
n_ens_members: int
The number of ensemble members to generate.
n_cascade_levels: int
The number of cascade levels to use.
The number of cascade levels to use. Defaults to 6, see issue #385
on GitHub.
extrap_method: {'semilagrangian'}
Name of the extrapolation method to use. See the documentation of
pysteps.extrapolation.interface.
3 changes: 2 additions & 1 deletion pysteps/nowcasts/steps.py
@@ -87,7 +87,8 @@ def forecast(
n_ens_members: int, optional
The number of ensemble members to generate.
n_cascade_levels: int, optional
The number of cascade levels to use.
The number of cascade levels to use. Defaults to 6, see issue #385
on GitHub.
precip_thr: float, optional
Specifies the threshold value for minimum observable precipitation
intensity. Required if mask_method is not None or conditional is True.
1 change: 0 additions & 1 deletion pysteps/nowcasts/utils.py
@@ -22,7 +22,6 @@

from pysteps import extrapolation


try:
import dask
