diff --git a/pysteps/decorators.py b/pysteps/decorators.py index ee421977e..69c9945bc 100644 --- a/pysteps/decorators.py +++ b/pysteps/decorators.py @@ -22,7 +22,7 @@ import numpy as np -from pysteps.converters import convert_to_xarray_dataset +from pysteps.xarray_helpers import convert_input_to_xarray_dataset def _add_extra_kwrds_to_docstrings(target_func, extra_kwargs_doc_text): @@ -90,7 +90,9 @@ def _import_with_postprocessing(*args, **kwargs): mask = ~np.isfinite(precip) precip[mask] = _fillna - return convert_to_xarray_dataset(precip.astype(_dtype), quality, metadata) + return convert_input_to_xarray_dataset( + precip.astype(_dtype), quality, metadata + ) extra_kwargs_doc = """ Other Parameters @@ -126,7 +128,9 @@ def new_function(*args, **kwargs): target motion_method_func function. """ - input_images = args[0] + dataset = args[0] + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values if input_images.ndim != 3: raise ValueError( "input_images dimension mismatch.\n" diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py index f61d4b25b..71fe6dc78 100644 --- a/pysteps/io/importers.py +++ b/pysteps/io/importers.py @@ -74,22 +74,27 @@ .. tabularcolumns:: |p{2cm}|L| -+---------------+-------------------------------------------------------------------------------------------+ -| Coordinate | Description | -+===============+===========================================================================================+ -| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` | -+---------------+-------------------------------------------------------------------------------------------+ -| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` | -+---------------+-------------------------------------------------------------------------------------------+ -| lat | latitude coordinate in degrees | -+---------------+-------------------------------------------------------------------------------------------+ -| lon | longitude coordinate in degrees | -+---------------+-------------------------------------------------------------------------------------------+ -| time | forecast time in seconds since forecast start time | -+---------------+-------------------------------------------------------------------------------------------+ -| member | ensemble member number (integer) | -+---------------+-------------------------------------------------------------------------------------------+ - ++--------------------+-------------------------------------------------------------------------------------------+ +| Coordinate | Description | ++====================+===========================================================================================+ +| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` | ++--------------------+-------------------------------------------------------------------------------------------+ +| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` | ++--------------------+-------------------------------------------------------------------------------------------+ +| lat | latitude coordinate in degrees | ++--------------------+-------------------------------------------------------------------------------------------+ +| lon | longitude coordinate in degrees | ++--------------------+-------------------------------------------------------------------------------------------+ +| time | forecast time in seconds since forecast start time | ++--------------------+-------------------------------------------------------------------------------------------+ +| ens_number | ensemble member number (integer) | ++--------------------+-------------------------------------------------------------------------------------------+ +| direction | used by proesmans to return the forward and backward advection and consistency fields | ++--------------------+-------------------------------------------------------------------------------------------+ + +The time, x and y dimensions all MUST be regularly spaced, with the stepsize included +in a ``stepsize`` attribute. The stepsize is given in the unit of the dimension (this +is alwyas seconds for the time dimension). The dataset can contain the following data variables: @@ -102,8 +107,14 @@ | precip_accum | precip_intensity if unit is ``mm/h``, precip_accum if unit is ``mm`` and reflectivity if unit is ``dBZ``, | | or reflectivity | the attributes of this variable contain metadata relevant to this attribute (see below) | +-------------------+-----------------------------------------------------------------------------------------------------------+ +| velocity_x | x-component of the advection field in cartesian_unit per timestep | ++-------------------+-----------------------------------------------------------------------------------------------------------+ +| velocity_y | y-component of the advection field in cartesian_unit per timestep | ++-------------------+-----------------------------------------------------------------------------------------------------------+ | quality | value between 0 and 1 denoting the quality of the precipitation data, currently not used for anything | +-------------------+-----------------------------------------------------------------------------------------------------------+ +| velocity_quality | value between 0 and 1 denoting the quality of the velocity data, currently only returned by proesmans | ++-------------------+-----------------------------------------------------------------------------------------------------------+ Some of the metadata in the metadata dictionary is not explicitely stored in the dataset, but is still implicitly present. For example ``x1`` can easily be found by taking the first diff --git a/pysteps/io/readers.py b/pysteps/io/readers.py index 7295b5ff7..30c4d4fc0 100644 --- a/pysteps/io/readers.py +++ b/pysteps/io/readers.py @@ -15,7 +15,7 @@ import xarray as xr -def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None: +def read_timeseries(inputfns, importer, timestep=None, **kwargs) -> xr.Dataset | None: """ Read a time series of input files using the methods implemented in the :py:mod:`pysteps.io.importers` module and stack them into a 3d xarray @@ -28,6 +28,9 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None: :py:mod:`pysteps.io.archive` module. importer: function A function implemented in the :py:mod:`pysteps.io.importers` module. + timestep: int, optional + The timestep in seconds, this value is optional if more than 1 inputfns + are given. kwargs: dict Optional keyword arguments for the importer. @@ -58,6 +61,16 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None: return None startdate = min(inputfns[1]) + sorted_dates = sorted(inputfns[1]) + timestep_dates = int((sorted_dates[1] - sorted_dates[0]).total_seconds()) + + if timestep is None: + timestep = timestep_dates + if timestep != timestep_dates: + raise ValueError("given timestep does not match inputfns") + for i in range(len(sorted_dates) - 1): + if int((sorted_dates[i + 1] - sorted_dates[i]).total_seconds()) != timestep: + raise ValueError("supplied dates are not evenly spaced") datasets = [] for i, ifn in enumerate(inputfns[0]): @@ -73,6 +86,7 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None: { "long_name": "forecast time", "units": f"seconds since {startdate:%Y-%m-%d %H:%M:%S}", + "stepsize": timestep, }, ) ) diff --git a/pysteps/motion/constant.py b/pysteps/motion/constant.py index a5c153616..a26831ac0 100644 --- a/pysteps/motion/constant.py +++ b/pysteps/motion/constant.py @@ -14,27 +14,32 @@ import numpy as np import scipy.optimize as op +import xarray as xr from scipy.ndimage import map_coordinates -def constant(R, **kwargs): +def constant(dataset: xr.Dataset, **kwargs): """ Compute a constant advection field by finding a translation vector that maximizes the correlation between two successive images. Parameters ---------- - R: array_like - Array of shape (T,m,n) containing a sequence of T two-dimensional input - images of shape (m,n). If T > 2, two last elements along axis 0 are used. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. If the size of this dimension + is larger than 2, the last 2 entries of this dimension are used. Returns ------- - out: array_like - The constant advection field having shape (2, m, n), where out[0, :, :] - contains the x-components of the motion vectors and out[1, :, :] - contains the y-components. + out: xarray.Dataset + The input dataset with the constant advection field added in the ``velocity_x`` + and ``velocity_y`` data variables. """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + R = dataset[precip_var].values m, n = R.shape[1:] X, Y = np.meshgrid(np.arange(n), np.arange(m)) @@ -51,4 +56,7 @@ def f(v): options = {"initial_simplex": (np.array([(0, 1), (1, 0), (1, 1)]))} result = op.minimize(f, (1, 1), method="Nelder-Mead", options=options) - return np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))]) + output = np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))]) + dataset["velocity_x"] = (["y", "x"], output[0]) + dataset["velocity_y"] = (["y", "x"], output[1]) + return dataset diff --git a/pysteps/motion/darts.py b/pysteps/motion/darts.py index 4e5050d48..4aac80cd3 100644 --- a/pysteps/motion/darts.py +++ b/pysteps/motion/darts.py @@ -11,8 +11,10 @@ DARTS """ -import numpy as np import time + +import numpy as np +import xarray as xr from numpy.linalg import lstsq, svd from pysteps import utils @@ -20,16 +22,17 @@ @check_input_frames(just_ndim=True) -def DARTS(input_images, **kwargs): +def DARTS(dataset: xr.Dataset, **kwargs): """ Compute the advection field from a sequence of input images by using the DARTS method. :cite:`RCW2011` Parameters ---------- - input_images: array-like - Array of shape (T,m,n) containing a sequence of T two-dimensional input - images of shape (m,n). + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. Other Parameters ---------------- @@ -67,13 +70,15 @@ def DARTS(input_images, **kwargs): Returns ------- - out: ndarray - Three-dimensional array (2,m,n) containing the dense x- and y-components - of the motion field in units of pixels / timestep as given by the input - array R. + out: xarray.Dataset + The input dataset with the advection field added in the ``velocity_x`` + and ``velocity_y`` data variables. """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values N_x = kwargs.get("N_x", 50) N_y = kwargs.get("N_y", 50) N_t = kwargs.get("N_t", 4) @@ -214,10 +219,14 @@ def DARTS(input_images, **kwargs): fft.ifft2(_fill(V, input_images.shape[0], input_images.shape[1], k_x, k_y)) ) + output = np.stack([U, V]) + dataset["velocity_x"] = (["y", "x"], output[0]) + dataset["velocity_y"] = (["y", "x"], output[1]) + if verbose: print("--- %s seconds ---" % (time.time() - t0)) - return np.stack([U, V]) + return dataset def _leastsq(A, B, y): diff --git a/pysteps/motion/lucaskanade.py b/pysteps/motion/lucaskanade.py index 133f860b7..b7a51a26b 100644 --- a/pysteps/motion/lucaskanade.py +++ b/pysteps/motion/lucaskanade.py @@ -22,22 +22,22 @@ dense_lucaskanade """ +import time + import numpy as np +import xarray as xr from numpy.ma.core import MaskedArray +from pysteps import feature, utils from pysteps.decorators import check_input_frames - -from pysteps import utils, feature from pysteps.tracking.lucaskanade import track_features from pysteps.utils.cleansing import decluster, detect_outliers from pysteps.utils.images import morph_opening -import time - @check_input_frames(2) def dense_lucaskanade( - input_images, + dataset: xr.Dataset, lk_kwargs=None, fd_method="shitomasi", fd_kwargs=None, @@ -73,18 +73,14 @@ def dense_lucaskanade( Parameters ---------- - input_images: ndarray_ or MaskedArray_ - Array of shape (T, m, n) containing a sequence of *T* two-dimensional - input images of shape (m, n). The indexing order in **input_images** is - assumed to be (time, latitude, longitude). - - *T* = 2 is the minimum required number of images. - With *T* > 2, all the resulting sparse vectors are pooled together for - the final interpolation on a regular grid. - - In case of ndarray_, invalid values (Nans or infs) are masked, - otherwise the mask of the MaskedArray_ is used. Such mask defines a - region where features are not detected for the tracking algorithm. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. The size of the time dimension needs to + be at least 2. If it is larger than 2, all the resulting sparse vectors are pooled + together for the final interpolation on a regular grid. Invalid values (Nans or infs) + are masked. This mask defines a region where features are not detected for the tracking + algorithm. lk_kwargs: dict, optional Optional dictionary containing keyword arguments for the `Lucas-Kanade`_ @@ -151,14 +147,10 @@ def dense_lucaskanade( Returns ------- - out: ndarray_ or tuple - If **dense=True** (the default), return the advection field having shape - (2, m, n), where out[0, :, :] contains the x-components of the motion - vectors and out[1, :, :] contains the y-components. - The velocities are in units of pixels / timestep, where timestep is the - time difference between the two input images. - Return a zero motion field of shape (2, m, n) when no motion is - detected. + out: xarray.Dataset or tuple + If **dense=True** (the default), return the input dataset with the advection + field added in the ``velocity_x`` and ``velocity_y`` data variables. + Return a zero motion field when no motion is detected. If **dense=False**, it returns a tuple containing the 2-dimensional arrays **xy** and **uv**, where x, y define the vector locations, @@ -179,7 +171,9 @@ def dense_lucaskanade( Understanding Workshop, pp. 121–130, 1981. """ - input_images = input_images.copy() + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values if verbose: print("Computing the motion field with the Lucas-Kanade method.") @@ -244,7 +238,10 @@ def dense_lucaskanade( # return zero motion field is no sparse vectors are found if xy.shape[0] == 0: if dense: - return np.zeros((2, domain_size[0], domain_size[1])) + uvgrid = np.zeros((2, domain_size[0], domain_size[1])) + dataset["velocity_x"] = (["y", "x"], uvgrid[0]) + dataset["velocity_y"] = (["y", "x"], uvgrid[1]) + return dataset else: return xy, uv @@ -266,14 +263,20 @@ def dense_lucaskanade( # return zero motion field if no sparse vectors are left for interpolation if xy.shape[0] == 0: - return np.zeros((2, domain_size[0], domain_size[1])) + uvgrid = np.zeros((2, domain_size[0], domain_size[1])) + dataset["velocity_x"] = (["y", "x"], uvgrid[0]) + dataset["velocity_y"] = (["y", "x"], uvgrid[1]) + return dataset # interpolation xgrid = np.arange(domain_size[1]) ygrid = np.arange(domain_size[0]) uvgrid = interpolation_method(xy, uv, xgrid, ygrid, **interp_kwargs) + dataset["velocity_x"] = (["y", "x"], uvgrid[0]) + dataset["velocity_y"] = (["y", "x"], uvgrid[1]) + if verbose: print("--- total time: %.2f seconds ---" % (time.time() - t0)) - return uvgrid + return dataset diff --git a/pysteps/motion/proesmans.py b/pysteps/motion/proesmans.py index 8760092ba..4b122a620 100644 --- a/pysteps/motion/proesmans.py +++ b/pysteps/motion/proesmans.py @@ -12,6 +12,7 @@ """ import numpy as np +import xarray as xr from scipy.ndimage import gaussian_filter from pysteps.decorators import check_input_frames @@ -20,7 +21,7 @@ @check_input_frames(2, 2) def proesmans( - input_images, + dataset: xr.Dataset, lam=50.0, num_iter=100, num_levels=6, @@ -34,8 +35,11 @@ def proesmans( Parameters ---------- - input_images: array_like - Array of shape (2, m, n) containing the first and second input image. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. The size of this dimension + has to be 2. lam: float Multiplier of the smoothness term. Smaller values give a smoother motion field. @@ -49,22 +53,20 @@ def proesmans( verbose: bool, optional Verbosity enabled if True (default). full_output: bool, optional - If True, the output is a two-element tuple containing the - forward-backward advection and consistency fields. The first element - is shape (2, 2, m, n), where the index along the first dimension refers - to the forward and backward advection fields. The second element is an - array of shape (2, m, n), where the index along the first dimension - refers to the forward and backward consistency fields. - Default: False. + If True, both the forward and backwards advection fields are returned + and the consistency fields are returned as well in the ``velocity_quality`` + data variable. Returns ------- out: ndarray - If full_output=False, the advection field having shape (2, m, n), where - out[0, :, :] contains the x-components of the motion vectors and - out[1, :, :] contains the y-components. The velocities are in units of - pixels / timestep, where timestep is the time difference between the - two input images. + The input dataset with the advection field added in the ``velocity_x`` + and ``velocity_y`` data variables. + + If full_output=True, a ``velocity_direction`` dimension + is added to the dataset, so that the velocity data can be returned containing + the forward and backwards advection fields. Also the ``velocity_quality`` data + coordinate is present containing the forward and backward consistency fields. References ---------- @@ -73,6 +75,9 @@ def proesmans( """ del verbose # Not used + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values im1 = input_images[-2, :, :].copy() im2 = input_images[-1, :, :].copy() @@ -89,6 +94,11 @@ def proesmans( advfield, quality = _compute_advection_field(im, lam, num_iter, num_levels) if not full_output: - return advfield[0] + dataset["velocity_x"] = (["y", "x"], advfield[0, 0]) + dataset["velocity_y"] = (["y", "x"], advfield[0, 1]) else: - return advfield, quality + dataset["velocity_x"] = (["direction", "y", "x"], advfield[:, 0]) + dataset["velocity_y"] = (["direction", "y", "x"], advfield[:, 1]) + dataset["velocity_quality"] = (["direction", "y", "x"], quality) + + return dataset diff --git a/pysteps/motion/vet.py b/pysteps/motion/vet.py index 391ebe189..f30703bee 100644 --- a/pysteps/motion/vet.py +++ b/pysteps/motion/vet.py @@ -35,12 +35,13 @@ """ import numpy +import xarray as xr from numpy.ma.core import MaskedArray from scipy.ndimage import zoom from scipy.optimize import minimize from pysteps.decorators import check_input_frames -from pysteps.motion._vet import _warp, _cost_function +from pysteps.motion._vet import _cost_function, _warp def round_int(scalar): @@ -301,7 +302,7 @@ def vet_cost_function( @check_input_frames(2, 3) def vet( - input_images, + dataset: xr.Dataset, sectors=((32, 16, 4, 2), (32, 16, 4, 2)), smooth_gain=1e6, first_guess=None, @@ -366,15 +367,13 @@ def vet( Parameters ---------- - input_images: ndarray_ or MaskedArray - Input images, sequence of 2D arrays, or 3D arrays. - The first dimension represents the images time dimension. - - The template_image (first element in first dimensions) denotes the + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable. + The dataset has to have a time dimension. The size of this dimension + has to be 2. The first element in the time dimension denotes the reference image used to obtain the displacement (2D array). The second is the target image. - - The expected dimensions are (2,ni,nj). sectors: list or array, optional Number of sectors on each dimension used in the scaling procedure. If dimension is 1, the same sectors will be used both image dimensions @@ -411,13 +410,11 @@ def vet( Returns ------- - displacement_field: ndarray_ - Displacement Field (2D array representing the transformation) that - warps the template image into the input image. - The dimensions are (2,ni,nj), where the first - dimension indicates the displacement along x (0) or y (1) in units of - pixels / timestep as given by the input_images array. - intermediate_steps: list of ndarray_ + out: xarray.Dataset + The input dataset with the displacement field that + warps the template image into the input image added in the ``velocity_x`` + and ``velocity_y`` data variables. + intermediate_steps: list of ndarray_, optional List with the first guesses obtained during the scaling procedure. References @@ -437,6 +434,9 @@ def vet( Nocedal, J, and S J Wright. 2006. Numerical Optimization. Springer New York. """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + input_images = dataset[precip_var].values if verbose: def debug_print(*args, **kwargs): @@ -642,7 +642,10 @@ def debug_print(*args, **kwargs): if padding > 0: first_guess = first_guess[:, padding:-padding, padding:-padding] + dataset["velocity_x"] = (["y", "x"], first_guess[0]) + dataset["velocity_y"] = (["y", "x"], first_guess[1]) + if intermediate_steps: - return first_guess, scaling_guesses + return dataset, scaling_guesses - return first_guess + return dataset diff --git a/pysteps/nowcasts/anvil.py b/pysteps/nowcasts/anvil.py index f5af038bb..88ed6b0af 100644 --- a/pysteps/nowcasts/anvil.py +++ b/pysteps/nowcasts/anvil.py @@ -21,11 +21,13 @@ import time import numpy as np +import xarray as xr from scipy.ndimage import gaussian_filter from pysteps import cascade, extrapolation, utils from pysteps.nowcasts.utils import nowcast_main_loop from pysteps.timeseries import autoregression +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -36,10 +38,8 @@ def forecast( - vil, - velocity, + dataset: xr.Dataset, timesteps, - rainrate=None, n_cascade_levels=6, extrap_method="semilagrangian", ar_order=2, @@ -70,22 +70,21 @@ def forecast( Parameters ---------- - vil: array_like - Array of shape (ar_order+2,m,n) containing the input fields ordered by - timestamp from oldest to newest. The inputs are expected to contain VIL - or rain rate. The time steps between the inputs are assumed to be regular. - velocity: array_like - Array of shape (2,m,n) containing the x- and y-components of the - advection field. The velocities are assumed to represent one time step - between the inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as either VIL values in the + ``precip_accum`` data variable or rainrate in the ``precip_intensity`` + data variable. The time dimension of the dataset has to be size + ``ar_order + 2`` and the precipitation variable has to have this dimension. + When VIL values are supplied, optionally ``precip_accum`` can be supplied + as well without a time dimension, containing the most recently observed rain + rate field. If not supplied, no R(VIL) conversion is done and the outputs + are in the same units as the inputs. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. - rainrate: array_like - Array of shape (m,n) containing the most recently observed rain rate - field. If set to None, no R(VIL) conversion is done and the outputs - are in the same units as the inputs. n_cascade_levels: int, optional The number of cascade levels to use. Defaults to 6, see issue #385 on GitHub. @@ -128,18 +127,28 @@ def forecast( Returns ------- - out: ndarray - A three-dimensional array of shape (num_timesteps,m,n) containing a time - series of forecast precipitation fields. The time series starts from - t0+timestep, where timestep is taken from the input VIL/rain rate - fields. If measure_time is True, the return value is a three-element - tuple containing the nowcast array, the initialization time of the - nowcast generator and the time used in the main loop (seconds). + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). References ---------- :cite:`PCLH2020` """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + vil = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) + rainrate = None + if precip_var == "precip_intensity" and "precip_accum" in dataset: + rainrate = dataset["precip_accum"].values + _check_inputs(vil, rainrate, velocity, timesteps, ar_order) if extrap_kwargs is None: @@ -292,8 +301,6 @@ def worker(vil, i): print("Starting nowcast computation.") - rainrate_f = [] - extrap_kwargs["return_displacement"] = True state = {"vil_dec": vil_dec} @@ -323,10 +330,11 @@ def worker(vil, i): if measure_time: rainrate_f, mainloop_time = rainrate_f + output_dataset = convert_output_to_xarray_dataset(dataset, timesteps, rainrate_f) if measure_time: - return np.stack(rainrate_f), init_time, mainloop_time + return output_dataset, init_time, mainloop_time else: - return np.stack(rainrate_f) + return output_dataset def _check_inputs(vil, rainrate, velocity, timesteps, ar_order): diff --git a/pysteps/nowcasts/extrapolation.py b/pysteps/nowcasts/extrapolation.py index 143a39d7c..a70b6985c 100644 --- a/pysteps/nowcasts/extrapolation.py +++ b/pysteps/nowcasts/extrapolation.py @@ -11,14 +11,16 @@ """ import time + import numpy as np +import xarray as xr from pysteps import extrapolation +from pysteps.xarray_helpers import convert_output_to_xarray_dataset def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, extrap_method="semilagrangian", extrap_kwargs=None, @@ -32,13 +34,11 @@ def forecast( Parameters ---------- - precip: array-like - Two-dimensional array of shape (m,n) containing the input precipitation - field. - velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the - advection field. The velocities are assumed to represent one time step - between the inputs. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any pecipitation data variable. + It should contain a time dimension of size 1. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements @@ -54,18 +54,25 @@ def forecast( Returns ------- - out: ndarray_ - Three-dimensional array of shape (num_timesteps, m, n) containing a time - series of nowcast precipitation fields. The time series starts from - t0 + timestep, where timestep is taken from the advection field velocity. - If *measure_time* is True, the return value is a two-element tuple - containing this array and the computation time (seconds). + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). See also -------- pysteps.extrapolation.interface """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values[0] + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps) if extrap_kwargs is None: @@ -95,10 +102,13 @@ def forecast( computation_time = time.time() - start_time print(f"{computation_time:.2f} seconds.") + output_dataset = convert_output_to_xarray_dataset( + dataset, timesteps, precip_forecast + ) if measure_time: - return precip_forecast, computation_time + return output_dataset, computation_time else: - return precip_forecast + return output_dataset def _check_inputs(precip, velocity, timesteps): diff --git a/pysteps/nowcasts/lagrangian_probability.py b/pysteps/nowcasts/lagrangian_probability.py index 727e94806..7bae440cc 100644 --- a/pysteps/nowcasts/lagrangian_probability.py +++ b/pysteps/nowcasts/lagrangian_probability.py @@ -12,20 +12,20 @@ """ import numpy as np +import xarray as xr from scipy.signal import convolve from pysteps.nowcasts import extrapolation def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, threshold, extrap_method="semilagrangian", extrap_kwargs=None, slope=5, -): +) -> xr.Dataset: """ Generate a probability nowcast by a local lagrangian approach. The ouput is the probability of exceeding a given intensity threshold, i.e. @@ -33,13 +33,11 @@ def forecast( Parameters ---------- - precip: array_like - Two-dimensional array of shape (m,n) containing the input precipitation - field. - velocity: array_like - Array of shape (2,m,n) containing the x- and y-components of the - advection field. The velocities are assumed to represent one time step - between the inputs. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any pecipitation data variable. + It should contain a time dimension of size 1. timesteps: int or list of floats Number of time steps to forecast or a sorted list of time steps for which the forecasts are computed (relative to the input time step). @@ -54,10 +52,15 @@ def forecast( Returns ------- - out: ndarray - Three-dimensional array of shape (num_timesteps, m, n) containing a time - series of nowcast exceedence probabilities. The time series starts from - t0 + timestep, where timestep is taken from the advection field velocity. + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). References ---------- @@ -68,16 +71,14 @@ def forecast( """ # Compute deterministic extrapolation forecast if isinstance(timesteps, int) and timesteps > 0: - timesteps = np.arange(1, timesteps + 1) + timesteps = list(range(1, timesteps + 1)) elif not isinstance(timesteps, list): raise ValueError(f"invalid value for argument 'timesteps': {timesteps}") - precip_forecast = extrapolation.forecast( - precip, - velocity, - timesteps, - extrap_method, - extrap_kwargs, + dataset_forecast = extrapolation.forecast( + dataset, timesteps, extrap_method, extrap_kwargs ) + precip_var = dataset_forecast.attrs["precip_var"] + precip_forecast = dataset_forecast[precip_var].values # Ignore missing values nanmask = np.isnan(precip_forecast) @@ -104,7 +105,8 @@ def forecast( precip_forecast[i, ...] /= kernel_sum precip_forecast = np.clip(precip_forecast, 0, 1) precip_forecast[nanmask] = np.nan - return precip_forecast + dataset_forecast[precip_var].data[:] = precip_forecast + return dataset_forecast def _get_kernel(size): diff --git a/pysteps/nowcasts/linda.py b/pysteps/nowcasts/linda.py index 7d737eaa9..2fc5dfd70 100644 --- a/pysteps/nowcasts/linda.py +++ b/pysteps/nowcasts/linda.py @@ -40,6 +40,8 @@ import time import warnings +from pysteps.xarray_helpers import convert_output_to_xarray_dataset + try: import dask @@ -47,28 +49,19 @@ except ImportError: DASK_IMPORTED = False import numpy as np +import xarray as xr +from scipy import optimize as opt +from scipy import stats from scipy.integrate import nquad from scipy.interpolate import interp1d -from scipy import optimize as opt from scipy.signal import convolve -from scipy import stats from pysteps import extrapolation, feature, noise -from pysteps.decorators import deprecate_args from pysteps.nowcasts.utils import nowcast_main_loop -@deprecate_args( - { - "precip_fields": "precip", - "advection_field": "velocity", - "num_ens_members": "n_ens_members", - }, - "1.8.0", -) def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, feature_method="blob", max_num_features=25, @@ -100,15 +93,13 @@ def forecast( Parameters ---------- - precip: array_like - Array of shape (ari_order + 2, m, n) containing the input rain rate - or reflectivity fields (in linear scale) ordered by timestamp from - oldest to newest. The time steps between the inputs are assumed to be - regular. - velocity: array_like - Array of shape (2, m, n) containing the x- and y-components of the - advection field. The velocities are assumed to represent one time step - between the inputs. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as either reflectivity values in the + ``reflectivity`` data variable (in linear scale) or rainrate in the ``precip_intensity`` + data variable. The time dimension of the dataset has to be size + ``ari_order + 2`` and the precipitation variable has to have this dimension. timesteps: int Number of time steps to forecast. feature_method: {'blob', 'domain' 'shitomasi'} @@ -202,16 +193,15 @@ def forecast( Returns ------- - out: numpy.ndarray - A four-dimensional array of shape (n_ens_members, timesteps, m, n) - containing a time series of forecast precipitation fields for each - ensemble member. If add_perturbations is False, the first dimension is - dropped. The time series starts from t0 + timestep, where timestep is - taken from the input fields. If measure_time is True, the return value - is a three-element tuple containing the nowcast array, the initialization - time of the nowcast generator and the time used in the main loop - (seconds). If return_output is set to False, a single None value is - returned instead. + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields for each ensemble member. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). Notes ----- @@ -224,6 +214,10 @@ def forecast( variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ari_order) if feature_kwargs is None: @@ -363,14 +357,21 @@ def forecast( callback, ) - if return_output: - if measure_time: - return precip_forecast[0], init_time, precip_forecast[1] - else: - return precip_forecast - else: + if not return_output: return None + if measure_time: + precip_forecast, mainloop_time = precip_forecast + + output_dataset = convert_output_to_xarray_dataset( + dataset, timesteps, precip_forecast + ) + + if measure_time: + return output_dataset, init_time, mainloop_time + else: + return output_dataset + def _check_inputs(precip, velocity, timesteps, ari_order): if ari_order not in [1, 2]: diff --git a/pysteps/nowcasts/sprog.py b/pysteps/nowcasts/sprog.py index 86c840dcb..2ebcfde41 100644 --- a/pysteps/nowcasts/sprog.py +++ b/pysteps/nowcasts/sprog.py @@ -10,17 +10,17 @@ forecast """ -import numpy as np import time -from pysteps import cascade -from pysteps import extrapolation -from pysteps import utils -from pysteps.decorators import deprecate_args +import numpy as np +import xarray as xr + +from pysteps import cascade, extrapolation, utils from pysteps.nowcasts import utils as nowcast_utils +from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation -from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -30,10 +30,8 @@ DASK_IMPORTED = False -@deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0") def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, precip_thr=None, n_cascade_levels=6, @@ -55,15 +53,13 @@ def forecast( Parameters ---------- - precip: array-like - Array of shape (ar_order+1,m,n) containing the input precipitation fields - ordered by timestamp from oldest to newest. The time steps between - the inputs are assumed to be regular. - velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the - advection field. - The velocities are assumed to represent one time step between the - inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable. + The time dimension of the dataset has to be size + ``ar_order + 1`` and the precipitation variable has to have this dimension. All + velocity values are required to be finite. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements @@ -120,13 +116,15 @@ def forecast( Returns ------- - out: ndarray - A three-dimensional array of shape (num_timesteps,m,n) containing a time - series of forecast precipitation fields. The time series starts from - t0+timestep, where timestep is taken from the input precipitation fields - precip. If measure_time is True, the return value is a three-element - tuple containing the nowcast array, the initialization time of the - nowcast generator and the time used in the main loop (seconds). + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast + precipitation fields. Otherwise, a None value + is returned. The time series starts from t0+timestep, where timestep is + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). See also -------- @@ -137,6 +135,10 @@ def forecast( :cite:`Seed2003`, :cite:`PCH2019a` """ + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ar_order) if extrap_kwargs is None: @@ -327,8 +329,6 @@ def f(precip, i): print("Starting nowcast computation.") - precip_forecast = [] - state = {"precip_cascades": precip_cascades, "precip_decomp": precip_decomp} params = { "domain": domain, @@ -358,12 +358,14 @@ def f(precip, i): if measure_time: precip_forecast, mainloop_time = precip_forecast - precip_forecast = np.stack(precip_forecast) + output_dataset = convert_output_to_xarray_dataset( + dataset, timesteps, precip_forecast + ) if measure_time: - return precip_forecast, init_time, mainloop_time + return output_dataset, init_time, mainloop_time else: - return precip_forecast + return output_dataset def _check_inputs(precip, velocity, timesteps, ar_order): diff --git a/pysteps/nowcasts/sseps.py b/pysteps/nowcasts/sseps.py index a8848d3e3..1d083c04b 100644 --- a/pysteps/nowcasts/sseps.py +++ b/pysteps/nowcasts/sseps.py @@ -18,18 +18,17 @@ forecast """ -import numpy as np import time -from scipy.ndimage import generate_binary_structure, iterate_structure +import numpy as np +import xarray as xr +from scipy.ndimage import generate_binary_structure, iterate_structure -from pysteps import cascade -from pysteps import extrapolation -from pysteps import noise -from pysteps.decorators import deprecate_args +from pysteps import cascade, extrapolation, noise from pysteps.nowcasts import utils as nowcast_utils from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -39,11 +38,8 @@ dask_imported = False -@deprecate_args({"R": "precip", "V": "velocity"}, "1.8.0") def forecast( - precip, - metadata, - velocity, + dataset: xr.Dataset, timesteps, n_ens_members=24, n_cascade_levels=6, @@ -78,18 +74,14 @@ def forecast( Parameters ---------- - precip: array-like - Array of shape (ar_order+1,m,n) containing the input precipitation fields - ordered by timestamp from oldest to newest. The time steps between the inputs - are assumed to be regular, and the inputs are required to have finite values. - metadata: dict - Metadata dictionary containing the accutime, xpixelsize, threshold and - zerovalue attributes as described in the documentation of - :py:mod:`pysteps.io.importers`. xpixelsize is assumed to be in meters. - velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the advection - field. The velocities are assumed to represent one time step between the - inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable. + The units and stepsize of ``y`` and ``x`` have to be the same and the only supported + units are meters and kilometers. The time dimension of the dataset has to be size + ``ar_order + 1`` and the precipitation variable has to have this dimension. All + velocity values are required to be finite. win_size: int or two-element sequence of ints Size-length of the localization window. overlap: float [0,1[ @@ -181,12 +173,15 @@ def forecast( Returns ------- - out: ndarray - If return_output is True, a four-dimensional array of shape - (n_ens_members,num_timesteps,m,n) containing a time series of forecast + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is - taken from the input precipitation fields. + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the + initialization time of the nowcast generator and the time used in the + main loop (seconds). See also -------- @@ -201,7 +196,20 @@ def forecast( ---------- :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`NBSG2017` """ - + timesteps_in = timesteps + x_units = dataset["x"].attrs["units"] + y_units = dataset["y"].attrs["units"] + x_stepsize = dataset["x"].attrs["stepsize"] + y_stepsize = dataset["y"].attrs["stepsize"] + if x_units != y_units or x_stepsize != y_stepsize: + raise ValueError("units and stepsize needs to be the same for x and y") + if x_units not in ["m", "km"]: + raise ValueError("only m and km supported as x and y units") + + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ar_order) if extrap_kwargs is None: @@ -237,8 +245,10 @@ def forecast( else: win_size = tuple([int(win_size[i]) for i in range(2)]) - timestep = metadata["accutime"] - kmperpixel = metadata["xpixelsize"] / 1000 + timestep = dataset["time"].attrs["stepsize"] / 60 + kmperpixel = x_stepsize + if x_units == "m": + kmperpixel = kmperpixel / 1000 print("Computing SSEPS nowcast") print("-----------------------") @@ -292,8 +302,8 @@ def forecast( f"velocity perturbations, perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}" ) - precip_thr = metadata["threshold"] - precip_min = metadata["zerovalue"] + precip_thr = dataset[precip_var].attrs["threshold"] + precip_min = dataset[precip_var].attrs["zerovalue"] num_ensemble_workers = n_ens_members if num_workers > n_ens_members else num_workers @@ -911,10 +921,12 @@ def worker(j): if return_output: outarr = np.stack([np.stack(precip_forecast[j]) for j in range(n_ens_members)]) + output_dataset = convert_output_to_xarray_dataset(dataset, timesteps_in, outarr) + if measure_time: - return outarr, init_time, mainloop_time + return output_dataset, init_time, mainloop_time else: - return outarr + return output_dataset else: return None diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py index 50c6a5d22..366f32f6d 100644 --- a/pysteps/nowcasts/steps.py +++ b/pysteps/nowcasts/steps.py @@ -11,19 +11,18 @@ forecast """ +import time + import numpy as np +import xarray as xr from scipy.ndimage import generate_binary_structure, iterate_structure -import time -from pysteps import cascade -from pysteps import extrapolation -from pysteps import noise -from pysteps import utils -from pysteps.decorators import deprecate_args +from pysteps import cascade, extrapolation, noise, utils from pysteps.nowcasts import utils as nowcast_utils +from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation -from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop +from pysteps.xarray_helpers import convert_output_to_xarray_dataset try: import dask @@ -33,10 +32,8 @@ DASK_IMPORTED = False -@deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0") def forecast( - precip, - velocity, + dataset: xr.Dataset, timesteps, n_ens_members=24, n_cascade_levels=6, @@ -72,14 +69,13 @@ def forecast( Parameters ---------- - precip: array-like - Array of shape (ar_order+1,m,n) containing the input precipitation fields - ordered by timestamp from oldest to newest. The time steps between the - inputs are assumed to be regular. - velocity: array-like - Array of shape (2,m,n) containing the x- and y-components of the advection - field. The velocities are assumed to represent one time step between the - inputs. All values are required to be finite. + dataset: xarray.Dataset + Input dataset as described in the documentation of + :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and + ``velocity_y`` data variables, as well as any precipitation data variable. + The time dimension of the dataset has to be size + ``ar_order + 1`` and the precipitation variable has to have this dimension. All + velocity values are required to be finite. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements @@ -241,13 +237,13 @@ def forecast( Returns ------- - out: ndarray - If return_output is True, a four-dimensional array of shape - (n_ens_members,num_timesteps,m,n) containing a time series of forecast + out: xarray.Dataset + If return_output is True, a dataset as described in the documentation of + :py:mod:`pysteps.io.importers` is returned containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is - taken from the input precipitation fields. If measure_time is True, the - return value is a three-element tuple containing the nowcast array, the + taken from the metadata of the time coordinate. If measure_time is True, the + return value is a three-element tuple containing the nowcast dataset, the initialization time of the nowcast generator and the time used in the main loop (seconds). @@ -261,6 +257,11 @@ def forecast( :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b` """ + timesteps_in = timesteps + dataset = dataset.copy(deep=True) + precip_var = dataset.attrs["precip_var"] + precip = dataset[precip_var].values + velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]]) _check_inputs(precip, velocity, timesteps, ar_order) if extrap_kwargs is None: @@ -574,8 +575,6 @@ def f(precip, i): else: velocity_perturbators = None - precip_forecast = [[] for _ in range(n_ens_members)] - if probmatching_method == "mean": mu_0 = np.mean(precip[-1, :, :][precip[-1, :, :] >= precip_thr]) else: @@ -680,13 +679,13 @@ def f(precip, i): precip_forecast, mainloop_time = precip_forecast if return_output: - precip_forecast = np.stack( - [np.stack(precip_forecast[j]) for j in range(n_ens_members)] + output_dataset = convert_output_to_xarray_dataset( + dataset, timesteps_in, precip_forecast ) if measure_time: - return precip_forecast, init_time, mainloop_time + return output_dataset, init_time, mainloop_time else: - return precip_forecast + return output_dataset else: return None diff --git a/pysteps/nowcasts/utils.py b/pysteps/nowcasts/utils.py index fd111e28d..fed1c2f96 100644 --- a/pysteps/nowcasts/utils.py +++ b/pysteps/nowcasts/utils.py @@ -17,6 +17,7 @@ """ import time + import numpy as np from scipy.ndimage import binary_dilation, generate_binary_structure @@ -412,10 +413,10 @@ def worker2(i): if not ensemble: precip_forecast_out = precip_forecast_out[0, :] - if measure_time: - return precip_forecast_out, time.time() - starttime_total - else: - return precip_forecast_out + if measure_time: + return precip_forecast_out, time.time() - starttime_total + else: + return precip_forecast_out def print_ar_params(phi): diff --git a/pysteps/tests/test_motion_lk.py b/pysteps/tests/test_motion_lk.py index 871dcd98b..a8f640533 100644 --- a/pysteps/tests/test_motion_lk.py +++ b/pysteps/tests/test_motion_lk.py @@ -3,8 +3,8 @@ """ """ -import pytest import numpy as np +import pytest from pysteps import motion, verification from pysteps.tests.helpers import get_precipitation_fields @@ -61,19 +61,19 @@ def test_lk( pytest.importorskip("pandas") # inputs - precip, metadata = get_precipitation_fields( + dataset = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip = precip.filled() + precip_var = dataset.attrs["precip_var"] # Retrieve motion field oflow_method = motion.get_method("LK") - output = oflow_method( - precip, + output_dataset = oflow_method( + dataset, lk_kwargs=lk_kwargs, fd_method=fd_method, dense=dense, @@ -86,13 +86,17 @@ def test_lk( # Check format of ouput if dense: + output = np.stack( + [output_dataset["velocity_x"].values, output_dataset["velocity_y"].values] + ) assert isinstance(output, np.ndarray) assert output.ndim == 3 assert output.shape[0] == 2 - assert output.shape[1:] == precip[0].shape + assert output.shape[1:] == dataset[precip_var].values[0].shape if nr_std_outlier == 0: assert output.sum() == 0 else: + output = output_dataset assert isinstance(output, tuple) assert len(output) == 2 assert isinstance(output[0], np.ndarray) diff --git a/pysteps/tests/test_nowcasts_anvil.py b/pysteps/tests/test_nowcasts_anvil.py index 14a130fb1..35d84e0f3 100644 --- a/pysteps/tests/test_nowcasts_anvil.py +++ b/pysteps/tests/test_nowcasts_anvil.py @@ -31,31 +31,28 @@ def test_anvil_rainrate( ): """Tests ANVIL nowcast using rain rate precipitation fields.""" # inputs - precip_input = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=4, num_next_files=0, return_raw=False, metadata=False, upscale=2000, ) - precip_input = precip_input.filled() - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("anvil") output = nowcast_method( - precip_input[-(ar_order + 2) :], - retrieved_motion, + dataset_w_motion.isel(time=slice(-(ar_order + 2), None, None)), timesteps=timesteps, - rainrate=None, # no R(VIL) conversion is done n_cascade_levels=n_cascade_levels, ar_order=ar_order, ar_window_radius=ar_window_radius, @@ -63,9 +60,10 @@ def test_anvil_rainrate( measure_time=measure_time, ) if measure_time: - precip_forecast, __, __ = output + dataset_forecast, __, __ = output else: - precip_forecast = output + dataset_forecast = output + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == ( @@ -73,7 +71,7 @@ def test_anvil_rainrate( ) result = verification.det_cat_fct( - precip_forecast[-1], precip_obs[-1], thr=0.1, scores="CSI" + precip_forecast[-1], dataset_obs[precip_var].values[-1], thr=0.1, scores="CSI" )["CSI"] assert result > min_csi, f"CSI={result:.2f}, required > {min_csi:.2f}" diff --git a/pysteps/tests/test_nowcasts_lagrangian_probability.py b/pysteps/tests/test_nowcasts_lagrangian_probability.py index 1ec352b0b..d75b29e87 100644 --- a/pysteps/tests/test_nowcasts_lagrangian_probability.py +++ b/pysteps/tests/test_nowcasts_lagrangian_probability.py @@ -1,10 +1,13 @@ # -*- coding: utf-8 -*- +from datetime import datetime, timezone + import numpy as np import pytest +import xarray as xr +from pysteps.motion.lucaskanade import dense_lucaskanade from pysteps.nowcasts.lagrangian_probability import forecast from pysteps.tests.helpers import get_precipitation_fields -from pysteps.motion.lucaskanade import dense_lucaskanade def test_numerical_example(): @@ -12,12 +15,23 @@ def test_numerical_example(): precip = np.zeros((20, 20)) precip[5:10, 5:10] = 1 velocity = np.zeros((2, *precip.shape)) + now = datetime.now(tz=timezone.utc).replace(tzinfo=None) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["time", "y", "x"], [precip]), + "velocity_x": (["y", "x"], velocity[0]), + "velocity_y": (["y", "x"], velocity[1]), + }, + coords={"time": (["time"], [now], {"stepsize": 300})}, + attrs={"precip_var": "precip_intensity"}, + ) timesteps = 4 thr = 0.5 slope = 1 # pixels / timestep # compute probability forecast - fct = forecast(precip, velocity, timesteps, thr, slope=slope) + dataset_forecast = forecast(dataset_input, timesteps, thr, slope=slope) + fct = dataset_forecast["precip_intensity"].values assert fct.ndim == 3 assert fct.shape[0] == timesteps @@ -26,7 +40,8 @@ def test_numerical_example(): assert fct.min() >= 0.0 # slope = 0 should return a binary field - fct = forecast(precip, velocity, timesteps, thr, slope=0) + dataset_forecast = forecast(dataset_input, timesteps, thr, slope=0) + fct = dataset_forecast["precip_intensity"].values ref = (np.repeat(precip[None, ...], timesteps, axis=0) >= thr).astype(float) assert np.allclose(fct, fct.astype(bool)) assert np.allclose(fct, ref) @@ -37,12 +52,23 @@ def test_numerical_example_with_float_slope_and_float_list_timesteps(): precip = np.zeros((20, 20)) precip[5:10, 5:10] = 1 velocity = np.zeros((2, *precip.shape)) + now = datetime.now(tz=timezone.utc).replace(tzinfo=None) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["time", "y", "x"], [precip]), + "velocity_x": (["y", "x"], velocity[0]), + "velocity_y": (["y", "x"], velocity[1]), + }, + coords={"time": (["time"], [now], {"stepsize": 300})}, + attrs={"precip_var": "precip_intensity"}, + ) timesteps = [1.0, 2.0, 5.0, 12.0] thr = 0.5 slope = 1.0 # pixels / timestep # compute probability forecast - fct = forecast(precip, velocity, timesteps, thr, slope=slope) + dataset_forecast = forecast(dataset_input, timesteps, thr, slope=slope) + fct = dataset_forecast["precip_intensity"].values assert fct.ndim == 3 assert fct.shape[0] == len(timesteps) @@ -56,16 +82,18 @@ def test_real_case(): pytest.importorskip("cv2") # inputs - precip, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs # motion - motion = dense_lucaskanade(precip) + dataset_w_motion = dense_lucaskanade(dataset_input) # parameters timesteps = [1, 2, 3] @@ -74,13 +102,18 @@ def test_real_case(): # compute probability forecast extrap_kwargs = dict(allow_nonfinite_values=True) - fct = forecast( - precip[-1], motion, timesteps, thr, slope=slope, extrap_kwargs=extrap_kwargs + dataset_forecast = forecast( + dataset_w_motion.isel(time=slice(-1, None, None)), + timesteps, + thr, + slope=slope, + extrap_kwargs=extrap_kwargs, ) + fct = dataset_forecast["precip_intensity"].values assert fct.ndim == 3 assert fct.shape[0] == len(timesteps) - assert fct.shape[1:] == precip.shape[1:] + assert fct.shape[1:] == dataset_input[precip_var].values.shape[1:] assert np.nanmax(fct) <= 1.0 assert np.nanmin(fct) >= 0.0 @@ -89,11 +122,19 @@ def test_wrong_inputs(): # dummy inputs precip = np.zeros((3, 3)) velocity = np.zeros((2, *precip.shape)) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["y", "x"], precip), + "velocity_x": (["y", "x"], velocity[0]), + "velocity_y": (["y", "x"], velocity[1]), + }, + attrs={"precip_var": "precip_intensity"}, + ) # timesteps must be > 0 with pytest.raises(ValueError): - forecast(precip, velocity, 0, 1) + forecast(dataset_input, 0, 1) # timesteps must be a sorted list with pytest.raises(ValueError): - forecast(precip, velocity, [2, 1], 1) + forecast(dataset_input, [2, 1], 1) diff --git a/pysteps/tests/test_nowcasts_linda.py b/pysteps/tests/test_nowcasts_linda.py index 2d5f03b71..a5b60611b 100644 --- a/pysteps/tests/test_nowcasts_linda.py +++ b/pysteps/tests/test_nowcasts_linda.py @@ -1,13 +1,13 @@ -from datetime import timedelta import os +from datetime import timedelta + import numpy as np import pytest +import xarray as xr from pysteps import io, motion, nowcasts, verification -from pysteps.nowcasts.linda import forecast from pysteps.tests.helpers import get_precipitation_fields - linda_arg_names = ( "add_perturbations", "kernel_type", @@ -42,7 +42,7 @@ def test_linda( pytest.importorskip("skimage") # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, metadata=True, @@ -51,20 +51,23 @@ def test_linda( log_transform=False, ) - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, clip=(354000, 866000, -96000, 416000), upscale=4000, log_transform=False, - )[1:, :, :] + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) - precip_forecast = forecast( - precip_input, - retrieved_motion, + nowcast_method = nowcasts.get_method("linda") + + dataset_forecast = nowcast_method( + dataset_w_motion, 3, kernel_type=kernel_type, vel_pert_method=vel_pert_method, @@ -78,68 +81,82 @@ def test_linda( seed=42, ) if measure_time: - assert len(precip_forecast) == 3 - assert isinstance(precip_forecast[1], float) - precip_forecast = precip_forecast[0] + assert len(dataset_forecast) == 3 + assert isinstance(dataset_forecast[1], float) + dataset_forecast = dataset_forecast[0] + + precip_forecast = dataset_forecast[precip_var].values if not add_perturbations: assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == 3 - assert precip_forecast.shape[1:] == precip_input.shape[1:] + assert precip_forecast.shape[1:] == dataset_input[precip_var].values.shape[1:] csi = verification.det_cat_fct( - precip_forecast[-1], precip_obs[-1], thr=1.0, scores="CSI" + precip_forecast[-1], + dataset_obs[precip_var].values[-1], + thr=1.0, + scores="CSI", )["CSI"] assert csi > min_csi, f"CSI={csi:.1f}, required > {min_csi:.1f}" else: assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == 5 assert precip_forecast.shape[1] == 3 - assert precip_forecast.shape[2:] == precip_input.shape[1:] + assert precip_forecast.shape[2:] == dataset_input[precip_var].values.shape[1:] - crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) + crps = verification.probscores.CRPS( + precip_forecast[:, -1], dataset_obs[precip_var].values[-1] + ) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" def test_linda_wrong_inputs(): # dummy inputs - precip = np.zeros((3, 3, 3)) - velocity = np.zeros((2, 3, 3)) + dataset_input = xr.Dataset( + data_vars={ + "precip_intensity": (["time", "y", "x"], np.zeros((3, 3, 3))), + "velocity_x": (["y", "x"], np.zeros((3, 3))), + "velocity_y": (["y", "x"], np.zeros((3, 3))), + }, + attrs={"precip_var": "precip_intensity"}, + ) + dataset_input_4d = xr.Dataset( + data_vars={ + "precip_intensity": ( + ["ens_number", "time", "y", "x"], + np.zeros((3, 3, 3, 3)), + ), + "velocity_x": (["y", "x"], np.zeros((3, 3))), + "velocity_y": (["y", "x"], np.zeros((3, 3))), + }, + attrs={"precip_var": "precip_intensity"}, + ) + + nowcast_method = nowcasts.get_method("linda") # vel_pert_method is set but kmperpixel is None with pytest.raises(ValueError): - forecast(precip, velocity, 1, vel_pert_method="bps", kmperpixel=None) + nowcast_method(dataset_input, 1, vel_pert_method="bps", kmperpixel=None) # vel_pert_method is set but timestep is None with pytest.raises(ValueError): - forecast( - precip, velocity, 1, vel_pert_method="bps", kmperpixel=1, timestep=None + nowcast_method( + dataset_input, 1, vel_pert_method="bps", kmperpixel=1, timestep=None ) # fractional time steps not yet implemented # timesteps is not an integer with pytest.raises(ValueError): - forecast(precip, velocity, [1.0, 2.0]) + nowcast_method(dataset_input, [1.0, 2.0]) # ari_order 1 or 2 required with pytest.raises(ValueError): - forecast(precip, velocity, 1, ari_order=3) + nowcast_method(dataset_input, 1, ari_order=3) # precip_fields must be a three-dimensional array with pytest.raises(ValueError): - forecast(np.zeros((3, 3, 3, 3)), velocity, 1) - - # precip_fields.shape[0] < ari_order+2 - with pytest.raises(ValueError): - forecast(np.zeros((2, 3, 3)), velocity, 1, ari_order=1) - - # advection_field must be a three-dimensional array - with pytest.raises(ValueError): - forecast(precip, velocity[0], 1) - - # dimension mismatch between precip_fields and advection_field - with pytest.raises(ValueError): - forecast(np.zeros((3, 2, 3)), velocity, 1) + nowcast_method(dataset_input_4d, 1) def test_linda_callback(tmp_path): diff --git a/pysteps/tests/test_nowcasts_sprog.py b/pysteps/tests/test_nowcasts_sprog.py index 1077c3edd..f64900cd8 100644 --- a/pysteps/tests/test_nowcasts_sprog.py +++ b/pysteps/tests/test_nowcasts_sprog.py @@ -30,29 +30,28 @@ def test_sprog( ): """Tests SPROG nowcast.""" # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip_input = precip_input.filled() - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) + precip_var = dataset_input.attrs["precip_var"] + metadata = dataset_input[precip_var].attrs pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("sprog") - precip_forecast = nowcast_method( - precip_input, - retrieved_motion, + dataset_forecast = nowcast_method( + dataset_w_motion, timesteps=timesteps, precip_thr=metadata["threshold"], n_cascade_levels=n_cascade_levels, @@ -60,6 +59,7 @@ def test_sprog( probmatching_method=probmatching_method, domain=domain, ) + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == ( @@ -67,7 +67,7 @@ def test_sprog( ) result = verification.det_cat_fct( - precip_forecast[-1], precip_obs[-1], thr=0.1, scores="CSI" + precip_forecast[-1], dataset_obs[precip_var].values[-1], thr=0.1, scores="CSI" )["CSI"] assert result > min_csi, f"CSI={result:.1f}, required > {min_csi:.1f}" diff --git a/pysteps/tests/test_nowcasts_sseps.py b/pysteps/tests/test_nowcasts_sseps.py index 4d89fd33a..6d3a3c9c0 100644 --- a/pysteps/tests/test_nowcasts_sseps.py +++ b/pysteps/tests/test_nowcasts_sseps.py @@ -17,8 +17,8 @@ ) sseps_arg_values = [ - (5, 6, 2, "incremental", "cdf", 200, 3, 0.60), - (5, 6, 2, "incremental", "cdf", 200, [3], 0.60), + (5, 6, 2, "incremental", "cdf", 200, 3, 0.62), + (5, 6, 2, "incremental", "cdf", 200, [3], 0.62), ] @@ -35,32 +35,29 @@ def test_sseps( ): """Tests SSEPS nowcast.""" # inputs - precip_input, metadata = get_precipitation_fields( + dataset_input = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) - precip_input = precip_input.filled() + precip_var = dataset_input.attrs["precip_var"] - precip_obs = get_precipitation_fields( + dataset_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 - )[1:, :, :] - precip_obs = precip_obs.filled() + ).isel(time=slice(1, None, None)) pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_input) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("sseps") - precip_forecast = nowcast_method( - precip_input, - metadata, - retrieved_motion, + dataset_forecast = nowcast_method( + dataset_w_motion, + timesteps, win_size=win_size, - timesteps=timesteps, n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, ar_order=ar_order, @@ -68,6 +65,7 @@ def test_sseps( mask_method=mask_method, probmatching_method=probmatching_method, ) + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == n_ens_members @@ -75,7 +73,9 @@ def test_sseps( timesteps if isinstance(timesteps, int) else len(timesteps) ) - crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) + crps = verification.probscores.CRPS( + precip_forecast[:, -1], dataset_obs[precip_var].values[-1] + ) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" diff --git a/pysteps/tests/test_nowcasts_steps.py b/pysteps/tests/test_nowcasts_steps.py index adb6ea917..16d2e4de5 100644 --- a/pysteps/tests/test_nowcasts_steps.py +++ b/pysteps/tests/test_nowcasts_steps.py @@ -56,17 +56,15 @@ def test_steps_skill( ).isel(time=slice(1, None, None)) precip_var = dataset_input.attrs["precip_var"] metadata = dataset_input[precip_var].attrs - precip_data = dataset_input[precip_var].values pytest.importorskip("cv2") oflow_method = motion.get_method("LK") - retrieved_motion = oflow_method(precip_data) + dataset_w_motion = oflow_method(dataset_input) nowcast_method = nowcasts.get_method("steps") - precip_forecast = nowcast_method( - precip_data, - retrieved_motion, + dataset_forecast = nowcast_method( + dataset_w_motion, timesteps=timesteps, precip_thr=metadata["threshold"], kmperpixel=2.0, @@ -79,6 +77,7 @@ def test_steps_skill( probmatching_method=probmatching_method, domain=domain, ) + precip_forecast = dataset_forecast[precip_var].values assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == n_ens_members diff --git a/pysteps/tests/test_nowcasts_utils.py b/pysteps/tests/test_nowcasts_utils.py index 075225427..1dfeb27a9 100644 --- a/pysteps/tests/test_nowcasts_utils.py +++ b/pysteps/tests/test_nowcasts_utils.py @@ -26,17 +26,18 @@ def test_nowcast_main_loop( timesteps, ensemble, num_ensemble_members, velocity_perturbations ): """Test the nowcast_main_loop function.""" - precip = get_precipitation_fields( + dataset = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=False, upscale=2000, ) - precip = precip.filled() oflow_method = motion.get_method("LK") - velocity = oflow_method(precip) + dataset = oflow_method(dataset) + precip = dataset["precip_intensity"].values + velocity = np.stack([dataset["velocity_x"].values, dataset["velocity_y"].values]) precip = precip[-1] diff --git a/pysteps/tests/test_utils_dimension.py b/pysteps/tests/test_utils_dimension.py index 2bbb63f58..038b725a0 100644 --- a/pysteps/tests/test_utils_dimension.py +++ b/pysteps/tests/test_utils_dimension.py @@ -5,19 +5,17 @@ import numpy as np import pytest import xarray as xr -from numpy.testing import assert_array_equal +from numpy.testing import assert_array_almost_equal, assert_array_equal from pytest import raises -from pysteps.converters import convert_to_xarray_dataset from pysteps.utils import dimension +from pysteps.xarray_helpers import convert_input_to_xarray_dataset fillvalues_metadata = { "x1": 0, "x2": 4, "y1": 0, "y2": 4, - "xpixelsize": 1, - "ypixelsize": 1, "zerovalue": 0, "yorigin": "lower", "unit": "mm/h", @@ -32,7 +30,6 @@ } test_data_not_trim = ( - # "data, window_size, axis, method, expected" ( np.arange(12).reshape(2, 6), 2, @@ -94,7 +91,7 @@ def test_aggregate_fields(data, window_size, dim, method, expected): windows size does not divide the data dimensions. The length of each dimension should be larger than 2. """ - dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata) + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) actual = dimension.aggregate_fields(dataset, window_size, dim=dim, method=method) assert_array_equal(actual["precip_intensity"].values, expected) @@ -104,7 +101,7 @@ def test_aggregate_fields(data, window_size, dim, method, expected): data = np.pad(data, ((0, 0), (0, 1))) else: data = np.pad(data, (0, 1)) - dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata) + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) actual = dimension.aggregate_fields( dataset, window_size, dim=dim, method=method, trim=True @@ -115,13 +112,85 @@ def test_aggregate_fields(data, window_size, dim, method, expected): dimension.aggregate_fields(dataset, window_size, dim=dim, method=method) +test_data_agg_w_velocity = ( + ( + np.arange(12).reshape(2, 6), + np.arange(12).reshape(2, 6), + np.arange(12).reshape(2, 6), + np.arange(0, 1.2, 0.1).reshape(2, 6), + 2, + "x", + "mean", + "mean", + np.array([[0.5, 2.5, 4.5], [6.5, 8.5, 10.5]]), + np.array([[0.5, 2.5, 4.5], [6.5, 8.5, 10.5]]), + np.array([[0, 0.2, 0.4], [0.6, 0.8, 1]]), + ), + ( + np.arange(4 * 6).reshape(4, 6), + np.arange(4 * 6).reshape(4, 6), + np.arange(4 * 6).reshape(4, 6), + np.arange(0, 1.2, 0.05).reshape(4, 6), + (2, 3), + ("y", "x"), + "mean", + "sum", + np.array([[4, 7], [16, 19]]), + np.array([[24, 42], [96, 114]]), + np.array([[0, 0.15], [0.6, 0.75]]), + ), +) + + +@pytest.mark.parametrize( + "data, data_vx, data_vy, data_qual, window_size, dim, method, velocity_method, expected, expected_v, expected_qual", + test_data_agg_w_velocity, +) +def test_aggregate_fields_w_velocity( + data, + data_vx, + data_vy, + data_qual, + window_size, + dim, + method, + velocity_method, + expected, + expected_v, + expected_qual, +): + """ + Test the aggregate_fields function for dataset with velocity information. + The windows size must divide exactly the data dimensions. + Internally, additional test are generated for situations where the + windows size does not divide the data dimensions. + The length of each dimension should be larger than 2. + """ + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) + dataset = dataset.assign( + { + "velocity_x": (("y", "x"), data_vx), + "velocity_y": (("y", "x"), data_vy), + "quality": (("y", "x"), data_qual), + } + ) + + actual = dimension.aggregate_fields( + dataset, window_size, dim=dim, method=method, velocity_method=velocity_method + ) + assert_array_equal(actual["precip_intensity"].values, expected) + assert_array_equal(actual["velocity_x"].values, expected_v) + assert_array_equal(actual["velocity_y"].values, expected_v) + assert_array_almost_equal(actual["quality"].values, expected_qual) + + def test_aggregate_fields_errors(): """ Test that the errors are correctly captured in the aggregate_fields function. """ data = np.arange(4 * 6).reshape(4, 6) - dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata) + dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata) with raises(ValueError): dimension.aggregate_fields(dataset, -1, dim="y") @@ -160,7 +229,7 @@ def test_aggregate_fields_errors(): ) def test_aggregate_fields_time(data, metadata, time_window_min, ignore_nan, expected): """Test the aggregate_fields_time.""" - dataset_ref = convert_to_xarray_dataset( + dataset_ref = convert_input_to_xarray_dataset( data, None, {**fillvalues_metadata, **metadata} ) datasets = [] @@ -234,7 +303,9 @@ def test_aggregate_fields_time(data, metadata, time_window_min, ignore_nan, expe ) def test_aggregate_fields_space(data, metadata, space_window, ignore_nan, expected): """Test the aggregate_fields_space.""" - dataset = convert_to_xarray_dataset(data, None, {**fillvalues_metadata, **metadata}) + dataset = convert_input_to_xarray_dataset( + data, None, {**fillvalues_metadata, **metadata} + ) assert_array_equal( dimension.aggregate_fields_space(dataset, space_window, ignore_nan)[ "precip_intensity" if metadata["unit"] == "mm/h" else "precip_accum" @@ -271,7 +342,9 @@ def test_aggregate_fields_space(data, metadata, space_window, ignore_nan, expect @pytest.mark.parametrize("R, metadata, extent, expected", test_data_clip_domain) def test_clip_domain(R, metadata, extent, expected): """Test the clip_domain.""" - dataset = convert_to_xarray_dataset(R, None, {**fillvalues_metadata, **metadata}) + dataset = convert_input_to_xarray_dataset( + R, None, {**fillvalues_metadata, **metadata} + ) assert_array_equal( dimension.clip_domain(dataset, extent)["precip_intensity"].values, expected ) @@ -336,12 +409,67 @@ def test_clip_domain(R, metadata, extent, expected): @pytest.mark.parametrize("data, metadata, method, inverse, expected", test_data_square) def test_square_domain(data, metadata, method, inverse, expected): """Test the square_domain.""" - dataset = convert_to_xarray_dataset(data, None, {**fillvalues_metadata, **metadata}) - dataset["precip_intensity"].attrs = { - **dataset["precip_intensity"].attrs, - **metadata, - } + dataset = convert_input_to_xarray_dataset( + data, None, {**fillvalues_metadata, **metadata} + ) + if "square_method" in metadata: + dataset.attrs["square_method"] = metadata["square_method"] + if "orig_domain" in metadata: + dataset.attrs["orig_domain"] = metadata["orig_domain"] assert_array_equal( dimension.square_domain(dataset, method, inverse)["precip_intensity"].values, expected, ) + + +# square_domain +R = np.ones((4, 2)) +test_data_square_w_velocity = [ + # square by padding + ( + R, + {"x1": 0, "x2": 2, "y1": 0, "y2": 4, "xpixelsize": 1, "ypixelsize": 1}, + "pad", + False, + np.array([[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]), + np.array([[0, 1, 1, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 1, 1, 0]]), + ) +] + + +@pytest.mark.parametrize( + "data, metadata, method, inverse, expected, expected_velqual", + test_data_square_w_velocity, +) +def test_square_w_velocity(data, metadata, method, inverse, expected, expected_velqual): + """Test the square_domain.""" + dataset = convert_input_to_xarray_dataset( + data, None, {**fillvalues_metadata, **metadata} + ) + dataset = dataset.assign( + { + "velocity_x": (("y", "x"), data), + "velocity_y": (("y", "x"), data), + "quality": (("y", "x"), data), + } + ) + if "square_method" in metadata: + dataset.attrs["square_method"] = metadata["square_method"] + if "orig_domain" in metadata: + dataset.attrs["orig_domain"] = metadata["orig_domain"] + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["precip_intensity"].values, + expected, + ) + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["velocity_x"].values, + expected_velqual, + ) + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["velocity_y"].values, + expected_velqual, + ) + assert_array_equal( + dimension.square_domain(dataset, method, inverse)["quality"].values, + expected_velqual, + ) diff --git a/pysteps/utils/dimension.py b/pysteps/utils/dimension.py index efa459610..d039d8ef0 100644 --- a/pysteps/utils/dimension.py +++ b/pysteps/utils/dimension.py @@ -14,14 +14,23 @@ clip_domain square_domain """ +from typing import Any, Callable + import numpy as np import xarray as xr -from pysteps.converters import compute_lat_lon +from pysteps.xarray_helpers import compute_lat_lon -_aggregation_methods = dict( - sum=np.sum, mean=np.mean, nanmean=np.nanmean, nansum=np.nansum -) +_aggregation_methods: dict[str, Callable[..., Any]] = { + "sum": np.sum, + "mean": np.mean, + "min": np.min, + "max": np.max, + "nanmean": np.nanmean, + "nansum": np.nansum, + "nanmin": np.nanmin, + "nanmax": np.nanmax, +} def aggregate_fields_time( @@ -93,7 +102,9 @@ def aggregate_fields_time( if ignore_nan: method = "".join(("nan", method)) - return aggregate_fields(dataset, window_size, dim="time", method=method) + return aggregate_fields( + dataset, window_size, dim="time", method=method, velocity_method="sum" + ) def aggregate_fields_space( @@ -147,9 +158,8 @@ def aggregate_fields_space( if np.isscalar(space_window): space_window = (space_window, space_window) - # assumes that frames are evenly spaced - ydelta = dataset["y"].values[1] - dataset["y"].values[0] - xdelta = dataset["x"].values[1] - dataset["x"].values[0] + ydelta = dataset["y"].attrs["stepsize"] + xdelta = dataset["x"].attrs["stepsize"] if space_window[0] % ydelta > 1e-10 or space_window[1] % xdelta > 1e-10: raise ValueError("space_window does not equally split dataset") @@ -166,11 +176,16 @@ def aggregate_fields_space( window_size = (int(space_window[0] / ydelta), int(space_window[1] / xdelta)) - return aggregate_fields(dataset, window_size, ["y", "x"], method) + return aggregate_fields(dataset, window_size, ["y", "x"], method, "mean") def aggregate_fields( - dataset: xr.Dataset, window_size, dim="x", method="mean", trim=False + dataset: xr.Dataset, + window_size, + dim="x", + method="mean", + velocity_method="mean", + trim=False, ) -> xr.Dataset: """Aggregate fields along a given direction. @@ -201,7 +216,11 @@ def aggregate_fields( dims, instead of a single dim method: string, optional Optional argument that specifies the operation to use - to aggregate the values within the window. + to aggregate the precipitation values within the window. + Default to mean operator. + velocity_method: string, optional + Optional argument that specifies the operation to use + to aggregate the velocity values within the window. Default to mean operator. trim: bool In case that the ``data`` is not perfectly divisible by @@ -254,9 +273,8 @@ def aggregate_fields( f"dataset.sizes[dim]={dataset.sizes[d]}" ) - # FIXME: The aggregation method is applied to all DataArrays in the Dataset - # Fix to allow support for an aggregation method per DataArray - return ( + dataset_ref = dataset + dataset = ( dataset.rolling(dict(zip(dim, window_size))) .reduce(_aggregation_methods[method]) .isel( @@ -266,6 +284,50 @@ def aggregate_fields( } ) ) + if "velocity_x" in dataset_ref: + dataset["velocity_x"] = ( + dataset_ref["velocity_x"] + .rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods[velocity_method]) + .isel( + { + d: slice( + ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws + ) + for d, ws in zip(dim, window_size) + } + ) + ) + if "velocity_y" in dataset_ref: + dataset["velocity_y"] = ( + dataset_ref["velocity_y"] + .rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods[velocity_method]) + .isel( + { + d: slice( + ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws + ) + for d, ws in zip(dim, window_size) + } + ) + ) + if "quality" in dataset_ref: + dataset["quality"] = ( + dataset_ref["quality"] + .rolling(dict(zip(dim, window_size))) + .reduce(_aggregation_methods["min"]) + .isel( + { + d: slice( + ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws + ) + for d, ws in zip(dim, window_size) + } + ) + ) + + return dataset def clip_domain(dataset: xr.Dataset, extent=None): @@ -298,8 +360,7 @@ def clip_domain(dataset: xr.Dataset, extent=None): def _pad_domain( dataset: xr.Dataset, dim_to_pad: str, idx_buffer: int, zerovalue: float ) -> xr.Dataset: - # assumes that frames are evenly spaced - delta = dataset[dim_to_pad].values[1] - dataset[dim_to_pad].values[0] + delta = dataset[dim_to_pad].attrs["stepsize"] end_values = ( dataset[dim_to_pad].values[0] - delta * idx_buffer, dataset[dim_to_pad].values[-1] + delta * idx_buffer, @@ -307,8 +368,6 @@ def _pad_domain( dataset_ref = dataset - # FIXME: The same zerovalue is used for all DataArrays in the Dataset - # Fix to allow support for a zerovalue per DataArray dataset = dataset_ref.pad({dim_to_pad: idx_buffer}, constant_values=zerovalue) dataset[dim_to_pad] = dataset_ref[dim_to_pad].pad( {dim_to_pad: idx_buffer}, @@ -318,6 +377,24 @@ def _pad_domain( dataset.lat.data[:], dataset.lon.data[:] = compute_lat_lon( dataset.x.values, dataset.y.values, dataset.attrs["projection"] ) + if "velocity_x" in dataset_ref: + dataset["velocity_x"].data = ( + dataset_ref["velocity_x"] + .pad({dim_to_pad: idx_buffer}, constant_values=0.0) + .values + ) + if "velocity_y" in dataset_ref: + dataset["velocity_y"].data = ( + dataset_ref["velocity_y"] + .pad({dim_to_pad: idx_buffer}, constant_values=0.0) + .values + ) + if "quality" in dataset_ref: + dataset["quality"].data = ( + dataset_ref["quality"] + .pad({dim_to_pad: idx_buffer}, constant_values=0.0) + .values + ) return dataset @@ -332,14 +409,17 @@ def square_domain(dataset: xr.Dataset, method="pad", inverse=False): :py:mod:`pysteps.io.importers`. method: {'pad', 'crop'}, optional Either pad or crop. - If pad, an equal number of zeros is added to both ends of its shortest - side in order to produce a square domain. + If pad, an equal number of pixels + filled with the minimum value of the precipitation + field is added to both ends of the precipitation fields shortest + side in order to produce a square domain. The quality and velocity fields + are always padded with zeros. If crop, an equal number of pixels is removed to both ends of its longest side in order to produce a square domain. Note that the crop method involves an irreversible loss of data. inverse: bool, optional Perform the inverse method to recover the original domain shape. - After a crop, the inverse is performed by padding the field with zeros. + After a crop, the inverse is performed by doing a pad. Returns ------- diff --git a/pysteps/converters.py b/pysteps/xarray_helpers.py similarity index 76% rename from pysteps/converters.py rename to pysteps/xarray_helpers.py index 2825af612..a1049f32f 100644 --- a/pysteps/converters.py +++ b/pysteps/xarray_helpers.py @@ -3,7 +3,7 @@ pysteps.converters ================== -Module with data converter functions. +Module with xarray helper functions. .. autosummary:: :toctree: ../generated/ @@ -11,6 +11,8 @@ convert_to_xarray_dataset """ +from datetime import datetime, timedelta + import numpy as np import numpy.typing as npt import pyproj @@ -77,7 +79,7 @@ def compute_lat_lon( return lat.reshape(x_2d.shape), lon.reshape(x_2d.shape) -def convert_to_xarray_dataset( +def convert_input_to_xarray_dataset( precip: np.ndarray, quality: np.ndarray | None, metadata: dict[str, str | float | None], @@ -111,6 +113,21 @@ def convert_to_xarray_dataset( y_r = np.linspace(metadata["y1"], metadata["y2"], h + 1)[:-1] y_r += 0.5 * (y_r[1] - y_r[0]) + if "xpixelsize" in metadata: + xpixelsize = metadata["xpixelsize"] + else: + xpixelsize = x_r[1] - x_r[0] + + if "ypixelsize" in metadata: + ypixelsize = metadata["ypixelsize"] + else: + ypixelsize = y_r[1] - y_r[0] + + if x_r[1] - x_r[0] != xpixelsize: + raise ValueError("xpixelsize does not match x1, x2 and array shape") + if y_r[1] - y_r[0] != ypixelsize: + raise ValueError("ypixelsize does not match y1, y2 and array shape") + # flip yr vector if yorigin is upper if metadata["yorigin"] == "upper": y_r = np.flip(y_r) @@ -160,6 +177,7 @@ def convert_to_xarray_dataset( "long_name": "y-coordinate in Cartesian system", "standard_name": "projection_y_coordinate", "units": metadata["cartesian_unit"], + "stepsize": ypixelsize, }, ), "x": ( @@ -170,6 +188,7 @@ def convert_to_xarray_dataset( "long_name": "x-coordinate in Cartesian system", "standard_name": "projection_x_coordinate", "units": metadata["cartesian_unit"], + "stepsize": xpixelsize, }, ), "lon": ( @@ -178,7 +197,6 @@ def convert_to_xarray_dataset( { "long_name": "longitude coordinate", "standard_name": "longitude", - # TODO(converters): Don't hard-code the unit. "units": "degrees_east", }, ), @@ -188,7 +206,6 @@ def convert_to_xarray_dataset( { "long_name": "latitude coordinate", "standard_name": "latitude", - # TODO(converters): Don't hard-code the unit. "units": "degrees_north", }, ), @@ -207,3 +224,45 @@ def convert_to_xarray_dataset( } dataset = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) return dataset.sortby(["y", "x"]) + + +def convert_output_to_xarray_dataset( + dataset: xr.Dataset, timesteps: int | list[int], output: np.ndarray +) -> xr.Dataset: + precip_var = dataset.attrs["precip_var"] + metadata = dataset[precip_var].attrs + + last_timestamp = ( + dataset["time"][-1].values.astype("datetime64[us]").astype(datetime) + ) + time_metadata = dataset["time"].attrs + timestep_seconds = dataset["time"].attrs["stepsize"] + dataset = dataset.drop_vars([precip_var]).drop_dims(["time"]) + if isinstance(timesteps, int): + timesteps = list(range(1, timesteps + 1)) + next_timestamps = [ + last_timestamp + timedelta(seconds=timestep_seconds * i) for i in timesteps + ] + dataset = dataset.assign_coords( + {"time": (["time"], next_timestamps, time_metadata)} + ) + + if output.ndim == 4: + dataset = dataset.assign_coords( + { + "ens_number": ( + ["ens_number"], + list(range(1, output.shape[0] + 1)), + { + "long_name": "ensemble member", + "standard_name": "realization", + "units": "", + }, + ) + } + ) + dataset[precip_var] = (["ens_number", "time", "y", "x"], output, metadata) + else: + dataset[precip_var] = (["time", "y", "x"], output, metadata) + + return dataset