diff --git a/pysteps/decorators.py b/pysteps/decorators.py
index ee421977e..69c9945bc 100644
--- a/pysteps/decorators.py
+++ b/pysteps/decorators.py
@@ -22,7 +22,7 @@
 
 import numpy as np
 
-from pysteps.converters import convert_to_xarray_dataset
+from pysteps.xarray_helpers import convert_input_to_xarray_dataset
 
 
 def _add_extra_kwrds_to_docstrings(target_func, extra_kwargs_doc_text):
@@ -90,7 +90,9 @@ def _import_with_postprocessing(*args, **kwargs):
                     mask = ~np.isfinite(precip)
                     precip[mask] = _fillna
 
-            return convert_to_xarray_dataset(precip.astype(_dtype), quality, metadata)
+            return convert_input_to_xarray_dataset(
+                precip.astype(_dtype), quality, metadata
+            )
 
         extra_kwargs_doc = """
             Other Parameters
@@ -126,7 +128,9 @@ def new_function(*args, **kwargs):
             target motion_method_func function.
             """
 
-            input_images = args[0]
+            dataset = args[0]
+            precip_var = dataset.attrs["precip_var"]
+            input_images = dataset[precip_var].values
             if input_images.ndim != 3:
                 raise ValueError(
                     "input_images dimension mismatch.\n"
diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py
index f61d4b25b..71fe6dc78 100644
--- a/pysteps/io/importers.py
+++ b/pysteps/io/importers.py
@@ -74,22 +74,27 @@
 
 .. tabularcolumns:: |p{2cm}|L|
 
-+---------------+-------------------------------------------------------------------------------------------+
-|  Coordinate   |                Description                                                                |
-+===============+===========================================================================================+
-|   y           | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
-+---------------+-------------------------------------------------------------------------------------------+
-|   x           | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
-+---------------+-------------------------------------------------------------------------------------------+
-|   lat         | latitude coordinate in degrees                                                            |
-+---------------+-------------------------------------------------------------------------------------------+
-|   lon         | longitude coordinate in degrees                                                           |
-+---------------+-------------------------------------------------------------------------------------------+
-|   time        | forecast time in seconds since forecast start time                                        |
-+---------------+-------------------------------------------------------------------------------------------+
-|   member      | ensemble member number (integer)                                                          |
-+---------------+-------------------------------------------------------------------------------------------+
-
++--------------------+-------------------------------------------------------------------------------------------+
+|  Coordinate        |                Description                                                                |
++====================+===========================================================================================+
+|   y                | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
++--------------------+-------------------------------------------------------------------------------------------+
+|   x                | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
++--------------------+-------------------------------------------------------------------------------------------+
+|   lat              | latitude coordinate in degrees                                                            |
++--------------------+-------------------------------------------------------------------------------------------+
+|   lon              | longitude coordinate in degrees                                                           |
++--------------------+-------------------------------------------------------------------------------------------+
+|   time             | forecast time in seconds since forecast start time                                        |
++--------------------+-------------------------------------------------------------------------------------------+
+|   ens_number       | ensemble member number (integer)                                                          |
++--------------------+-------------------------------------------------------------------------------------------+
+|   direction        | used by proesmans to return the forward and backward advection and consistency fields     |
++--------------------+-------------------------------------------------------------------------------------------+
+
+The time, x and y dimensions all MUST be regularly spaced, with the stepsize included
+in a ``stepsize`` attribute. The stepsize is given in the unit of the dimension (this
+is alwyas seconds for the time dimension).
 
 The dataset can contain the following data variables:
 
@@ -102,8 +107,14 @@
 | precip_accum      | precip_intensity if unit is ``mm/h``, precip_accum if unit is ``mm`` and reflectivity if unit is ``dBZ``, |
 | or reflectivity   | the attributes of this variable contain metadata relevant to this attribute (see below)                   |
 +-------------------+-----------------------------------------------------------------------------------------------------------+
+| velocity_x        | x-component of the advection field in cartesian_unit per timestep                                         |
++-------------------+-----------------------------------------------------------------------------------------------------------+
+| velocity_y        | y-component of the advection field in cartesian_unit per timestep                                         |
++-------------------+-----------------------------------------------------------------------------------------------------------+
 | quality           | value between 0 and 1 denoting the quality of the precipitation data, currently not used for anything     |
 +-------------------+-----------------------------------------------------------------------------------------------------------+
+| velocity_quality  | value between 0 and 1 denoting the quality of the velocity data, currently only returned by proesmans     |
++-------------------+-----------------------------------------------------------------------------------------------------------+
 
 Some of the metadata in the metadata dictionary is not explicitely stored in the dataset,
 but is still implicitly present. For example ``x1`` can easily be found by taking the first
diff --git a/pysteps/io/readers.py b/pysteps/io/readers.py
index 7295b5ff7..30c4d4fc0 100644
--- a/pysteps/io/readers.py
+++ b/pysteps/io/readers.py
@@ -15,7 +15,7 @@
 import xarray as xr
 
 
-def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
+def read_timeseries(inputfns, importer, timestep=None, **kwargs) -> xr.Dataset | None:
     """
     Read a time series of input files using the methods implemented in the
     :py:mod:`pysteps.io.importers` module and stack them into a 3d xarray
@@ -28,6 +28,9 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
         :py:mod:`pysteps.io.archive` module.
     importer: function
         A function implemented in the :py:mod:`pysteps.io.importers` module.
+    timestep: int, optional
+        The timestep in seconds, this value is optional if more than 1 inputfns
+        are given.
     kwargs: dict
         Optional keyword arguments for the importer.
 
@@ -58,6 +61,16 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
         return None
 
     startdate = min(inputfns[1])
+    sorted_dates = sorted(inputfns[1])
+    timestep_dates = int((sorted_dates[1] - sorted_dates[0]).total_seconds())
+
+    if timestep is None:
+        timestep = timestep_dates
+    if timestep != timestep_dates:
+        raise ValueError("given timestep does not match inputfns")
+    for i in range(len(sorted_dates) - 1):
+        if int((sorted_dates[i + 1] - sorted_dates[i]).total_seconds()) != timestep:
+            raise ValueError("supplied dates are not evenly spaced")
 
     datasets = []
     for i, ifn in enumerate(inputfns[0]):
@@ -73,6 +86,7 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
                 {
                     "long_name": "forecast time",
                     "units": f"seconds since {startdate:%Y-%m-%d %H:%M:%S}",
+                    "stepsize": timestep,
                 },
             )
         )
diff --git a/pysteps/motion/constant.py b/pysteps/motion/constant.py
index a5c153616..a26831ac0 100644
--- a/pysteps/motion/constant.py
+++ b/pysteps/motion/constant.py
@@ -14,27 +14,32 @@
 
 import numpy as np
 import scipy.optimize as op
+import xarray as xr
 from scipy.ndimage import map_coordinates
 
 
-def constant(R, **kwargs):
+def constant(dataset: xr.Dataset, **kwargs):
     """
     Compute a constant advection field by finding a translation vector that
     maximizes the correlation between two successive images.
 
     Parameters
     ----------
-    R: array_like
-      Array of shape (T,m,n) containing a sequence of T two-dimensional input
-      images of shape (m,n). If T > 2, two last elements along axis 0 are used.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
+        The dataset has to have a time dimension. If the size of this dimension
+        is larger than 2, the last 2 entries of this dimension are used.
 
     Returns
     -------
-    out: array_like
-        The constant advection field having shape (2, m, n), where out[0, :, :]
-        contains the x-components of the motion vectors and out[1, :, :]
-        contains the y-components.
+    out: xarray.Dataset
+        The input dataset with the constant advection field added in the ``velocity_x``
+        and ``velocity_y`` data variables.
     """
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    R = dataset[precip_var].values
     m, n = R.shape[1:]
     X, Y = np.meshgrid(np.arange(n), np.arange(m))
 
@@ -51,4 +56,7 @@ def f(v):
     options = {"initial_simplex": (np.array([(0, 1), (1, 0), (1, 1)]))}
     result = op.minimize(f, (1, 1), method="Nelder-Mead", options=options)
 
-    return np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))])
+    output = np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))])
+    dataset["velocity_x"] = (["y", "x"], output[0])
+    dataset["velocity_y"] = (["y", "x"], output[1])
+    return dataset
diff --git a/pysteps/motion/darts.py b/pysteps/motion/darts.py
index 4e5050d48..4aac80cd3 100644
--- a/pysteps/motion/darts.py
+++ b/pysteps/motion/darts.py
@@ -11,8 +11,10 @@
     DARTS
 """
 
-import numpy as np
 import time
+
+import numpy as np
+import xarray as xr
 from numpy.linalg import lstsq, svd
 
 from pysteps import utils
@@ -20,16 +22,17 @@
 
 
 @check_input_frames(just_ndim=True)
-def DARTS(input_images, **kwargs):
+def DARTS(dataset: xr.Dataset, **kwargs):
     """
     Compute the advection field from a sequence of input images by using the
     DARTS method. :cite:`RCW2011`
 
     Parameters
     ----------
-    input_images: array-like
-      Array of shape (T,m,n) containing a sequence of T two-dimensional input
-      images of shape (m,n).
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
+        The dataset has to have a time dimension.
 
     Other Parameters
     ----------------
@@ -67,13 +70,15 @@ def DARTS(input_images, **kwargs):
 
     Returns
     -------
-    out: ndarray
-        Three-dimensional array (2,m,n) containing the dense x- and y-components
-        of the motion field in units of pixels / timestep as given by the input
-        array R.
+    out: xarray.Dataset
+        The input dataset with the advection field added in the ``velocity_x``
+        and ``velocity_y`` data variables.
 
     """
 
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    input_images = dataset[precip_var].values
     N_x = kwargs.get("N_x", 50)
     N_y = kwargs.get("N_y", 50)
     N_t = kwargs.get("N_t", 4)
@@ -214,10 +219,14 @@ def DARTS(input_images, **kwargs):
             fft.ifft2(_fill(V, input_images.shape[0], input_images.shape[1], k_x, k_y))
         )
 
+    output = np.stack([U, V])
+    dataset["velocity_x"] = (["y", "x"], output[0])
+    dataset["velocity_y"] = (["y", "x"], output[1])
+
     if verbose:
         print("--- %s seconds ---" % (time.time() - t0))
 
-    return np.stack([U, V])
+    return dataset
 
 
 def _leastsq(A, B, y):
diff --git a/pysteps/motion/lucaskanade.py b/pysteps/motion/lucaskanade.py
index 133f860b7..b7a51a26b 100644
--- a/pysteps/motion/lucaskanade.py
+++ b/pysteps/motion/lucaskanade.py
@@ -22,22 +22,22 @@
     dense_lucaskanade
 """
 
+import time
+
 import numpy as np
+import xarray as xr
 from numpy.ma.core import MaskedArray
 
+from pysteps import feature, utils
 from pysteps.decorators import check_input_frames
-
-from pysteps import utils, feature
 from pysteps.tracking.lucaskanade import track_features
 from pysteps.utils.cleansing import decluster, detect_outliers
 from pysteps.utils.images import morph_opening
 
-import time
-
 
 @check_input_frames(2)
 def dense_lucaskanade(
-    input_images,
+    dataset: xr.Dataset,
     lk_kwargs=None,
     fd_method="shitomasi",
     fd_kwargs=None,
@@ -73,18 +73,14 @@ def dense_lucaskanade(
 
     Parameters
     ----------
-    input_images: ndarray_ or MaskedArray_
-        Array of shape (T, m, n) containing a sequence of *T* two-dimensional
-        input images of shape (m, n). The indexing order in **input_images** is
-        assumed to be (time, latitude, longitude).
-
-        *T* = 2 is the minimum required number of images.
-        With *T* > 2, all the resulting sparse vectors are pooled together for
-        the final interpolation on a regular grid.
-
-        In case of ndarray_, invalid values (Nans or infs) are masked,
-        otherwise the mask of the MaskedArray_ is used. Such mask defines a
-        region where features are not detected for the tracking algorithm.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
+        The dataset has to have a time dimension. The size of the time dimension needs to
+        be at least 2. If it is larger than 2, all the resulting sparse vectors are pooled
+        together for the final interpolation on a regular grid. Invalid values (Nans or infs)
+        are masked. This mask defines a region where features are not detected for the tracking
+        algorithm.
 
     lk_kwargs: dict, optional
         Optional dictionary containing keyword arguments for the `Lucas-Kanade`_
@@ -151,14 +147,10 @@ def dense_lucaskanade(
 
     Returns
     -------
-    out: ndarray_ or tuple
-        If **dense=True** (the default), return the advection field having shape
-        (2, m, n), where out[0, :, :] contains the x-components of the motion
-        vectors and out[1, :, :] contains the y-components.
-        The velocities are in units of pixels / timestep, where timestep is the
-        time difference between the two input images.
-        Return a zero motion field of shape (2, m, n) when no motion is
-        detected.
+    out: xarray.Dataset or tuple 
+        If **dense=True** (the default), return the input dataset with the advection
+        field added in the ``velocity_x`` and ``velocity_y`` data variables.
+        Return a zero motion field when no motion is detected.
 
         If **dense=False**, it returns a tuple containing the 2-dimensional
         arrays **xy** and **uv**, where x, y define the vector locations,
@@ -179,7 +171,9 @@ def dense_lucaskanade(
     Understanding Workshop, pp. 121–130, 1981.
     """
 
-    input_images = input_images.copy()
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    input_images = dataset[precip_var].values
 
     if verbose:
         print("Computing the motion field with the Lucas-Kanade method.")
@@ -244,7 +238,10 @@ def dense_lucaskanade(
     # return zero motion field is no sparse vectors are found
     if xy.shape[0] == 0:
         if dense:
-            return np.zeros((2, domain_size[0], domain_size[1]))
+            uvgrid = np.zeros((2, domain_size[0], domain_size[1]))
+            dataset["velocity_x"] = (["y", "x"], uvgrid[0])
+            dataset["velocity_y"] = (["y", "x"], uvgrid[1])
+            return dataset
         else:
             return xy, uv
 
@@ -266,14 +263,20 @@ def dense_lucaskanade(
 
     # return zero motion field if no sparse vectors are left for interpolation
     if xy.shape[0] == 0:
-        return np.zeros((2, domain_size[0], domain_size[1]))
+        uvgrid = np.zeros((2, domain_size[0], domain_size[1]))
+        dataset["velocity_x"] = (["y", "x"], uvgrid[0])
+        dataset["velocity_y"] = (["y", "x"], uvgrid[1])
+        return dataset
 
     # interpolation
     xgrid = np.arange(domain_size[1])
     ygrid = np.arange(domain_size[0])
     uvgrid = interpolation_method(xy, uv, xgrid, ygrid, **interp_kwargs)
 
+    dataset["velocity_x"] = (["y", "x"], uvgrid[0])
+    dataset["velocity_y"] = (["y", "x"], uvgrid[1])
+
     if verbose:
         print("--- total time: %.2f seconds ---" % (time.time() - t0))
 
-    return uvgrid
+    return dataset
diff --git a/pysteps/motion/proesmans.py b/pysteps/motion/proesmans.py
index 8760092ba..4b122a620 100644
--- a/pysteps/motion/proesmans.py
+++ b/pysteps/motion/proesmans.py
@@ -12,6 +12,7 @@
 """
 
 import numpy as np
+import xarray as xr
 from scipy.ndimage import gaussian_filter
 
 from pysteps.decorators import check_input_frames
@@ -20,7 +21,7 @@
 
 @check_input_frames(2, 2)
 def proesmans(
-    input_images,
+    dataset: xr.Dataset,
     lam=50.0,
     num_iter=100,
     num_levels=6,
@@ -34,8 +35,11 @@ def proesmans(
 
     Parameters
     ----------
-    input_images: array_like
-        Array of shape (2, m, n) containing the first and second input image.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
+        The dataset has to have a time dimension. The size of this dimension
+        has to be 2.
     lam: float
         Multiplier of the smoothness term. Smaller values give a smoother motion
         field.
@@ -49,22 +53,20 @@ def proesmans(
     verbose: bool, optional
         Verbosity enabled if True (default).
     full_output: bool, optional
-        If True, the output is a two-element tuple containing the
-        forward-backward advection and consistency fields. The first element
-        is shape (2, 2, m, n), where the index along the first dimension refers
-        to the forward and backward advection fields. The second element is an
-        array of shape (2, m, n), where the index along the first dimension
-        refers to the forward and backward consistency fields.
-        Default: False.
+        If True, both the forward and backwards advection fields are returned
+        and the consistency fields are returned as well in the ``velocity_quality``
+        data variable.
 
     Returns
     -------
     out: ndarray
-        If full_output=False, the advection field having shape (2, m, n), where
-        out[0, :, :] contains the x-components of the motion vectors and
-        out[1, :, :] contains the y-components. The velocities are in units of
-        pixels / timestep, where timestep is the time difference between the
-        two input images.
+        The input dataset with the advection field added in the ``velocity_x``
+        and ``velocity_y`` data variables.
+
+        If full_output=True, a ``velocity_direction`` dimension
+        is added to the dataset, so that the velocity data can be returned containing
+        the forward and backwards advection fields. Also the ``velocity_quality`` data
+        coordinate is present containing the forward and backward consistency fields.
 
     References
     ----------
@@ -73,6 +75,9 @@ def proesmans(
     """
     del verbose  # Not used
 
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    input_images = dataset[precip_var].values
     im1 = input_images[-2, :, :].copy()
     im2 = input_images[-1, :, :].copy()
 
@@ -89,6 +94,11 @@ def proesmans(
     advfield, quality = _compute_advection_field(im, lam, num_iter, num_levels)
 
     if not full_output:
-        return advfield[0]
+        dataset["velocity_x"] = (["y", "x"], advfield[0, 0])
+        dataset["velocity_y"] = (["y", "x"], advfield[0, 1])
     else:
-        return advfield, quality
+        dataset["velocity_x"] = (["direction", "y", "x"], advfield[:, 0])
+        dataset["velocity_y"] = (["direction", "y", "x"], advfield[:, 1])
+        dataset["velocity_quality"] = (["direction", "y", "x"], quality)
+
+    return dataset
diff --git a/pysteps/motion/vet.py b/pysteps/motion/vet.py
index 391ebe189..f30703bee 100644
--- a/pysteps/motion/vet.py
+++ b/pysteps/motion/vet.py
@@ -35,12 +35,13 @@
 """
 
 import numpy
+import xarray as xr
 from numpy.ma.core import MaskedArray
 from scipy.ndimage import zoom
 from scipy.optimize import minimize
 
 from pysteps.decorators import check_input_frames
-from pysteps.motion._vet import _warp, _cost_function
+from pysteps.motion._vet import _cost_function, _warp
 
 
 def round_int(scalar):
@@ -301,7 +302,7 @@ def vet_cost_function(
 
 @check_input_frames(2, 3)
 def vet(
-    input_images,
+    dataset: xr.Dataset,
     sectors=((32, 16, 4, 2), (32, 16, 4, 2)),
     smooth_gain=1e6,
     first_guess=None,
@@ -366,15 +367,13 @@ def vet(
 
     Parameters
     ----------
-    input_images: ndarray_ or MaskedArray
-        Input images, sequence of 2D arrays, or 3D arrays.
-        The first dimension represents the images time dimension.
-
-        The template_image (first element in first dimensions) denotes the
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
+        The dataset has to have a time dimension. The size of this dimension
+        has to be 2. The first element in the time dimension denotes the
         reference image used to obtain the displacement (2D array).
         The second is the target image.
-
-        The expected dimensions are (2,ni,nj).
     sectors: list or array, optional
         Number of sectors on each dimension used in the scaling procedure.
         If dimension is 1, the same sectors will be used both image dimensions
@@ -411,13 +410,11 @@ def vet(
 
     Returns
     -------
-    displacement_field: ndarray_
-        Displacement Field (2D array representing the transformation) that
-        warps the template image into the input image.
-        The dimensions are (2,ni,nj), where the first
-        dimension indicates the displacement along x (0) or y (1) in units of
-        pixels / timestep as given by the input_images array.
-    intermediate_steps: list of ndarray_
+    out: xarray.Dataset
+        The input dataset with the displacement field that
+        warps the template image into the input image added in the ``velocity_x``
+        and ``velocity_y`` data variables.
+    intermediate_steps: list of ndarray_, optional
         List with the first guesses obtained during the scaling procedure.
 
     References
@@ -437,6 +434,9 @@ def vet(
     Nocedal, J, and S J Wright. 2006. Numerical Optimization. Springer New York.
     """
 
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    input_images = dataset[precip_var].values
     if verbose:
 
         def debug_print(*args, **kwargs):
@@ -642,7 +642,10 @@ def debug_print(*args, **kwargs):
     if padding > 0:
         first_guess = first_guess[:, padding:-padding, padding:-padding]
 
+    dataset["velocity_x"] = (["y", "x"], first_guess[0])
+    dataset["velocity_y"] = (["y", "x"], first_guess[1])
+
     if intermediate_steps:
-        return first_guess, scaling_guesses
+        return dataset, scaling_guesses
 
-    return first_guess
+    return dataset
diff --git a/pysteps/nowcasts/anvil.py b/pysteps/nowcasts/anvil.py
index f5af038bb..88ed6b0af 100644
--- a/pysteps/nowcasts/anvil.py
+++ b/pysteps/nowcasts/anvil.py
@@ -21,11 +21,13 @@
 import time
 
 import numpy as np
+import xarray as xr
 from scipy.ndimage import gaussian_filter
 
 from pysteps import cascade, extrapolation, utils
 from pysteps.nowcasts.utils import nowcast_main_loop
 from pysteps.timeseries import autoregression
+from pysteps.xarray_helpers import convert_output_to_xarray_dataset
 
 try:
     import dask
@@ -36,10 +38,8 @@
 
 
 def forecast(
-    vil,
-    velocity,
+    dataset: xr.Dataset,
     timesteps,
-    rainrate=None,
     n_cascade_levels=6,
     extrap_method="semilagrangian",
     ar_order=2,
@@ -70,22 +70,21 @@ def forecast(
 
     Parameters
     ----------
-    vil: array_like
-        Array of shape (ar_order+2,m,n) containing the input fields ordered by
-        timestamp from oldest to newest. The inputs are expected to contain VIL
-        or rain rate. The time steps between the inputs are assumed to be regular.
-    velocity: array_like
-        Array of shape (2,m,n) containing the x- and y-components of the
-        advection field. The velocities are assumed to represent one time step
-        between the inputs. All values are required to be finite.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+        ``velocity_y`` data variables, as well as either VIL values in the
+        ``precip_accum`` data variable or rainrate in the ``precip_intensity``
+        data variable. The time dimension of the dataset has to be size
+        ``ar_order + 2`` and the precipitation variable has to have this dimension.
+        When VIL values are supplied, optionally ``precip_accum`` can be supplied
+        as well without a time dimension, containing the most recently observed rain
+        rate field. If not supplied, no R(VIL) conversion is done and the outputs
+        are in the same units as the inputs.
     timesteps: int or list of floats
         Number of time steps to forecast or a list of time steps for which the
         forecasts are computed (relative to the input time step). The elements
         of the list are required to be in ascending order.
-    rainrate: array_like
-        Array of shape (m,n) containing the most recently observed rain rate
-        field. If set to None, no R(VIL) conversion is done and the outputs
-        are in the same units as the inputs.
     n_cascade_levels: int, optional
         The number of cascade levels to use. Defaults to 6, see issue #385
         on GitHub.
@@ -128,18 +127,28 @@ def forecast(
 
     Returns
     -------
-    out: ndarray
-        A three-dimensional array of shape (num_timesteps,m,n) containing a time
-        series of forecast precipitation fields. The time series starts from
-        t0+timestep, where timestep is taken from the input VIL/rain rate
-        fields. If measure_time is True, the return value is a three-element
-        tuple containing the nowcast array, the initialization time of the
-        nowcast generator and the time used in the main loop (seconds).
+    out: xarray.Dataset
+        If return_output is True, a dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers` is returned containing a time series of forecast
+        precipitation fields. Otherwise, a None value
+        is returned. The time series starts from t0+timestep, where timestep is
+        taken from the metadata of the time coordinate. If measure_time is True, the
+        return value is a three-element tuple containing the nowcast dataset, the
+        initialization time of the nowcast generator and the time used in the
+        main loop (seconds).
 
     References
     ----------
     :cite:`PCLH2020`
     """
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    vil = dataset[precip_var].values
+    velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]])
+    rainrate = None
+    if precip_var == "precip_intensity" and "precip_accum" in dataset:
+        rainrate = dataset["precip_accum"].values
+
     _check_inputs(vil, rainrate, velocity, timesteps, ar_order)
 
     if extrap_kwargs is None:
@@ -292,8 +301,6 @@ def worker(vil, i):
 
     print("Starting nowcast computation.")
 
-    rainrate_f = []
-
     extrap_kwargs["return_displacement"] = True
 
     state = {"vil_dec": vil_dec}
@@ -323,10 +330,11 @@ def worker(vil, i):
     if measure_time:
         rainrate_f, mainloop_time = rainrate_f
 
+    output_dataset = convert_output_to_xarray_dataset(dataset, timesteps, rainrate_f)
     if measure_time:
-        return np.stack(rainrate_f), init_time, mainloop_time
+        return output_dataset, init_time, mainloop_time
     else:
-        return np.stack(rainrate_f)
+        return output_dataset
 
 
 def _check_inputs(vil, rainrate, velocity, timesteps, ar_order):
diff --git a/pysteps/nowcasts/extrapolation.py b/pysteps/nowcasts/extrapolation.py
index 143a39d7c..a70b6985c 100644
--- a/pysteps/nowcasts/extrapolation.py
+++ b/pysteps/nowcasts/extrapolation.py
@@ -11,14 +11,16 @@
 """
 
 import time
+
 import numpy as np
+import xarray as xr
 
 from pysteps import extrapolation
+from pysteps.xarray_helpers import convert_output_to_xarray_dataset
 
 
 def forecast(
-    precip,
-    velocity,
+    dataset: xr.Dataset,
     timesteps,
     extrap_method="semilagrangian",
     extrap_kwargs=None,
@@ -32,13 +34,11 @@ def forecast(
 
     Parameters
     ----------
-    precip: array-like
-        Two-dimensional array of shape (m,n) containing the input precipitation
-        field.
-    velocity: array-like
-        Array of shape (2,m,n) containing the x- and y-components of the
-        advection field. The velocities are assumed to represent one time step
-        between the inputs.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+        ``velocity_y`` data variables, as well as any pecipitation data variable.
+        It should contain a time dimension of size 1.
     timesteps: int or list of floats
         Number of time steps to forecast or a list of time steps for which the
         forecasts are computed (relative to the input time step). The elements
@@ -54,18 +54,25 @@ def forecast(
 
     Returns
     -------
-    out: ndarray_
-      Three-dimensional array of shape (num_timesteps, m, n) containing a time
-      series of nowcast precipitation fields. The time series starts from
-      t0 + timestep, where timestep is taken from the advection field velocity.
-      If *measure_time* is True, the return value is a two-element tuple
-      containing this array and the computation time (seconds).
+    out: xarray.Dataset
+        If return_output is True, a dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers` is returned containing a time series of forecast
+        precipitation fields. Otherwise, a None value
+        is returned. The time series starts from t0+timestep, where timestep is
+        taken from the metadata of the time coordinate. If measure_time is True, the
+        return value is a three-element tuple containing the nowcast dataset, the
+        initialization time of the nowcast generator and the time used in the
+        main loop (seconds).
 
     See also
     --------
     pysteps.extrapolation.interface
     """
 
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    precip = dataset[precip_var].values[0]
+    velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]])
     _check_inputs(precip, velocity, timesteps)
 
     if extrap_kwargs is None:
@@ -95,10 +102,13 @@ def forecast(
         computation_time = time.time() - start_time
         print(f"{computation_time:.2f} seconds.")
 
+    output_dataset = convert_output_to_xarray_dataset(
+        dataset, timesteps, precip_forecast
+    )
     if measure_time:
-        return precip_forecast, computation_time
+        return output_dataset, computation_time
     else:
-        return precip_forecast
+        return output_dataset
 
 
 def _check_inputs(precip, velocity, timesteps):
diff --git a/pysteps/nowcasts/lagrangian_probability.py b/pysteps/nowcasts/lagrangian_probability.py
index 727e94806..7bae440cc 100644
--- a/pysteps/nowcasts/lagrangian_probability.py
+++ b/pysteps/nowcasts/lagrangian_probability.py
@@ -12,20 +12,20 @@
 """
 
 import numpy as np
+import xarray as xr
 from scipy.signal import convolve
 
 from pysteps.nowcasts import extrapolation
 
 
 def forecast(
-    precip,
-    velocity,
+    dataset: xr.Dataset,
     timesteps,
     threshold,
     extrap_method="semilagrangian",
     extrap_kwargs=None,
     slope=5,
-):
+) -> xr.Dataset:
     """
     Generate a probability nowcast by a local lagrangian approach. The ouput is
     the probability of exceeding a given intensity threshold, i.e.
@@ -33,13 +33,11 @@ def forecast(
 
     Parameters
     ----------
-    precip: array_like
-       Two-dimensional array of shape (m,n) containing the input precipitation
-       field.
-    velocity: array_like
-       Array of shape (2,m,n) containing the x- and y-components of the
-       advection field. The velocities are assumed to represent one time step
-       between the inputs.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+        ``velocity_y`` data variables, as well as any pecipitation data variable.
+        It should contain a time dimension of size 1.
     timesteps: int or list of floats
        Number of time steps to forecast or a sorted list of time steps for which
        the forecasts are computed (relative to the input time step).
@@ -54,10 +52,15 @@ def forecast(
 
     Returns
     -------
-    out: ndarray
-        Three-dimensional array of shape (num_timesteps, m, n) containing a time
-        series of nowcast exceedence probabilities. The time series starts from
-        t0 + timestep, where timestep is taken from the advection field velocity.
+    out: xarray.Dataset
+        If return_output is True, a dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers` is returned containing a time series of forecast
+        precipitation fields. Otherwise, a None value
+        is returned. The time series starts from t0+timestep, where timestep is
+        taken from the metadata of the time coordinate. If measure_time is True, the
+        return value is a three-element tuple containing the nowcast dataset, the
+        initialization time of the nowcast generator and the time used in the
+        main loop (seconds).
 
     References
     ----------
@@ -68,16 +71,14 @@ def forecast(
     """
     # Compute deterministic extrapolation forecast
     if isinstance(timesteps, int) and timesteps > 0:
-        timesteps = np.arange(1, timesteps + 1)
+        timesteps = list(range(1, timesteps + 1))
     elif not isinstance(timesteps, list):
         raise ValueError(f"invalid value for argument 'timesteps': {timesteps}")
-    precip_forecast = extrapolation.forecast(
-        precip,
-        velocity,
-        timesteps,
-        extrap_method,
-        extrap_kwargs,
+    dataset_forecast = extrapolation.forecast(
+        dataset, timesteps, extrap_method, extrap_kwargs
     )
+    precip_var = dataset_forecast.attrs["precip_var"]
+    precip_forecast = dataset_forecast[precip_var].values
 
     # Ignore missing values
     nanmask = np.isnan(precip_forecast)
@@ -104,7 +105,8 @@ def forecast(
         precip_forecast[i, ...] /= kernel_sum
     precip_forecast = np.clip(precip_forecast, 0, 1)
     precip_forecast[nanmask] = np.nan
-    return precip_forecast
+    dataset_forecast[precip_var].data[:] = precip_forecast
+    return dataset_forecast
 
 
 def _get_kernel(size):
diff --git a/pysteps/nowcasts/linda.py b/pysteps/nowcasts/linda.py
index 7d737eaa9..2fc5dfd70 100644
--- a/pysteps/nowcasts/linda.py
+++ b/pysteps/nowcasts/linda.py
@@ -40,6 +40,8 @@
 import time
 import warnings
 
+from pysteps.xarray_helpers import convert_output_to_xarray_dataset
+
 try:
     import dask
 
@@ -47,28 +49,19 @@
 except ImportError:
     DASK_IMPORTED = False
 import numpy as np
+import xarray as xr
+from scipy import optimize as opt
+from scipy import stats
 from scipy.integrate import nquad
 from scipy.interpolate import interp1d
-from scipy import optimize as opt
 from scipy.signal import convolve
-from scipy import stats
 
 from pysteps import extrapolation, feature, noise
-from pysteps.decorators import deprecate_args
 from pysteps.nowcasts.utils import nowcast_main_loop
 
 
-@deprecate_args(
-    {
-        "precip_fields": "precip",
-        "advection_field": "velocity",
-        "num_ens_members": "n_ens_members",
-    },
-    "1.8.0",
-)
 def forecast(
-    precip,
-    velocity,
+    dataset: xr.Dataset,
     timesteps,
     feature_method="blob",
     max_num_features=25,
@@ -100,15 +93,13 @@ def forecast(
 
     Parameters
     ----------
-    precip: array_like
-        Array of shape (ari_order + 2, m, n) containing the input rain rate
-        or reflectivity fields (in linear scale) ordered by timestamp from
-        oldest to newest. The time steps between the inputs are assumed to be
-        regular.
-    velocity: array_like
-        Array of shape (2, m, n) containing the x- and y-components of the
-        advection field. The velocities are assumed to represent one time step
-        between the inputs.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+        ``velocity_y`` data variables, as well as either reflectivity values in the
+        ``reflectivity`` data variable (in linear scale) or rainrate in the ``precip_intensity``
+        data variable. The time dimension of the dataset has to be size
+        ``ari_order + 2`` and the precipitation variable has to have this dimension.
     timesteps: int
         Number of time steps to forecast.
     feature_method: {'blob', 'domain' 'shitomasi'}
@@ -202,16 +193,15 @@ def forecast(
 
     Returns
     -------
-    out: numpy.ndarray
-        A four-dimensional array of shape (n_ens_members, timesteps, m, n)
-        containing a time series of forecast precipitation fields for each
-        ensemble member. If add_perturbations is False, the first dimension is
-        dropped. The time series starts from t0 + timestep, where timestep is
-        taken from the input fields. If measure_time is True, the return value
-        is a three-element tuple containing the nowcast array, the initialization
-        time of the nowcast generator and the time used in the main loop
-        (seconds). If return_output is set to False, a single None value is
-        returned instead.
+    out: xarray.Dataset
+        If return_output is True, a dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers` is returned containing a time series of forecast
+        precipitation fields for each ensemble member. Otherwise, a None value
+        is returned. The time series starts from t0+timestep, where timestep is
+        taken from the metadata of the time coordinate. If measure_time is True, the
+        return value is a three-element tuple containing the nowcast dataset, the
+        initialization time of the nowcast generator and the time used in the
+        main loop (seconds).
 
     Notes
     -----
@@ -224,6 +214,10 @@ def forecast(
     variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many
     simultaneous threads.
     """
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    precip = dataset[precip_var].values
+    velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]])
     _check_inputs(precip, velocity, timesteps, ari_order)
 
     if feature_kwargs is None:
@@ -363,14 +357,21 @@ def forecast(
         callback,
     )
 
-    if return_output:
-        if measure_time:
-            return precip_forecast[0], init_time, precip_forecast[1]
-        else:
-            return precip_forecast
-    else:
+    if not return_output:
         return None
 
+    if measure_time:
+        precip_forecast, mainloop_time = precip_forecast
+
+    output_dataset = convert_output_to_xarray_dataset(
+        dataset, timesteps, precip_forecast
+    )
+
+    if measure_time:
+        return output_dataset, init_time, mainloop_time
+    else:
+        return output_dataset
+
 
 def _check_inputs(precip, velocity, timesteps, ari_order):
     if ari_order not in [1, 2]:
diff --git a/pysteps/nowcasts/sprog.py b/pysteps/nowcasts/sprog.py
index 86c840dcb..2ebcfde41 100644
--- a/pysteps/nowcasts/sprog.py
+++ b/pysteps/nowcasts/sprog.py
@@ -10,17 +10,17 @@
     forecast
 """
 
-import numpy as np
 import time
 
-from pysteps import cascade
-from pysteps import extrapolation
-from pysteps import utils
-from pysteps.decorators import deprecate_args
+import numpy as np
+import xarray as xr
+
+from pysteps import cascade, extrapolation, utils
 from pysteps.nowcasts import utils as nowcast_utils
+from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop
 from pysteps.postprocessing import probmatching
 from pysteps.timeseries import autoregression, correlation
-from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop
+from pysteps.xarray_helpers import convert_output_to_xarray_dataset
 
 try:
     import dask
@@ -30,10 +30,8 @@
     DASK_IMPORTED = False
 
 
-@deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0")
 def forecast(
-    precip,
-    velocity,
+    dataset: xr.Dataset,
     timesteps,
     precip_thr=None,
     n_cascade_levels=6,
@@ -55,15 +53,13 @@ def forecast(
 
     Parameters
     ----------
-    precip: array-like
-        Array of shape (ar_order+1,m,n) containing the input precipitation fields
-        ordered by timestamp from oldest to newest. The time steps between
-        the inputs are assumed to be regular.
-    velocity: array-like
-        Array of shape (2,m,n) containing the x- and y-components of the
-        advection field.
-        The velocities are assumed to represent one time step between the
-        inputs. All values are required to be finite.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+        ``velocity_y`` data variables, as well as any precipitation data variable.
+        The time dimension of the dataset has to be size
+        ``ar_order + 1`` and the precipitation variable has to have this dimension. All
+        velocity values are required to be finite.
     timesteps: int or list of floats
         Number of time steps to forecast or a list of time steps for which the
         forecasts are computed (relative to the input time step). The elements
@@ -120,13 +116,15 @@ def forecast(
 
     Returns
     -------
-    out: ndarray
-        A three-dimensional array of shape (num_timesteps,m,n) containing a time
-        series of forecast precipitation fields. The time series starts from
-        t0+timestep, where timestep is taken from the input precipitation fields
-        precip. If measure_time is True, the return value is a three-element
-        tuple containing the nowcast array, the initialization time of the
-        nowcast generator and the time used in the main loop (seconds).
+    out: xarray.Dataset
+        If return_output is True, a dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers` is returned containing a time series of forecast
+        precipitation fields. Otherwise, a None value
+        is returned. The time series starts from t0+timestep, where timestep is
+        taken from the metadata of the time coordinate. If measure_time is True, the
+        return value is a three-element tuple containing the nowcast dataset, the
+        initialization time of the nowcast generator and the time used in the
+        main loop (seconds).
 
     See also
     --------
@@ -137,6 +135,10 @@ def forecast(
     :cite:`Seed2003`, :cite:`PCH2019a`
     """
 
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    precip = dataset[precip_var].values
+    velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]])
     _check_inputs(precip, velocity, timesteps, ar_order)
 
     if extrap_kwargs is None:
@@ -327,8 +329,6 @@ def f(precip, i):
 
     print("Starting nowcast computation.")
 
-    precip_forecast = []
-
     state = {"precip_cascades": precip_cascades, "precip_decomp": precip_decomp}
     params = {
         "domain": domain,
@@ -358,12 +358,14 @@ def f(precip, i):
     if measure_time:
         precip_forecast, mainloop_time = precip_forecast
 
-    precip_forecast = np.stack(precip_forecast)
+    output_dataset = convert_output_to_xarray_dataset(
+        dataset, timesteps, precip_forecast
+    )
 
     if measure_time:
-        return precip_forecast, init_time, mainloop_time
+        return output_dataset, init_time, mainloop_time
     else:
-        return precip_forecast
+        return output_dataset
 
 
 def _check_inputs(precip, velocity, timesteps, ar_order):
diff --git a/pysteps/nowcasts/sseps.py b/pysteps/nowcasts/sseps.py
index a8848d3e3..1d083c04b 100644
--- a/pysteps/nowcasts/sseps.py
+++ b/pysteps/nowcasts/sseps.py
@@ -18,18 +18,17 @@
     forecast
 """
 
-import numpy as np
 import time
-from scipy.ndimage import generate_binary_structure, iterate_structure
 
+import numpy as np
+import xarray as xr
+from scipy.ndimage import generate_binary_structure, iterate_structure
 
-from pysteps import cascade
-from pysteps import extrapolation
-from pysteps import noise
-from pysteps.decorators import deprecate_args
+from pysteps import cascade, extrapolation, noise
 from pysteps.nowcasts import utils as nowcast_utils
 from pysteps.postprocessing import probmatching
 from pysteps.timeseries import autoregression, correlation
+from pysteps.xarray_helpers import convert_output_to_xarray_dataset
 
 try:
     import dask
@@ -39,11 +38,8 @@
     dask_imported = False
 
 
-@deprecate_args({"R": "precip", "V": "velocity"}, "1.8.0")
 def forecast(
-    precip,
-    metadata,
-    velocity,
+    dataset: xr.Dataset,
     timesteps,
     n_ens_members=24,
     n_cascade_levels=6,
@@ -78,18 +74,14 @@ def forecast(
 
     Parameters
     ----------
-    precip: array-like
-        Array of shape (ar_order+1,m,n) containing the input precipitation fields
-        ordered by timestamp from oldest to newest. The time steps between the inputs
-        are assumed to be regular, and the inputs are required to have finite values.
-    metadata: dict
-        Metadata dictionary containing the accutime, xpixelsize, threshold and
-        zerovalue attributes as described in the documentation of
-        :py:mod:`pysteps.io.importers`. xpixelsize is assumed to be in meters.
-    velocity: array-like
-        Array of shape (2,m,n) containing the x- and y-components of the advection
-        field. The velocities are assumed to represent one time step between the
-        inputs. All values are required to be finite.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+        ``velocity_y`` data variables, as well as any precipitation data variable.
+        The units and stepsize of ``y`` and ``x`` have to be the same and the only supported
+        units are meters and kilometers. The time dimension of the dataset has to be size
+        ``ar_order + 1`` and the precipitation variable has to have this dimension. All
+        velocity values are required to be finite.
     win_size: int or two-element sequence of ints
         Size-length of the localization window.
     overlap: float [0,1[
@@ -181,12 +173,15 @@ def forecast(
 
     Returns
     -------
-    out: ndarray
-        If return_output is True, a four-dimensional array of shape
-        (n_ens_members,num_timesteps,m,n) containing a time series of forecast
+    out: xarray.Dataset
+        If return_output is True, a dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers` is returned containing a time series of forecast
         precipitation fields for each ensemble member. Otherwise, a None value
         is returned. The time series starts from t0+timestep, where timestep is
-        taken from the input precipitation fields.
+        taken from the metadata of the time coordinate. If measure_time is True, the
+        return value is a three-element tuple containing the nowcast dataset, the
+        initialization time of the nowcast generator and the time used in the
+        main loop (seconds).
 
     See also
     --------
@@ -201,7 +196,20 @@ def forecast(
     ----------
     :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`NBSG2017`
     """
-
+    timesteps_in = timesteps
+    x_units = dataset["x"].attrs["units"]
+    y_units = dataset["y"].attrs["units"]
+    x_stepsize = dataset["x"].attrs["stepsize"]
+    y_stepsize = dataset["y"].attrs["stepsize"]
+    if x_units != y_units or x_stepsize != y_stepsize:
+        raise ValueError("units and stepsize needs to be the same for x and y")
+    if x_units not in ["m", "km"]:
+        raise ValueError("only m and km supported as x and y units")
+
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    precip = dataset[precip_var].values
+    velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]])
     _check_inputs(precip, velocity, timesteps, ar_order)
 
     if extrap_kwargs is None:
@@ -237,8 +245,10 @@ def forecast(
     else:
         win_size = tuple([int(win_size[i]) for i in range(2)])
 
-    timestep = metadata["accutime"]
-    kmperpixel = metadata["xpixelsize"] / 1000
+    timestep = dataset["time"].attrs["stepsize"] / 60
+    kmperpixel = x_stepsize
+    if x_units == "m":
+        kmperpixel = kmperpixel / 1000
 
     print("Computing SSEPS nowcast")
     print("-----------------------")
@@ -292,8 +302,8 @@ def forecast(
             f"velocity perturbations, perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}"
         )
 
-    precip_thr = metadata["threshold"]
-    precip_min = metadata["zerovalue"]
+    precip_thr = dataset[precip_var].attrs["threshold"]
+    precip_min = dataset[precip_var].attrs["zerovalue"]
 
     num_ensemble_workers = n_ens_members if num_workers > n_ens_members else num_workers
 
@@ -911,10 +921,12 @@ def worker(j):
 
     if return_output:
         outarr = np.stack([np.stack(precip_forecast[j]) for j in range(n_ens_members)])
+        output_dataset = convert_output_to_xarray_dataset(dataset, timesteps_in, outarr)
+
         if measure_time:
-            return outarr, init_time, mainloop_time
+            return output_dataset, init_time, mainloop_time
         else:
-            return outarr
+            return output_dataset
     else:
         return None
 
diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py
index 50c6a5d22..366f32f6d 100644
--- a/pysteps/nowcasts/steps.py
+++ b/pysteps/nowcasts/steps.py
@@ -11,19 +11,18 @@
     forecast
 """
 
+import time
+
 import numpy as np
+import xarray as xr
 from scipy.ndimage import generate_binary_structure, iterate_structure
-import time
 
-from pysteps import cascade
-from pysteps import extrapolation
-from pysteps import noise
-from pysteps import utils
-from pysteps.decorators import deprecate_args
+from pysteps import cascade, extrapolation, noise, utils
 from pysteps.nowcasts import utils as nowcast_utils
+from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop
 from pysteps.postprocessing import probmatching
 from pysteps.timeseries import autoregression, correlation
-from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop
+from pysteps.xarray_helpers import convert_output_to_xarray_dataset
 
 try:
     import dask
@@ -33,10 +32,8 @@
     DASK_IMPORTED = False
 
 
-@deprecate_args({"R": "precip", "V": "velocity", "R_thr": "precip_thr"}, "1.8.0")
 def forecast(
-    precip,
-    velocity,
+    dataset: xr.Dataset,
     timesteps,
     n_ens_members=24,
     n_cascade_levels=6,
@@ -72,14 +69,13 @@ def forecast(
 
     Parameters
     ----------
-    precip: array-like
-        Array of shape (ar_order+1,m,n) containing the input precipitation fields
-        ordered by timestamp from oldest to newest. The time steps between the
-        inputs are assumed to be regular.
-    velocity: array-like
-        Array of shape (2,m,n) containing the x- and y-components of the advection
-        field. The velocities are assumed to represent one time step between the
-        inputs. All values are required to be finite.
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+        ``velocity_y`` data variables, as well as any precipitation data variable.
+        The time dimension of the dataset has to be size
+        ``ar_order + 1`` and the precipitation variable has to have this dimension. All
+        velocity values are required to be finite.
     timesteps: int or list of floats
         Number of time steps to forecast or a list of time steps for which the
         forecasts are computed (relative to the input time step). The elements
@@ -241,13 +237,13 @@ def forecast(
 
     Returns
     -------
-    out: ndarray
-        If return_output is True, a four-dimensional array of shape
-        (n_ens_members,num_timesteps,m,n) containing a time series of forecast
+    out: xarray.Dataset
+        If return_output is True, a dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers` is returned containing a time series of forecast
         precipitation fields for each ensemble member. Otherwise, a None value
         is returned. The time series starts from t0+timestep, where timestep is
-        taken from the input precipitation fields. If measure_time is True, the
-        return value is a three-element tuple containing the nowcast array, the
+        taken from the metadata of the time coordinate. If measure_time is True, the
+        return value is a three-element tuple containing the nowcast dataset, the
         initialization time of the nowcast generator and the time used in the
         main loop (seconds).
 
@@ -261,6 +257,11 @@ def forecast(
     :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b`
     """
 
+    timesteps_in = timesteps
+    dataset = dataset.copy(deep=True)
+    precip_var = dataset.attrs["precip_var"]
+    precip = dataset[precip_var].values
+    velocity = np.stack([dataset["velocity_x"], dataset["velocity_y"]])
     _check_inputs(precip, velocity, timesteps, ar_order)
 
     if extrap_kwargs is None:
@@ -574,8 +575,6 @@ def f(precip, i):
     else:
         velocity_perturbators = None
 
-    precip_forecast = [[] for _ in range(n_ens_members)]
-
     if probmatching_method == "mean":
         mu_0 = np.mean(precip[-1, :, :][precip[-1, :, :] >= precip_thr])
     else:
@@ -680,13 +679,13 @@ def f(precip, i):
         precip_forecast, mainloop_time = precip_forecast
 
     if return_output:
-        precip_forecast = np.stack(
-            [np.stack(precip_forecast[j]) for j in range(n_ens_members)]
+        output_dataset = convert_output_to_xarray_dataset(
+            dataset, timesteps_in, precip_forecast
         )
         if measure_time:
-            return precip_forecast, init_time, mainloop_time
+            return output_dataset, init_time, mainloop_time
         else:
-            return precip_forecast
+            return output_dataset
     else:
         return None
 
diff --git a/pysteps/nowcasts/utils.py b/pysteps/nowcasts/utils.py
index fd111e28d..fed1c2f96 100644
--- a/pysteps/nowcasts/utils.py
+++ b/pysteps/nowcasts/utils.py
@@ -17,6 +17,7 @@
 """
 
 import time
+
 import numpy as np
 from scipy.ndimage import binary_dilation, generate_binary_structure
 
@@ -412,10 +413,10 @@ def worker2(i):
         if not ensemble:
             precip_forecast_out = precip_forecast_out[0, :]
 
-    if measure_time:
-        return precip_forecast_out, time.time() - starttime_total
-    else:
-        return precip_forecast_out
+        if measure_time:
+            return precip_forecast_out, time.time() - starttime_total
+        else:
+            return precip_forecast_out
 
 
 def print_ar_params(phi):
diff --git a/pysteps/tests/test_motion_lk.py b/pysteps/tests/test_motion_lk.py
index 871dcd98b..a8f640533 100644
--- a/pysteps/tests/test_motion_lk.py
+++ b/pysteps/tests/test_motion_lk.py
@@ -3,8 +3,8 @@
 """
 """
 
-import pytest
 import numpy as np
+import pytest
 
 from pysteps import motion, verification
 from pysteps.tests.helpers import get_precipitation_fields
@@ -61,19 +61,19 @@ def test_lk(
         pytest.importorskip("pandas")
 
     # inputs
-    precip, metadata = get_precipitation_fields(
+    dataset = get_precipitation_fields(
         num_prev_files=2,
         num_next_files=0,
         return_raw=False,
         metadata=True,
         upscale=2000,
     )
-    precip = precip.filled()
+    precip_var = dataset.attrs["precip_var"]
 
     # Retrieve motion field
     oflow_method = motion.get_method("LK")
-    output = oflow_method(
-        precip,
+    output_dataset = oflow_method(
+        dataset,
         lk_kwargs=lk_kwargs,
         fd_method=fd_method,
         dense=dense,
@@ -86,13 +86,17 @@ def test_lk(
 
     # Check format of ouput
     if dense:
+        output = np.stack(
+            [output_dataset["velocity_x"].values, output_dataset["velocity_y"].values]
+        )
         assert isinstance(output, np.ndarray)
         assert output.ndim == 3
         assert output.shape[0] == 2
-        assert output.shape[1:] == precip[0].shape
+        assert output.shape[1:] == dataset[precip_var].values[0].shape
         if nr_std_outlier == 0:
             assert output.sum() == 0
     else:
+        output = output_dataset
         assert isinstance(output, tuple)
         assert len(output) == 2
         assert isinstance(output[0], np.ndarray)
diff --git a/pysteps/tests/test_nowcasts_anvil.py b/pysteps/tests/test_nowcasts_anvil.py
index 14a130fb1..35d84e0f3 100644
--- a/pysteps/tests/test_nowcasts_anvil.py
+++ b/pysteps/tests/test_nowcasts_anvil.py
@@ -31,31 +31,28 @@ def test_anvil_rainrate(
 ):
     """Tests ANVIL nowcast using rain rate precipitation fields."""
     # inputs
-    precip_input = get_precipitation_fields(
+    dataset_input = get_precipitation_fields(
         num_prev_files=4,
         num_next_files=0,
         return_raw=False,
         metadata=False,
         upscale=2000,
     )
-    precip_input = precip_input.filled()
 
-    precip_obs = get_precipitation_fields(
+    dataset_obs = get_precipitation_fields(
         num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000
-    )[1:, :, :]
-    precip_obs = precip_obs.filled()
+    ).isel(time=slice(1, None, None))
+    precip_var = dataset_input.attrs["precip_var"]
 
     pytest.importorskip("cv2")
     oflow_method = motion.get_method("LK")
-    retrieved_motion = oflow_method(precip_input)
+    dataset_w_motion = oflow_method(dataset_input)
 
     nowcast_method = nowcasts.get_method("anvil")
 
     output = nowcast_method(
-        precip_input[-(ar_order + 2) :],
-        retrieved_motion,
+        dataset_w_motion.isel(time=slice(-(ar_order + 2), None, None)),
         timesteps=timesteps,
-        rainrate=None,  # no R(VIL) conversion is done
         n_cascade_levels=n_cascade_levels,
         ar_order=ar_order,
         ar_window_radius=ar_window_radius,
@@ -63,9 +60,10 @@ def test_anvil_rainrate(
         measure_time=measure_time,
     )
     if measure_time:
-        precip_forecast, __, __ = output
+        dataset_forecast, __, __ = output
     else:
-        precip_forecast = output
+        dataset_forecast = output
+    precip_forecast = dataset_forecast[precip_var].values
 
     assert precip_forecast.ndim == 3
     assert precip_forecast.shape[0] == (
@@ -73,7 +71,7 @@ def test_anvil_rainrate(
     )
 
     result = verification.det_cat_fct(
-        precip_forecast[-1], precip_obs[-1], thr=0.1, scores="CSI"
+        precip_forecast[-1], dataset_obs[precip_var].values[-1], thr=0.1, scores="CSI"
     )["CSI"]
     assert result > min_csi, f"CSI={result:.2f}, required > {min_csi:.2f}"
 
diff --git a/pysteps/tests/test_nowcasts_lagrangian_probability.py b/pysteps/tests/test_nowcasts_lagrangian_probability.py
index 1ec352b0b..d75b29e87 100644
--- a/pysteps/tests/test_nowcasts_lagrangian_probability.py
+++ b/pysteps/tests/test_nowcasts_lagrangian_probability.py
@@ -1,10 +1,13 @@
 # -*- coding: utf-8 -*-
+from datetime import datetime, timezone
+
 import numpy as np
 import pytest
+import xarray as xr
 
+from pysteps.motion.lucaskanade import dense_lucaskanade
 from pysteps.nowcasts.lagrangian_probability import forecast
 from pysteps.tests.helpers import get_precipitation_fields
-from pysteps.motion.lucaskanade import dense_lucaskanade
 
 
 def test_numerical_example():
@@ -12,12 +15,23 @@ def test_numerical_example():
     precip = np.zeros((20, 20))
     precip[5:10, 5:10] = 1
     velocity = np.zeros((2, *precip.shape))
+    now = datetime.now(tz=timezone.utc).replace(tzinfo=None)
+    dataset_input = xr.Dataset(
+        data_vars={
+            "precip_intensity": (["time", "y", "x"], [precip]),
+            "velocity_x": (["y", "x"], velocity[0]),
+            "velocity_y": (["y", "x"], velocity[1]),
+        },
+        coords={"time": (["time"], [now], {"stepsize": 300})},
+        attrs={"precip_var": "precip_intensity"},
+    )
     timesteps = 4
     thr = 0.5
     slope = 1  # pixels / timestep
 
     # compute probability forecast
-    fct = forecast(precip, velocity, timesteps, thr, slope=slope)
+    dataset_forecast = forecast(dataset_input, timesteps, thr, slope=slope)
+    fct = dataset_forecast["precip_intensity"].values
 
     assert fct.ndim == 3
     assert fct.shape[0] == timesteps
@@ -26,7 +40,8 @@ def test_numerical_example():
     assert fct.min() >= 0.0
 
     # slope = 0 should return a binary field
-    fct = forecast(precip, velocity, timesteps, thr, slope=0)
+    dataset_forecast = forecast(dataset_input, timesteps, thr, slope=0)
+    fct = dataset_forecast["precip_intensity"].values
     ref = (np.repeat(precip[None, ...], timesteps, axis=0) >= thr).astype(float)
     assert np.allclose(fct, fct.astype(bool))
     assert np.allclose(fct, ref)
@@ -37,12 +52,23 @@ def test_numerical_example_with_float_slope_and_float_list_timesteps():
     precip = np.zeros((20, 20))
     precip[5:10, 5:10] = 1
     velocity = np.zeros((2, *precip.shape))
+    now = datetime.now(tz=timezone.utc).replace(tzinfo=None)
+    dataset_input = xr.Dataset(
+        data_vars={
+            "precip_intensity": (["time", "y", "x"], [precip]),
+            "velocity_x": (["y", "x"], velocity[0]),
+            "velocity_y": (["y", "x"], velocity[1]),
+        },
+        coords={"time": (["time"], [now], {"stepsize": 300})},
+        attrs={"precip_var": "precip_intensity"},
+    )
     timesteps = [1.0, 2.0, 5.0, 12.0]
     thr = 0.5
     slope = 1.0  # pixels / timestep
 
     # compute probability forecast
-    fct = forecast(precip, velocity, timesteps, thr, slope=slope)
+    dataset_forecast = forecast(dataset_input, timesteps, thr, slope=slope)
+    fct = dataset_forecast["precip_intensity"].values
 
     assert fct.ndim == 3
     assert fct.shape[0] == len(timesteps)
@@ -56,16 +82,18 @@ def test_real_case():
     pytest.importorskip("cv2")
 
     # inputs
-    precip, metadata = get_precipitation_fields(
+    dataset_input = get_precipitation_fields(
         num_prev_files=2,
         num_next_files=0,
         return_raw=False,
         metadata=True,
         upscale=2000,
     )
+    precip_var = dataset_input.attrs["precip_var"]
+    metadata = dataset_input[precip_var].attrs
 
     # motion
-    motion = dense_lucaskanade(precip)
+    dataset_w_motion = dense_lucaskanade(dataset_input)
 
     # parameters
     timesteps = [1, 2, 3]
@@ -74,13 +102,18 @@ def test_real_case():
 
     # compute probability forecast
     extrap_kwargs = dict(allow_nonfinite_values=True)
-    fct = forecast(
-        precip[-1], motion, timesteps, thr, slope=slope, extrap_kwargs=extrap_kwargs
+    dataset_forecast = forecast(
+        dataset_w_motion.isel(time=slice(-1, None, None)),
+        timesteps,
+        thr,
+        slope=slope,
+        extrap_kwargs=extrap_kwargs,
     )
+    fct = dataset_forecast["precip_intensity"].values
 
     assert fct.ndim == 3
     assert fct.shape[0] == len(timesteps)
-    assert fct.shape[1:] == precip.shape[1:]
+    assert fct.shape[1:] == dataset_input[precip_var].values.shape[1:]
     assert np.nanmax(fct) <= 1.0
     assert np.nanmin(fct) >= 0.0
 
@@ -89,11 +122,19 @@ def test_wrong_inputs():
     # dummy inputs
     precip = np.zeros((3, 3))
     velocity = np.zeros((2, *precip.shape))
+    dataset_input = xr.Dataset(
+        data_vars={
+            "precip_intensity": (["y", "x"], precip),
+            "velocity_x": (["y", "x"], velocity[0]),
+            "velocity_y": (["y", "x"], velocity[1]),
+        },
+        attrs={"precip_var": "precip_intensity"},
+    )
 
     # timesteps must be > 0
     with pytest.raises(ValueError):
-        forecast(precip, velocity, 0, 1)
+        forecast(dataset_input, 0, 1)
 
     # timesteps must be a sorted list
     with pytest.raises(ValueError):
-        forecast(precip, velocity, [2, 1], 1)
+        forecast(dataset_input, [2, 1], 1)
diff --git a/pysteps/tests/test_nowcasts_linda.py b/pysteps/tests/test_nowcasts_linda.py
index 2d5f03b71..a5b60611b 100644
--- a/pysteps/tests/test_nowcasts_linda.py
+++ b/pysteps/tests/test_nowcasts_linda.py
@@ -1,13 +1,13 @@
-from datetime import timedelta
 import os
+from datetime import timedelta
+
 import numpy as np
 import pytest
+import xarray as xr
 
 from pysteps import io, motion, nowcasts, verification
-from pysteps.nowcasts.linda import forecast
 from pysteps.tests.helpers import get_precipitation_fields
 
-
 linda_arg_names = (
     "add_perturbations",
     "kernel_type",
@@ -42,7 +42,7 @@ def test_linda(
     pytest.importorskip("skimage")
 
     # inputs
-    precip_input, metadata = get_precipitation_fields(
+    dataset_input = get_precipitation_fields(
         num_prev_files=2,
         num_next_files=0,
         metadata=True,
@@ -51,20 +51,23 @@ def test_linda(
         log_transform=False,
     )
 
-    precip_obs = get_precipitation_fields(
+    dataset_obs = get_precipitation_fields(
         num_prev_files=0,
         num_next_files=3,
         clip=(354000, 866000, -96000, 416000),
         upscale=4000,
         log_transform=False,
-    )[1:, :, :]
+    ).isel(time=slice(1, None, None))
+    precip_var = dataset_input.attrs["precip_var"]
+    metadata = dataset_input[precip_var].attrs
 
     oflow_method = motion.get_method("LK")
-    retrieved_motion = oflow_method(precip_input)
+    dataset_w_motion = oflow_method(dataset_input)
 
-    precip_forecast = forecast(
-        precip_input,
-        retrieved_motion,
+    nowcast_method = nowcasts.get_method("linda")
+
+    dataset_forecast = nowcast_method(
+        dataset_w_motion,
         3,
         kernel_type=kernel_type,
         vel_pert_method=vel_pert_method,
@@ -78,68 +81,82 @@ def test_linda(
         seed=42,
     )
     if measure_time:
-        assert len(precip_forecast) == 3
-        assert isinstance(precip_forecast[1], float)
-        precip_forecast = precip_forecast[0]
+        assert len(dataset_forecast) == 3
+        assert isinstance(dataset_forecast[1], float)
+        dataset_forecast = dataset_forecast[0]
+
+    precip_forecast = dataset_forecast[precip_var].values
 
     if not add_perturbations:
         assert precip_forecast.ndim == 3
         assert precip_forecast.shape[0] == 3
-        assert precip_forecast.shape[1:] == precip_input.shape[1:]
+        assert precip_forecast.shape[1:] == dataset_input[precip_var].values.shape[1:]
 
         csi = verification.det_cat_fct(
-            precip_forecast[-1], precip_obs[-1], thr=1.0, scores="CSI"
+            precip_forecast[-1],
+            dataset_obs[precip_var].values[-1],
+            thr=1.0,
+            scores="CSI",
         )["CSI"]
         assert csi > min_csi, f"CSI={csi:.1f}, required > {min_csi:.1f}"
     else:
         assert precip_forecast.ndim == 4
         assert precip_forecast.shape[0] == 5
         assert precip_forecast.shape[1] == 3
-        assert precip_forecast.shape[2:] == precip_input.shape[1:]
+        assert precip_forecast.shape[2:] == dataset_input[precip_var].values.shape[1:]
 
-        crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1])
+        crps = verification.probscores.CRPS(
+            precip_forecast[:, -1], dataset_obs[precip_var].values[-1]
+        )
         assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}"
 
 
 def test_linda_wrong_inputs():
     # dummy inputs
-    precip = np.zeros((3, 3, 3))
-    velocity = np.zeros((2, 3, 3))
+    dataset_input = xr.Dataset(
+        data_vars={
+            "precip_intensity": (["time", "y", "x"], np.zeros((3, 3, 3))),
+            "velocity_x": (["y", "x"], np.zeros((3, 3))),
+            "velocity_y": (["y", "x"], np.zeros((3, 3))),
+        },
+        attrs={"precip_var": "precip_intensity"},
+    )
+    dataset_input_4d = xr.Dataset(
+        data_vars={
+            "precip_intensity": (
+                ["ens_number", "time", "y", "x"],
+                np.zeros((3, 3, 3, 3)),
+            ),
+            "velocity_x": (["y", "x"], np.zeros((3, 3))),
+            "velocity_y": (["y", "x"], np.zeros((3, 3))),
+        },
+        attrs={"precip_var": "precip_intensity"},
+    )
+
+    nowcast_method = nowcasts.get_method("linda")
 
     # vel_pert_method is set but kmperpixel is None
     with pytest.raises(ValueError):
-        forecast(precip, velocity, 1, vel_pert_method="bps", kmperpixel=None)
+        nowcast_method(dataset_input, 1, vel_pert_method="bps", kmperpixel=None)
 
     # vel_pert_method is set but timestep is None
     with pytest.raises(ValueError):
-        forecast(
-            precip, velocity, 1, vel_pert_method="bps", kmperpixel=1, timestep=None
+        nowcast_method(
+            dataset_input, 1, vel_pert_method="bps", kmperpixel=1, timestep=None
         )
 
     # fractional time steps not yet implemented
     # timesteps is not an integer
     with pytest.raises(ValueError):
-        forecast(precip, velocity, [1.0, 2.0])
+        nowcast_method(dataset_input, [1.0, 2.0])
 
     # ari_order 1 or 2 required
     with pytest.raises(ValueError):
-        forecast(precip, velocity, 1, ari_order=3)
+        nowcast_method(dataset_input, 1, ari_order=3)
 
     # precip_fields must be a three-dimensional array
     with pytest.raises(ValueError):
-        forecast(np.zeros((3, 3, 3, 3)), velocity, 1)
-
-    # precip_fields.shape[0] < ari_order+2
-    with pytest.raises(ValueError):
-        forecast(np.zeros((2, 3, 3)), velocity, 1, ari_order=1)
-
-    # advection_field must be a three-dimensional array
-    with pytest.raises(ValueError):
-        forecast(precip, velocity[0], 1)
-
-    # dimension mismatch between precip_fields and advection_field
-    with pytest.raises(ValueError):
-        forecast(np.zeros((3, 2, 3)), velocity, 1)
+        nowcast_method(dataset_input_4d, 1)
 
 
 def test_linda_callback(tmp_path):
diff --git a/pysteps/tests/test_nowcasts_sprog.py b/pysteps/tests/test_nowcasts_sprog.py
index 1077c3edd..f64900cd8 100644
--- a/pysteps/tests/test_nowcasts_sprog.py
+++ b/pysteps/tests/test_nowcasts_sprog.py
@@ -30,29 +30,28 @@ def test_sprog(
 ):
     """Tests SPROG nowcast."""
     # inputs
-    precip_input, metadata = get_precipitation_fields(
+    dataset_input = get_precipitation_fields(
         num_prev_files=2,
         num_next_files=0,
         return_raw=False,
         metadata=True,
         upscale=2000,
     )
-    precip_input = precip_input.filled()
 
-    precip_obs = get_precipitation_fields(
+    dataset_obs = get_precipitation_fields(
         num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000
-    )[1:, :, :]
-    precip_obs = precip_obs.filled()
+    ).isel(time=slice(1, None, None))
+    precip_var = dataset_input.attrs["precip_var"]
+    metadata = dataset_input[precip_var].attrs
 
     pytest.importorskip("cv2")
     oflow_method = motion.get_method("LK")
-    retrieved_motion = oflow_method(precip_input)
+    dataset_w_motion = oflow_method(dataset_input)
 
     nowcast_method = nowcasts.get_method("sprog")
 
-    precip_forecast = nowcast_method(
-        precip_input,
-        retrieved_motion,
+    dataset_forecast = nowcast_method(
+        dataset_w_motion,
         timesteps=timesteps,
         precip_thr=metadata["threshold"],
         n_cascade_levels=n_cascade_levels,
@@ -60,6 +59,7 @@ def test_sprog(
         probmatching_method=probmatching_method,
         domain=domain,
     )
+    precip_forecast = dataset_forecast[precip_var].values
 
     assert precip_forecast.ndim == 3
     assert precip_forecast.shape[0] == (
@@ -67,7 +67,7 @@ def test_sprog(
     )
 
     result = verification.det_cat_fct(
-        precip_forecast[-1], precip_obs[-1], thr=0.1, scores="CSI"
+        precip_forecast[-1], dataset_obs[precip_var].values[-1], thr=0.1, scores="CSI"
     )["CSI"]
     assert result > min_csi, f"CSI={result:.1f}, required > {min_csi:.1f}"
 
diff --git a/pysteps/tests/test_nowcasts_sseps.py b/pysteps/tests/test_nowcasts_sseps.py
index 4d89fd33a..6d3a3c9c0 100644
--- a/pysteps/tests/test_nowcasts_sseps.py
+++ b/pysteps/tests/test_nowcasts_sseps.py
@@ -17,8 +17,8 @@
 )
 
 sseps_arg_values = [
-    (5, 6, 2, "incremental", "cdf", 200, 3, 0.60),
-    (5, 6, 2, "incremental", "cdf", 200, [3], 0.60),
+    (5, 6, 2, "incremental", "cdf", 200, 3, 0.62),
+    (5, 6, 2, "incremental", "cdf", 200, [3], 0.62),
 ]
 
 
@@ -35,32 +35,29 @@ def test_sseps(
 ):
     """Tests SSEPS nowcast."""
     # inputs
-    precip_input, metadata = get_precipitation_fields(
+    dataset_input = get_precipitation_fields(
         num_prev_files=2,
         num_next_files=0,
         return_raw=False,
         metadata=True,
         upscale=2000,
     )
-    precip_input = precip_input.filled()
+    precip_var = dataset_input.attrs["precip_var"]
 
-    precip_obs = get_precipitation_fields(
+    dataset_obs = get_precipitation_fields(
         num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000
-    )[1:, :, :]
-    precip_obs = precip_obs.filled()
+    ).isel(time=slice(1, None, None))
 
     pytest.importorskip("cv2")
     oflow_method = motion.get_method("LK")
-    retrieved_motion = oflow_method(precip_input)
+    dataset_w_motion = oflow_method(dataset_input)
 
     nowcast_method = nowcasts.get_method("sseps")
 
-    precip_forecast = nowcast_method(
-        precip_input,
-        metadata,
-        retrieved_motion,
+    dataset_forecast = nowcast_method(
+        dataset_w_motion,
+        timesteps,
         win_size=win_size,
-        timesteps=timesteps,
         n_ens_members=n_ens_members,
         n_cascade_levels=n_cascade_levels,
         ar_order=ar_order,
@@ -68,6 +65,7 @@ def test_sseps(
         mask_method=mask_method,
         probmatching_method=probmatching_method,
     )
+    precip_forecast = dataset_forecast[precip_var].values
 
     assert precip_forecast.ndim == 4
     assert precip_forecast.shape[0] == n_ens_members
@@ -75,7 +73,9 @@ def test_sseps(
         timesteps if isinstance(timesteps, int) else len(timesteps)
     )
 
-    crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1])
+    crps = verification.probscores.CRPS(
+        precip_forecast[:, -1], dataset_obs[precip_var].values[-1]
+    )
     assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}"
 
 
diff --git a/pysteps/tests/test_nowcasts_steps.py b/pysteps/tests/test_nowcasts_steps.py
index adb6ea917..16d2e4de5 100644
--- a/pysteps/tests/test_nowcasts_steps.py
+++ b/pysteps/tests/test_nowcasts_steps.py
@@ -56,17 +56,15 @@ def test_steps_skill(
     ).isel(time=slice(1, None, None))
     precip_var = dataset_input.attrs["precip_var"]
     metadata = dataset_input[precip_var].attrs
-    precip_data = dataset_input[precip_var].values
 
     pytest.importorskip("cv2")
     oflow_method = motion.get_method("LK")
-    retrieved_motion = oflow_method(precip_data)
+    dataset_w_motion = oflow_method(dataset_input)
 
     nowcast_method = nowcasts.get_method("steps")
 
-    precip_forecast = nowcast_method(
-        precip_data,
-        retrieved_motion,
+    dataset_forecast = nowcast_method(
+        dataset_w_motion,
         timesteps=timesteps,
         precip_thr=metadata["threshold"],
         kmperpixel=2.0,
@@ -79,6 +77,7 @@ def test_steps_skill(
         probmatching_method=probmatching_method,
         domain=domain,
     )
+    precip_forecast = dataset_forecast[precip_var].values
 
     assert precip_forecast.ndim == 4
     assert precip_forecast.shape[0] == n_ens_members
diff --git a/pysteps/tests/test_nowcasts_utils.py b/pysteps/tests/test_nowcasts_utils.py
index 075225427..1dfeb27a9 100644
--- a/pysteps/tests/test_nowcasts_utils.py
+++ b/pysteps/tests/test_nowcasts_utils.py
@@ -26,17 +26,18 @@ def test_nowcast_main_loop(
     timesteps, ensemble, num_ensemble_members, velocity_perturbations
 ):
     """Test the nowcast_main_loop function."""
-    precip = get_precipitation_fields(
+    dataset = get_precipitation_fields(
         num_prev_files=2,
         num_next_files=0,
         return_raw=False,
         metadata=False,
         upscale=2000,
     )
-    precip = precip.filled()
 
     oflow_method = motion.get_method("LK")
-    velocity = oflow_method(precip)
+    dataset = oflow_method(dataset)
+    precip = dataset["precip_intensity"].values
+    velocity = np.stack([dataset["velocity_x"].values, dataset["velocity_y"].values])
 
     precip = precip[-1]
 
diff --git a/pysteps/tests/test_utils_dimension.py b/pysteps/tests/test_utils_dimension.py
index 2bbb63f58..038b725a0 100644
--- a/pysteps/tests/test_utils_dimension.py
+++ b/pysteps/tests/test_utils_dimension.py
@@ -5,19 +5,17 @@
 import numpy as np
 import pytest
 import xarray as xr
-from numpy.testing import assert_array_equal
+from numpy.testing import assert_array_almost_equal, assert_array_equal
 from pytest import raises
 
-from pysteps.converters import convert_to_xarray_dataset
 from pysteps.utils import dimension
+from pysteps.xarray_helpers import convert_input_to_xarray_dataset
 
 fillvalues_metadata = {
     "x1": 0,
     "x2": 4,
     "y1": 0,
     "y2": 4,
-    "xpixelsize": 1,
-    "ypixelsize": 1,
     "zerovalue": 0,
     "yorigin": "lower",
     "unit": "mm/h",
@@ -32,7 +30,6 @@
 }
 
 test_data_not_trim = (
-    # "data, window_size, axis, method, expected"
     (
         np.arange(12).reshape(2, 6),
         2,
@@ -94,7 +91,7 @@ def test_aggregate_fields(data, window_size, dim, method, expected):
     windows size does not divide the data dimensions.
     The length of each dimension should be larger than 2.
     """
-    dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata)
+    dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata)
 
     actual = dimension.aggregate_fields(dataset, window_size, dim=dim, method=method)
     assert_array_equal(actual["precip_intensity"].values, expected)
@@ -104,7 +101,7 @@ def test_aggregate_fields(data, window_size, dim, method, expected):
         data = np.pad(data, ((0, 0), (0, 1)))
     else:
         data = np.pad(data, (0, 1))
-    dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata)
+    dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata)
 
     actual = dimension.aggregate_fields(
         dataset, window_size, dim=dim, method=method, trim=True
@@ -115,13 +112,85 @@ def test_aggregate_fields(data, window_size, dim, method, expected):
         dimension.aggregate_fields(dataset, window_size, dim=dim, method=method)
 
 
+test_data_agg_w_velocity = (
+    (
+        np.arange(12).reshape(2, 6),
+        np.arange(12).reshape(2, 6),
+        np.arange(12).reshape(2, 6),
+        np.arange(0, 1.2, 0.1).reshape(2, 6),
+        2,
+        "x",
+        "mean",
+        "mean",
+        np.array([[0.5, 2.5, 4.5], [6.5, 8.5, 10.5]]),
+        np.array([[0.5, 2.5, 4.5], [6.5, 8.5, 10.5]]),
+        np.array([[0, 0.2, 0.4], [0.6, 0.8, 1]]),
+    ),
+    (
+        np.arange(4 * 6).reshape(4, 6),
+        np.arange(4 * 6).reshape(4, 6),
+        np.arange(4 * 6).reshape(4, 6),
+        np.arange(0, 1.2, 0.05).reshape(4, 6),
+        (2, 3),
+        ("y", "x"),
+        "mean",
+        "sum",
+        np.array([[4, 7], [16, 19]]),
+        np.array([[24, 42], [96, 114]]),
+        np.array([[0, 0.15], [0.6, 0.75]]),
+    ),
+)
+
+
+@pytest.mark.parametrize(
+    "data, data_vx, data_vy, data_qual, window_size, dim, method, velocity_method, expected, expected_v, expected_qual",
+    test_data_agg_w_velocity,
+)
+def test_aggregate_fields_w_velocity(
+    data,
+    data_vx,
+    data_vy,
+    data_qual,
+    window_size,
+    dim,
+    method,
+    velocity_method,
+    expected,
+    expected_v,
+    expected_qual,
+):
+    """
+    Test the aggregate_fields function for dataset with velocity information.
+    The windows size must divide exactly the data dimensions.
+    Internally, additional test are generated for situations where the
+    windows size does not divide the data dimensions.
+    The length of each dimension should be larger than 2.
+    """
+    dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata)
+    dataset = dataset.assign(
+        {
+            "velocity_x": (("y", "x"), data_vx),
+            "velocity_y": (("y", "x"), data_vy),
+            "quality": (("y", "x"), data_qual),
+        }
+    )
+
+    actual = dimension.aggregate_fields(
+        dataset, window_size, dim=dim, method=method, velocity_method=velocity_method
+    )
+    assert_array_equal(actual["precip_intensity"].values, expected)
+    assert_array_equal(actual["velocity_x"].values, expected_v)
+    assert_array_equal(actual["velocity_y"].values, expected_v)
+    assert_array_almost_equal(actual["quality"].values, expected_qual)
+
+
 def test_aggregate_fields_errors():
     """
     Test that the errors are correctly captured in the aggregate_fields
     function.
     """
     data = np.arange(4 * 6).reshape(4, 6)
-    dataset = convert_to_xarray_dataset(data, None, fillvalues_metadata)
+    dataset = convert_input_to_xarray_dataset(data, None, fillvalues_metadata)
 
     with raises(ValueError):
         dimension.aggregate_fields(dataset, -1, dim="y")
@@ -160,7 +229,7 @@ def test_aggregate_fields_errors():
 )
 def test_aggregate_fields_time(data, metadata, time_window_min, ignore_nan, expected):
     """Test the aggregate_fields_time."""
-    dataset_ref = convert_to_xarray_dataset(
+    dataset_ref = convert_input_to_xarray_dataset(
         data, None, {**fillvalues_metadata, **metadata}
     )
     datasets = []
@@ -234,7 +303,9 @@ def test_aggregate_fields_time(data, metadata, time_window_min, ignore_nan, expe
 )
 def test_aggregate_fields_space(data, metadata, space_window, ignore_nan, expected):
     """Test the aggregate_fields_space."""
-    dataset = convert_to_xarray_dataset(data, None, {**fillvalues_metadata, **metadata})
+    dataset = convert_input_to_xarray_dataset(
+        data, None, {**fillvalues_metadata, **metadata}
+    )
     assert_array_equal(
         dimension.aggregate_fields_space(dataset, space_window, ignore_nan)[
             "precip_intensity" if metadata["unit"] == "mm/h" else "precip_accum"
@@ -271,7 +342,9 @@ def test_aggregate_fields_space(data, metadata, space_window, ignore_nan, expect
 @pytest.mark.parametrize("R, metadata, extent, expected", test_data_clip_domain)
 def test_clip_domain(R, metadata, extent, expected):
     """Test the clip_domain."""
-    dataset = convert_to_xarray_dataset(R, None, {**fillvalues_metadata, **metadata})
+    dataset = convert_input_to_xarray_dataset(
+        R, None, {**fillvalues_metadata, **metadata}
+    )
     assert_array_equal(
         dimension.clip_domain(dataset, extent)["precip_intensity"].values, expected
     )
@@ -336,12 +409,67 @@ def test_clip_domain(R, metadata, extent, expected):
 @pytest.mark.parametrize("data, metadata, method, inverse, expected", test_data_square)
 def test_square_domain(data, metadata, method, inverse, expected):
     """Test the square_domain."""
-    dataset = convert_to_xarray_dataset(data, None, {**fillvalues_metadata, **metadata})
-    dataset["precip_intensity"].attrs = {
-        **dataset["precip_intensity"].attrs,
-        **metadata,
-    }
+    dataset = convert_input_to_xarray_dataset(
+        data, None, {**fillvalues_metadata, **metadata}
+    )
+    if "square_method" in metadata:
+        dataset.attrs["square_method"] = metadata["square_method"]
+    if "orig_domain" in metadata:
+        dataset.attrs["orig_domain"] = metadata["orig_domain"]
     assert_array_equal(
         dimension.square_domain(dataset, method, inverse)["precip_intensity"].values,
         expected,
     )
+
+
+# square_domain
+R = np.ones((4, 2))
+test_data_square_w_velocity = [
+    # square by padding
+    (
+        R,
+        {"x1": 0, "x2": 2, "y1": 0, "y2": 4, "xpixelsize": 1, "ypixelsize": 1},
+        "pad",
+        False,
+        np.array([[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]),
+        np.array([[0, 1, 1, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 1, 1, 0]]),
+    )
+]
+
+
+@pytest.mark.parametrize(
+    "data, metadata, method, inverse, expected, expected_velqual",
+    test_data_square_w_velocity,
+)
+def test_square_w_velocity(data, metadata, method, inverse, expected, expected_velqual):
+    """Test the square_domain."""
+    dataset = convert_input_to_xarray_dataset(
+        data, None, {**fillvalues_metadata, **metadata}
+    )
+    dataset = dataset.assign(
+        {
+            "velocity_x": (("y", "x"), data),
+            "velocity_y": (("y", "x"), data),
+            "quality": (("y", "x"), data),
+        }
+    )
+    if "square_method" in metadata:
+        dataset.attrs["square_method"] = metadata["square_method"]
+    if "orig_domain" in metadata:
+        dataset.attrs["orig_domain"] = metadata["orig_domain"]
+    assert_array_equal(
+        dimension.square_domain(dataset, method, inverse)["precip_intensity"].values,
+        expected,
+    )
+    assert_array_equal(
+        dimension.square_domain(dataset, method, inverse)["velocity_x"].values,
+        expected_velqual,
+    )
+    assert_array_equal(
+        dimension.square_domain(dataset, method, inverse)["velocity_y"].values,
+        expected_velqual,
+    )
+    assert_array_equal(
+        dimension.square_domain(dataset, method, inverse)["quality"].values,
+        expected_velqual,
+    )
diff --git a/pysteps/utils/dimension.py b/pysteps/utils/dimension.py
index efa459610..d039d8ef0 100644
--- a/pysteps/utils/dimension.py
+++ b/pysteps/utils/dimension.py
@@ -14,14 +14,23 @@
     clip_domain
     square_domain
 """
+from typing import Any, Callable
+
 import numpy as np
 import xarray as xr
 
-from pysteps.converters import compute_lat_lon
+from pysteps.xarray_helpers import compute_lat_lon
 
-_aggregation_methods = dict(
-    sum=np.sum, mean=np.mean, nanmean=np.nanmean, nansum=np.nansum
-)
+_aggregation_methods: dict[str, Callable[..., Any]] = {
+    "sum": np.sum,
+    "mean": np.mean,
+    "min": np.min,
+    "max": np.max,
+    "nanmean": np.nanmean,
+    "nansum": np.nansum,
+    "nanmin": np.nanmin,
+    "nanmax": np.nanmax,
+}
 
 
 def aggregate_fields_time(
@@ -93,7 +102,9 @@ def aggregate_fields_time(
     if ignore_nan:
         method = "".join(("nan", method))
 
-    return aggregate_fields(dataset, window_size, dim="time", method=method)
+    return aggregate_fields(
+        dataset, window_size, dim="time", method=method, velocity_method="sum"
+    )
 
 
 def aggregate_fields_space(
@@ -147,9 +158,8 @@ def aggregate_fields_space(
     if np.isscalar(space_window):
         space_window = (space_window, space_window)
 
-    # assumes that frames are evenly spaced
-    ydelta = dataset["y"].values[1] - dataset["y"].values[0]
-    xdelta = dataset["x"].values[1] - dataset["x"].values[0]
+    ydelta = dataset["y"].attrs["stepsize"]
+    xdelta = dataset["x"].attrs["stepsize"]
 
     if space_window[0] % ydelta > 1e-10 or space_window[1] % xdelta > 1e-10:
         raise ValueError("space_window does not equally split dataset")
@@ -166,11 +176,16 @@ def aggregate_fields_space(
 
     window_size = (int(space_window[0] / ydelta), int(space_window[1] / xdelta))
 
-    return aggregate_fields(dataset, window_size, ["y", "x"], method)
+    return aggregate_fields(dataset, window_size, ["y", "x"], method, "mean")
 
 
 def aggregate_fields(
-    dataset: xr.Dataset, window_size, dim="x", method="mean", trim=False
+    dataset: xr.Dataset,
+    window_size,
+    dim="x",
+    method="mean",
+    velocity_method="mean",
+    trim=False,
 ) -> xr.Dataset:
     """Aggregate fields along a given direction.
 
@@ -201,7 +216,11 @@ def aggregate_fields(
         dims, instead of a single dim
     method: string, optional
         Optional argument that specifies the operation to use
-        to aggregate the values within the window.
+        to aggregate the precipitation values within the window.
+        Default to mean operator.
+    velocity_method: string, optional
+        Optional argument that specifies the operation to use
+        to aggregate the velocity values within the window.
         Default to mean operator.
     trim: bool
          In case that the ``data`` is not perfectly divisible by
@@ -254,9 +273,8 @@ def aggregate_fields(
                 f"dataset.sizes[dim]={dataset.sizes[d]}"
             )
 
-    # FIXME: The aggregation method is applied to all DataArrays in the Dataset
-    #        Fix to allow support for an aggregation method per DataArray
-    return (
+    dataset_ref = dataset
+    dataset = (
         dataset.rolling(dict(zip(dim, window_size)))
         .reduce(_aggregation_methods[method])
         .isel(
@@ -266,6 +284,50 @@ def aggregate_fields(
             }
         )
     )
+    if "velocity_x" in dataset_ref:
+        dataset["velocity_x"] = (
+            dataset_ref["velocity_x"]
+            .rolling(dict(zip(dim, window_size)))
+            .reduce(_aggregation_methods[velocity_method])
+            .isel(
+                {
+                    d: slice(
+                        ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws
+                    )
+                    for d, ws in zip(dim, window_size)
+                }
+            )
+        )
+    if "velocity_y" in dataset_ref:
+        dataset["velocity_y"] = (
+            dataset_ref["velocity_y"]
+            .rolling(dict(zip(dim, window_size)))
+            .reduce(_aggregation_methods[velocity_method])
+            .isel(
+                {
+                    d: slice(
+                        ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws
+                    )
+                    for d, ws in zip(dim, window_size)
+                }
+            )
+        )
+    if "quality" in dataset_ref:
+        dataset["quality"] = (
+            dataset_ref["quality"]
+            .rolling(dict(zip(dim, window_size)))
+            .reduce(_aggregation_methods["min"])
+            .isel(
+                {
+                    d: slice(
+                        ws - 1, dataset_ref.sizes[d] - dataset_ref.sizes[d] % ws, ws
+                    )
+                    for d, ws in zip(dim, window_size)
+                }
+            )
+        )
+
+    return dataset
 
 
 def clip_domain(dataset: xr.Dataset, extent=None):
@@ -298,8 +360,7 @@ def clip_domain(dataset: xr.Dataset, extent=None):
 def _pad_domain(
     dataset: xr.Dataset, dim_to_pad: str, idx_buffer: int, zerovalue: float
 ) -> xr.Dataset:
-    # assumes that frames are evenly spaced
-    delta = dataset[dim_to_pad].values[1] - dataset[dim_to_pad].values[0]
+    delta = dataset[dim_to_pad].attrs["stepsize"]
     end_values = (
         dataset[dim_to_pad].values[0] - delta * idx_buffer,
         dataset[dim_to_pad].values[-1] + delta * idx_buffer,
@@ -307,8 +368,6 @@ def _pad_domain(
 
     dataset_ref = dataset
 
-    # FIXME: The same zerovalue is used for all DataArrays in the Dataset
-    #        Fix to allow support for a zerovalue per DataArray
     dataset = dataset_ref.pad({dim_to_pad: idx_buffer}, constant_values=zerovalue)
     dataset[dim_to_pad] = dataset_ref[dim_to_pad].pad(
         {dim_to_pad: idx_buffer},
@@ -318,6 +377,24 @@ def _pad_domain(
     dataset.lat.data[:], dataset.lon.data[:] = compute_lat_lon(
         dataset.x.values, dataset.y.values, dataset.attrs["projection"]
     )
+    if "velocity_x" in dataset_ref:
+        dataset["velocity_x"].data = (
+            dataset_ref["velocity_x"]
+            .pad({dim_to_pad: idx_buffer}, constant_values=0.0)
+            .values
+        )
+    if "velocity_y" in dataset_ref:
+        dataset["velocity_y"].data = (
+            dataset_ref["velocity_y"]
+            .pad({dim_to_pad: idx_buffer}, constant_values=0.0)
+            .values
+        )
+    if "quality" in dataset_ref:
+        dataset["quality"].data = (
+            dataset_ref["quality"]
+            .pad({dim_to_pad: idx_buffer}, constant_values=0.0)
+            .values
+        )
     return dataset
 
 
@@ -332,14 +409,17 @@ def square_domain(dataset: xr.Dataset, method="pad", inverse=False):
         :py:mod:`pysteps.io.importers`.
     method: {'pad', 'crop'}, optional
         Either pad or crop.
-        If pad, an equal number of zeros is added to both ends of its shortest
-        side in order to produce a square domain.
+        If pad, an equal number of pixels
+        filled with the minimum value of the precipitation
+        field is added to both ends of the precipitation fields shortest
+        side in order to produce a square domain. The quality and velocity fields
+        are always padded with zeros.
         If crop, an equal number of pixels is removed
         to both ends of its longest side in order to produce a square domain.
         Note that the crop method involves an irreversible loss of data.
     inverse: bool, optional
         Perform the inverse method to recover the original domain shape.
-        After a crop, the inverse is performed by padding the field with zeros.
+        After a crop, the inverse is performed by doing a pad.
 
     Returns
     -------
diff --git a/pysteps/converters.py b/pysteps/xarray_helpers.py
similarity index 76%
rename from pysteps/converters.py
rename to pysteps/xarray_helpers.py
index 2825af612..a1049f32f 100644
--- a/pysteps/converters.py
+++ b/pysteps/xarray_helpers.py
@@ -3,7 +3,7 @@
 pysteps.converters
 ==================
 
-Module with data converter functions.
+Module with xarray helper functions.
 
 .. autosummary::
     :toctree: ../generated/
@@ -11,6 +11,8 @@
     convert_to_xarray_dataset
 """
 
+from datetime import datetime, timedelta
+
 import numpy as np
 import numpy.typing as npt
 import pyproj
@@ -77,7 +79,7 @@ def compute_lat_lon(
     return lat.reshape(x_2d.shape), lon.reshape(x_2d.shape)
 
 
-def convert_to_xarray_dataset(
+def convert_input_to_xarray_dataset(
     precip: np.ndarray,
     quality: np.ndarray | None,
     metadata: dict[str, str | float | None],
@@ -111,6 +113,21 @@ def convert_to_xarray_dataset(
     y_r = np.linspace(metadata["y1"], metadata["y2"], h + 1)[:-1]
     y_r += 0.5 * (y_r[1] - y_r[0])
 
+    if "xpixelsize" in metadata:
+        xpixelsize = metadata["xpixelsize"]
+    else:
+        xpixelsize = x_r[1] - x_r[0]
+
+    if "ypixelsize" in metadata:
+        ypixelsize = metadata["ypixelsize"]
+    else:
+        ypixelsize = y_r[1] - y_r[0]
+
+    if x_r[1] - x_r[0] != xpixelsize:
+        raise ValueError("xpixelsize does not match x1, x2 and array shape")
+    if y_r[1] - y_r[0] != ypixelsize:
+        raise ValueError("ypixelsize does not match y1, y2 and array shape")
+
     # flip yr vector if yorigin is upper
     if metadata["yorigin"] == "upper":
         y_r = np.flip(y_r)
@@ -160,6 +177,7 @@ def convert_to_xarray_dataset(
                 "long_name": "y-coordinate in Cartesian system",
                 "standard_name": "projection_y_coordinate",
                 "units": metadata["cartesian_unit"],
+                "stepsize": ypixelsize,
             },
         ),
         "x": (
@@ -170,6 +188,7 @@ def convert_to_xarray_dataset(
                 "long_name": "x-coordinate in Cartesian system",
                 "standard_name": "projection_x_coordinate",
                 "units": metadata["cartesian_unit"],
+                "stepsize": xpixelsize,
             },
         ),
         "lon": (
@@ -178,7 +197,6 @@ def convert_to_xarray_dataset(
             {
                 "long_name": "longitude coordinate",
                 "standard_name": "longitude",
-                # TODO(converters): Don't hard-code the unit.
                 "units": "degrees_east",
             },
         ),
@@ -188,7 +206,6 @@ def convert_to_xarray_dataset(
             {
                 "long_name": "latitude coordinate",
                 "standard_name": "latitude",
-                # TODO(converters): Don't hard-code the unit.
                 "units": "degrees_north",
             },
         ),
@@ -207,3 +224,45 @@ def convert_to_xarray_dataset(
     }
     dataset = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
     return dataset.sortby(["y", "x"])
+
+
+def convert_output_to_xarray_dataset(
+    dataset: xr.Dataset, timesteps: int | list[int], output: np.ndarray
+) -> xr.Dataset:
+    precip_var = dataset.attrs["precip_var"]
+    metadata = dataset[precip_var].attrs
+
+    last_timestamp = (
+        dataset["time"][-1].values.astype("datetime64[us]").astype(datetime)
+    )
+    time_metadata = dataset["time"].attrs
+    timestep_seconds = dataset["time"].attrs["stepsize"]
+    dataset = dataset.drop_vars([precip_var]).drop_dims(["time"])
+    if isinstance(timesteps, int):
+        timesteps = list(range(1, timesteps + 1))
+    next_timestamps = [
+        last_timestamp + timedelta(seconds=timestep_seconds * i) for i in timesteps
+    ]
+    dataset = dataset.assign_coords(
+        {"time": (["time"], next_timestamps, time_metadata)}
+    )
+
+    if output.ndim == 4:
+        dataset = dataset.assign_coords(
+            {
+                "ens_number": (
+                    ["ens_number"],
+                    list(range(1, output.shape[0] + 1)),
+                    {
+                        "long_name": "ensemble member",
+                        "standard_name": "realization",
+                        "units": "",
+                    },
+                )
+            }
+        )
+        dataset[precip_var] = (["ens_number", "time", "y", "x"], output, metadata)
+    else:
+        dataset[precip_var] = (["time", "y", "x"], output, metadata)
+
+    return dataset