Merge branch 'master' into bugfix/metric_collection_and_aggregation

Lightning-AI · Jul 12, 2023 · 22b6f04 · 22b6f04
2 parents f70b28d + eba8ab8
commit 22b6f04
Show file tree

Hide file tree

Showing 13 changed files with 446 additions and 28 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Added source aggregated signal-to-distortion ratio (SA-SDR) metric ([#1882](https://github.com/Lightning-AI/torchmetrics/pull/1882))
+
+
 - Added `VisualInformationFidelity` to image package ([#1830](https://github.com/Lightning-AI/torchmetrics/pull/1830))
 
 
@@ -35,6 +38,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed bug related to empty predictions for `IntersectionOverUnion` metric ([#1892](https://github.com/Lightning-AI/torchmetrics/pull/1892))
 
 
+- Fixed bug related to `MeanMetric` and broadcasting of weights when NaNs are present ([#1898](https://github.com/Lightning-AI/torchmetrics/pull/1898))
+
+
 ## [1.0.0] - 2022-07-04
 
 ### Added

diff --git a/docs/source/audio/source_aggregated_signal_distortion_ratio.rst b/docs/source/audio/source_aggregated_signal_distortion_ratio.rst
@@ -0,0 +1,23 @@
+.. customcarditem::
+   :header: Source Aggregated Signal-to-Distortion Ratio (SA-SDR)
+   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/audio_classification.svg
+   :tags: Audio
+
+.. include:: ../links.rst
+
+#####################################################
+Source Aggregated Signal-to-Distortion Ratio (SA-SDR)
+#####################################################
+
+Module Interface
+________________
+
+.. autoclass:: torchmetrics.audio.sdr.SourceAggregatedSignalDistortionRatio
+    :noindex:
+    :exclude-members: update, compute
+
+Functional Interface
+____________________
+
+.. autofunction:: torchmetrics.functional.audio.sdr.source_aggregated_signal_distortion_ratio
+    :noindex:
diff --git a/docs/source/links.rst b/docs/source/links.rst
@@ -117,6 +117,7 @@
 .. _sdr ref2: https://arxiv.org/abs/2110.06440
 .. _Scale-invariant signal-to-distortion ratio: https://arxiv.org/abs/1811.02508
 .. _Scale-invariant signal-to-noise ratio: https://arxiv.org/abs/1711.00541
+.. _Source-aggregated signal-to-distortion ratio: https://arxiv.org/abs/2110.15581
 .. _Complex scale-invariant signal-to-noise ratio: https://arxiv.org/abs/2011.09162
 .. _Signal-to-noise ratio: https://arxiv.org/abs/1811.02508
 .. _Speech-to-Reverberation Modulation Energy Ratio: https://ieeexplore.ieee.org/document/5547575

diff --git a/src/torchmetrics/aggregation.py b/src/torchmetrics/aggregation.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Callable, List, Optional, Sequence, Union
+from typing import Any, Callable, List, Optional, Sequence, Tuple, Union
 
 import torch
 from torch import Tensor
@@ -71,25 +71,36 @@ def __init__(
         self.add_state(state_name, default=default_value, dist_reduce_fx=fn)
         self.state_name = state_name
 
-    def _cast_and_nan_check_input(self, x: Union[float, Tensor]) -> Tensor:
+    def _cast_and_nan_check_input(
+        self, x: Union[float, Tensor], weight: Optional[Union[float, Tensor]] = None
+    ) -> Tuple[Tensor, Tensor]:
         """Convert input ``x`` to a tensor and check for Nans."""
         if not isinstance(x, Tensor):
             x = torch.as_tensor(x, dtype=torch.float32, device=self.device)
+        if weight is not None and not isinstance(weight, Tensor):
+            weight = torch.as_tensor(weight, dtype=torch.float32, device=self.device)
 
         nans = torch.isnan(x)
-        if nans.any():
+        if weight is not None:
+            nans_weight = torch.isnan(weight)
+        else:
+            nans_weight = torch.zeros_like(nans).bool()
+            weight = torch.ones_like(x)
+        if nans.any() or nans_weight.any():
             if self.nan_strategy == "error":
                 raise RuntimeError("Encounted `nan` values in tensor")
             if self.nan_strategy in ("ignore", "warn"):
                 if self.nan_strategy == "warn":
                     rank_zero_warn("Encounted `nan` values in tensor. Will be removed.", UserWarning)
-                x = x[~nans]
+                x = x[~(nans | nans_weight)]
+                weight = weight[~(nans | nans_weight)]
             else:
                 if not isinstance(self.nan_strategy, float):
                     raise ValueError(f"`nan_strategy` shall be float but you pass {self.nan_strategy}")
-                x[nans] = self.nan_strategy
+                x[nans | nans_weight] = self.nan_strategy
+                weight[nans | nans_weight] = self.nan_strategy
 
-        return x.float()
+        return x.float(), weight.float()
 
     def update(self, value: Union[float, Tensor]) -> None:
         """Overwrite in child class."""
@@ -157,7 +168,7 @@ def update(self, value: Union[float, Tensor]) -> None:
             value: Either a float or tensor containing data. Additional tensor
                 dimensions will be flattened
         """
-        value = self._cast_and_nan_check_input(value)
+        value, _ = self._cast_and_nan_check_input(value)
         if value.numel():  # make sure tensor not empty
             self.max_value = torch.max(self.max_value, torch.max(value))
 
@@ -259,7 +270,7 @@ def update(self, value: Union[float, Tensor]) -> None:
             value: Either a float or tensor containing data. Additional tensor
                 dimensions will be flattened
         """
-        value = self._cast_and_nan_check_input(value)
+        value, _ = self._cast_and_nan_check_input(value)
         if value.numel():  # make sure tensor not empty
             self.min_value = torch.min(self.min_value, torch.min(value))
 
@@ -360,7 +371,7 @@ def update(self, value: Union[float, Tensor]) -> None:
             value: Either a float or tensor containing data. Additional tensor
                 dimensions will be flattened
         """
-        value = self._cast_and_nan_check_input(value)
+        value, _ = self._cast_and_nan_check_input(value)
         if value.numel():
             self.sum_value += value.sum()
 
@@ -456,7 +467,7 @@ def update(self, value: Union[float, Tensor]) -> None:
             value: Either a float or tensor containing data. Additional tensor
                 dimensions will be flattened
         """
-        value = self._cast_and_nan_check_input(value)
+        value, _ = self._cast_and_nan_check_input(value)
         if value.numel():
             self.value.append(value)
 
@@ -530,13 +541,16 @@ def update(self, value: Union[float, Tensor], weight: Union[float, Tensor] = 1.0
                 the shape of `value`. Default to `1.0` corresponding to simple
                 harmonic average.
         """
-        value = self._cast_and_nan_check_input(value)
-        weight = self._cast_and_nan_check_input(weight)
+        # broadcast weight to value shape
+        if not isinstance(value, Tensor):
+            value = torch.as_tensor(value, dtype=torch.float32, device=self.device)
+        if weight is not None and not isinstance(weight, Tensor):
+            weight = torch.as_tensor(weight, dtype=torch.float32, device=self.device)
+        weight = torch.broadcast_to(weight, value.shape)
+        value, weight = self._cast_and_nan_check_input(value, weight)
 
         if value.numel() == 0:
             return
-        # broadcast weight to value shape
-        weight = torch.broadcast_to(weight, value.shape)
         self.mean_value += (value * weight).sum()
         self.weight += weight.sum()
 

diff --git a/src/torchmetrics/audio/__init__.py b/src/torchmetrics/audio/__init__.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from torchmetrics.audio.pit import PermutationInvariantTraining
-from torchmetrics.audio.sdr import ScaleInvariantSignalDistortionRatio, SignalDistortionRatio
+from torchmetrics.audio.sdr import (
+    ScaleInvariantSignalDistortionRatio,
+    SignalDistortionRatio,
+    SourceAggregatedSignalDistortionRatio,
+)
 from torchmetrics.audio.snr import (
     ComplexScaleInvariantSignalNoiseRatio,
     ScaleInvariantSignalNoiseRatio,
@@ -30,6 +34,7 @@
     "PermutationInvariantTraining",
     "ScaleInvariantSignalDistortionRatio",
     "SignalDistortionRatio",
+    "SourceAggregatedSignalDistortionRatio",
     "ScaleInvariantSignalNoiseRatio",
     "SignalNoiseRatio",
     "ComplexScaleInvariantSignalNoiseRatio",

diff --git a/src/torchmetrics/audio/sdr.py b/src/torchmetrics/audio/sdr.py
@@ -15,15 +15,23 @@
 
 from torch import Tensor, tensor
 
-from torchmetrics.functional.audio.sdr import scale_invariant_signal_distortion_ratio, signal_distortion_ratio
+from torchmetrics.functional.audio.sdr import (
+    scale_invariant_signal_distortion_ratio,
+    signal_distortion_ratio,
+    source_aggregated_signal_distortion_ratio,
+)
 from torchmetrics.metric import Metric
 from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE
 from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE
 
 __doctest_requires__ = {"SignalDistortionRatio": ["fast_bss_eval"]}
 
 if not _MATPLOTLIB_AVAILABLE:
-    __doctest_skip__ = ["SignalDistortionRatio.plot", "ScaleInvariantSignalDistortionRatio.plot"]
+    __doctest_skip__ = [
+        "SignalDistortionRatio.plot",
+        "ScaleInvariantSignalDistortionRatio.plot",
+        "SourceAggregatedSignalDistortionRatio.plot",
+    ]
 
 
 class SignalDistortionRatio(Metric):
@@ -265,3 +273,122 @@ def plot(
             >>> fig_, ax_ = metric.plot(values)
         """
         return self._plot(val, ax)
+
+
+class SourceAggregatedSignalDistortionRatio(Metric):
+    r"""`Source-aggregated signal-to-distortion ratio`_ (SA-SDR).
+
+    The SA-SDR is proposed to provide a stable gradient for meeting-style source separation, where
+    one-speaker and multiple-speaker scenes coexist.
+
+    As input to ``forward`` and ``update`` the metric accepts the following input
+
+    - ``preds`` (:class:`~torch.Tensor`): float tensor with shape ``(..., spk, time)``
+    - ``target`` (:class:`~torch.Tensor`): float tensor with shape ``(..., spk, time)``
+
+    As output of ``forward`` and ``compute`` the metric returns the following output
+
+    - ``sa_sdr`` (:class:`~torch.Tensor`): float scalar tensor with average SA-SDR value over samples
+
+    Args:
+        preds: float tensor with shape ``(..., spk, time)``
+        target: float tensor with shape ``(..., spk, time)``
+        scale_invariant: if True, scale the targets of different speakers with the same alpha
+        zero_mean: whether to zero-mean ``target`` and ``preds``
+        kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
+
+    Example:
+        >>> import torch
+        >>> from torchmetrics.audio import SourceAggregatedSignalDistortionRatio
+        >>> g = torch.manual_seed(1)
+        >>> preds = torch.randn(2, 8000)  # [..., spk, time]
+        >>> target = torch.randn(2, 8000)
+        >>> sasdr = SourceAggregatedSignalDistortionRatio()
+        >>> sasdr(preds, target)
+        tensor(-41.6579)
+        >>> # use with pit
+        >>> from torchmetrics.audio import PermutationInvariantTraining
+        >>> from torchmetrics.functional.audio import source_aggregated_signal_distortion_ratio
+        >>> preds = torch.randn(4, 2, 8000)  # [batch, spk, time]
+        >>> target = torch.randn(4, 2, 8000)
+        >>> pit = PermutationInvariantTraining(source_aggregated_signal_distortion_ratio,
+        ...     mode="permutation-wise", eval_func="max")
+        >>> pit(preds, target)
+        tensor(-41.2790)
+    """
+
+    msum: Tensor
+    mnum: Tensor
+    full_state_update: bool = False
+    is_differentiable: bool = True
+    higher_is_better: bool = True
+    plot_lower_bound: Optional[float] = None
+    plot_upper_bound: Optional[float] = None
+
+    def __init__(
+        self,
+        scale_invariant: bool = True,
+        zero_mean: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        if not isinstance(scale_invariant, bool):
+            raise ValueError(f"Expected argument `scale_invariant` to be a bool, but got {scale_invariant}")
+        self.scale_invariant = scale_invariant
+        if not isinstance(zero_mean, bool):
+            raise ValueError(f"Expected argument `zero_mean` to be a bool, but got {zero_mean}")
+        self.zero_mean = zero_mean
+
+        self.add_state("msum", default=tensor(0.0), dist_reduce_fx="sum")
+        self.add_state("mnum", default=tensor(0), dist_reduce_fx="sum")
+
+    def update(self, preds: Tensor, target: Tensor) -> None:
+        """Add the sum and the element count of this batch's SA-SDR values to ``msum`` and ``mnum``."""
+        mbatch = source_aggregated_signal_distortion_ratio(preds, target, self.scale_invariant, self.zero_mean)
+
+        self.msum += mbatch.sum()
+        self.mnum += mbatch.numel()
+
+    def compute(self) -> Tensor:
+        """Return the accumulated SA-SDR sum divided by the accumulated number of values."""
+        return self.msum / self.mnum
+
+    def plot(self, val: Union[Tensor, Sequence[Tensor], None] = None, ax: Optional[_AX_TYPE] = None) -> _PLOT_OUT_TYPE:
+        """Plot a single or multiple values from the metric.
+
+        Args:
+            val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
+                If no value is provided, will automatically call `metric.compute` and plot that result.
+            ax: A matplotlib axis object. If provided will add plot to that axis
+
+        Returns:
+            Figure and Axes object
+
+        Raises:
+            ModuleNotFoundError:
+                If `matplotlib` is not installed
+
+        .. plot::
+            :scale: 75
+
+            >>> # Example plotting a single value
+            >>> import torch
+            >>> from torchmetrics.audio import SourceAggregatedSignalDistortionRatio
+            >>> metric = SourceAggregatedSignalDistortionRatio()
+            >>> metric.update(torch.rand(2,8000), torch.rand(2,8000))
+            >>> fig_, ax_ = metric.plot()
+
+        .. plot::
+            :scale: 75
+
+            >>> # Example plotting multiple values
+            >>> import torch
+            >>> from torchmetrics.audio import SourceAggregatedSignalDistortionRatio
+            >>> metric = SourceAggregatedSignalDistortionRatio()
+            >>> values = [ ]
+            >>> for _ in range(10):
+            ...     values.append(metric(torch.rand(2,8000), torch.rand(2,8000)))
+            >>> fig_, ax_ = metric.plot(values)
+        """
+        return self._plot(val, ax)
diff --git a/src/torchmetrics/classification/group_fairness.py b/src/torchmetrics/classification/group_fairness.py
@@ -291,23 +291,25 @@ def plot(
         .. plot::
             :scale: 75
 
-            >>> from torch import rand, randint
+            >>> import torch
+            >>> _ = torch.manual_seed(42)
             >>> # Example plotting a single value
             >>> from torchmetrics.classification import BinaryFairness
             >>> metric = BinaryFairness(2)
-            >>> metric.update(rand(20), randint(2,(20,)), randint(2,(20,)))
+            >>> metric.update(torch.rand(20), torch.randint(2,(20,)), torch.randint(2,(20,)))
             >>> fig_, ax_ = metric.plot()
 
         .. plot::
             :scale: 75
 
-            >>> from torch import rand, randint, ones
+            >>> import torch
+            >>> _ = torch.manual_seed(42)
             >>> # Example plotting multiple values
             >>> from torchmetrics.classification import BinaryFairness
             >>> metric = BinaryFairness(2)
             >>> values = [ ]
             >>> for _ in range(10):
-            ...     values.append(metric(rand(20), randint(2,(20,)), ones(20).long()))
+            ...     values.append(metric(torch.rand(20), torch.randint(2,(20,)), torch.ones(20).long()))
             >>> fig_, ax_ = metric.plot(values)
         """
         return self._plot(val, ax)
diff --git a/src/torchmetrics/functional/audio/__init__.py b/src/torchmetrics/functional/audio/__init__.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from torchmetrics.functional.audio.pit import permutation_invariant_training, pit_permutate
-from torchmetrics.functional.audio.sdr import scale_invariant_signal_distortion_ratio, signal_distortion_ratio
+from torchmetrics.functional.audio.sdr import (
+    scale_invariant_signal_distortion_ratio,
+    signal_distortion_ratio,
+    source_aggregated_signal_distortion_ratio,
+)
 from torchmetrics.functional.audio.snr import (
     complex_scale_invariant_signal_noise_ratio,
     scale_invariant_signal_noise_ratio,
@@ -30,6 +34,7 @@
     "permutation_invariant_training",
     "pit_permutate",
     "scale_invariant_signal_distortion_ratio",
+    "source_aggregated_signal_distortion_ratio",
     "signal_distortion_ratio",
     "scale_invariant_signal_noise_ratio",
     "signal_noise_ratio",