From c35a2fbb81120c1143f67e768fafcef3e44dbb09 Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Sat, 25 Nov 2023 17:16:20 +0100 Subject: [PATCH] Fix support for half precision in Perplexity metric (#2235) --- CHANGELOG.md | 5 +- .../functional/text/perplexity.py | 11 +-- tests/unittests/bases/test_collections.py | 76 +++++++++---------- tests/unittests/text/test_perplexity.py | 23 +++++- 4 files changed, 67 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0c7bfe1ade..e640fe410e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,7 +57,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed numerical stability issue in `UniversalImageQualityIndex` metric ([#2222](https://github.com/Lightning-AI/torchmetrics/pull/2222)) -- Fix device and dtype for `LearnedPerceptualImagePatchSimilarity` functional metric ([#2234](https://github.com/Lightning-AI/torchmetrics/pull/2234)) +- Fixed support for half precision in Perplexity metric ([#2235](https://github.com/Lightning-AI/torchmetrics/pull/2235)) + + +- Fixed device and dtype for `LearnedPerceptualImagePatchSimilarity` functional metric ([#2234](https://github.com/Lightning-AI/torchmetrics/pull/2234)) ## [1.2.0] - 2023-09-22 diff --git a/src/torchmetrics/functional/text/perplexity.py b/src/torchmetrics/functional/text/perplexity.py index 127d3c74a67..cb0bafd5082 100644 --- a/src/torchmetrics/functional/text/perplexity.py +++ b/src/torchmetrics/functional/text/perplexity.py @@ -16,9 +16,6 @@ import torch from torch import Tensor -from torch.nn import functional as F # noqa: N812 - -_TORCH_FLOAT_OR_DOUBLE = (torch.float32, torch.float64) def _check_shape_and_type_consistency(preds: Tensor, target: Tensor) -> None: @@ -59,10 +56,8 @@ def _check_shape_and_type_consistency(preds: Tensor, target: Tensor) -> None: "Input tensors `preds` and `target` are expected to have equaling first two dimensions," f" [batch_size, seq_len], but got {preds.shape[:2]} and {target.shape}." ) - if preds.dtype not in _TORCH_FLOAT_OR_DOUBLE: - raise TypeError( - f"Input tensor `preds` is expected to be of a type one of {_TORCH_FLOAT_OR_DOUBLE} but got {preds.dtype}." - ) + if not preds.is_floating_point(): + raise TypeError(f"Input tensor `preds` is expected to be of floating point type but got {preds.dtype}.") if target.dtype != torch.int64: raise TypeError(f"Input tensor `target` is expected to be of a type {torch.int64} but got {target.dtype}.") @@ -87,7 +82,7 @@ def _perplexity_update(preds: Tensor, target: Tensor, ignore_index: Optional[int """ _check_shape_and_type_consistency(preds, target) - probs = F.softmax(preds.reshape(-1, preds.shape[-1]), dim=1) + probs = torch.nn.functional.softmax(preds.reshape(-1, preds.shape[-1]), dim=1) target = target.reshape(-1) if ignore_index is not None: diff --git a/tests/unittests/bases/test_collections.py b/tests/unittests/bases/test_collections.py index 6f3e64d1b6c..ce4bb1ba8c8 100644 --- a/tests/unittests/bases/test_collections.py +++ b/tests/unittests/bases/test_collections.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import pickle -import time from copy import deepcopy from typing import Any @@ -480,43 +479,44 @@ def _compare(m1, m2): _compare(metric_cg, metric_no_cg) -@pytest.mark.parametrize( - "metrics", - [ - {"acc0": MulticlassAccuracy(3), "acc1": MulticlassAccuracy(3)}, - [MulticlassPrecision(3), MulticlassRecall(3)], - [MulticlassConfusionMatrix(3), MulticlassCohenKappa(3), MulticlassRecall(3), MulticlassPrecision(3)], - { - "acc": MulticlassAccuracy(3), - "acc2": MulticlassAccuracy(3), - "acc3": MulticlassAccuracy(num_classes=3, average="macro"), - "f1": MulticlassF1Score(3), - "recall": MulticlassRecall(3), - "confmat": MulticlassConfusionMatrix(3), - }, - ], -) -@pytest.mark.parametrize("steps", [1000]) -def test_check_compute_groups_is_faster(metrics, steps): - """Check that compute groups are formed after initialization.""" - m = MetricCollection(deepcopy(metrics), compute_groups=True) - # Construct without for comparison - m2 = MetricCollection(deepcopy(metrics), compute_groups=False) - - preds = torch.randn(10, 3).softmax(dim=-1) - target = torch.randint(3, (10,)) - - start = time.time() - for _ in range(steps): - m.update(preds, target) - time_cg = time.time() - start - - start = time.time() - for _ in range(steps): - m2.update(preds, target) - time_no_cg = time.time() - start - - assert time_cg < time_no_cg, "using compute groups were not faster" +# TODO: test is flaky +# @pytest.mark.parametrize( +# "metrics", +# [ +# {"acc0": MulticlassAccuracy(3), "acc1": MulticlassAccuracy(3)}, +# [MulticlassPrecision(3), MulticlassRecall(3)], +# [MulticlassConfusionMatrix(3), MulticlassCohenKappa(3), MulticlassRecall(3), MulticlassPrecision(3)], +# { +# "acc": MulticlassAccuracy(3), +# "acc2": MulticlassAccuracy(3), +# "acc3": MulticlassAccuracy(num_classes=3, average="macro"), +# "f1": MulticlassF1Score(3), +# "recall": MulticlassRecall(3), +# "confmat": MulticlassConfusionMatrix(3), +# }, +# ], +# ) +# @pytest.mark.parametrize("steps", [1000]) +# def test_check_compute_groups_is_faster(metrics, steps): +# """Check that compute groups are formed after initialization.""" +# m = MetricCollection(deepcopy(metrics), compute_groups=True) +# # Construct without for comparison +# m2 = MetricCollection(deepcopy(metrics), compute_groups=False) + +# preds = torch.randn(10, 3).softmax(dim=-1) +# target = torch.randint(3, (10,)) + +# start = time.time() +# for _ in range(steps): +# m.update(preds, target) +# time_cg = time.time() - start + +# start = time.time() +# for _ in range(steps): +# m2.update(preds, target) +# time_no_cg = time.time() - start + +# assert time_cg < time_no_cg, "using compute groups were not faster" def test_compute_group_define_by_user(): diff --git a/tests/unittests/text/test_perplexity.py b/tests/unittests/text/test_perplexity.py index b79f33391df..658c6eee878 100644 --- a/tests/unittests/text/test_perplexity.py +++ b/tests/unittests/text/test_perplexity.py @@ -71,7 +71,7 @@ def test_perplexity_fn(self, preds, target, ignore_index): metric_args={"ignore_index": ignore_index}, ) - def test_accuracy_differentiability(self, preds, target, ignore_index): + def test_perplexity_differentiability(self, preds, target, ignore_index): """Test the differentiability of the metric, according to its `is_differentiable` attribute.""" self.run_differentiability_test( preds=preds, @@ -80,3 +80,24 @@ def test_accuracy_differentiability(self, preds, target, ignore_index): metric_functional=perplexity, metric_args={"ignore_index": ignore_index}, ) + + @pytest.mark.parametrize("dtype", [torch.half, 
torch.double]) + def test_perplexity_dtypes_cpu(self, preds, target, ignore_index, dtype): + """Test dtype support of the metric on CPU.""" + if dtype == torch.half: + with pytest.raises(RuntimeError, match="\"softmax_lastdim_kernel_impl\" not implemented for 'Half'"): + self.run_precision_test_cpu( + preds, target, Perplexity, perplexity, metric_args={"ignore_index": ignore_index}, dtype=dtype + ) + else: + self.run_precision_test_cpu( + preds, target, Perplexity, perplexity, metric_args={"ignore_index": ignore_index}, dtype=dtype + ) + + @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires cuda") + @pytest.mark.parametrize("dtype", [torch.half, torch.double]) + def test_perplexity_dtypes_gpu(self, preds, target, ignore_index, dtype): + """Test dtype support of the metric on GPU.""" + self.run_precision_test_gpu( + preds, target, Perplexity, perplexity, metric_args={"ignore_index": ignore_index}, dtype=dtype + )
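
A minimal usage sketch (not part of the patch) of what this fix enables, assuming a torchmetrics build that includes this change: `preds` may now be any floating point dtype rather than only float32/float64, so half precision runs wherever torch's softmax kernel supports it (CUDA). On CPU, `torch.half` still raises the RuntimeError that the new `test_perplexity_dtypes_cpu` expects, since "softmax_lastdim_kernel_impl" is not implemented for Half there.

import torch
from torchmetrics.text import Perplexity

torch.manual_seed(42)
# preds holds unnormalized scores of shape [batch_size, seq_len, vocab_size];
# target holds int64 token indices of shape [batch_size, seq_len]
preds = torch.rand(2, 8, 5)
target = torch.randint(5, (2, 8))

metric = Perplexity(ignore_index=None)
print(metric(preds, target))  # float32 works on CPU, as before

if torch.cuda.is_available():
    # half precision preds now pass the relaxed dtype check and run on GPU;
    # on CPU the same call would raise RuntimeError from the softmax kernel
    metric_gpu = Perplexity().to("cuda")
    print(metric_gpu(preds.to("cuda", torch.half), target.to("cuda")))

Note the accumulation design: `_perplexity_update` sums log probabilities in the input dtype, and the metric states then absorb them, so feeding half precision inputs does not require converting the whole metric with `set_dtype` (which is what `run_precision_test_cpu`/`run_precision_test_gpu` exercise in the tests above).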