[BugFix] Fix exploration in losses (#1898)

Vincent Moens · web-flow · commit 1bd5ec640a4a · 2024-02-11T17:04:42.000Z
diff --git a/test/test_cost.py b/test/test_cost.py
@@ -25,6 +25,7 @@
     TensorDictSequential,
     TensorDictSequential as Seq,
 )
+from torchrl.envs.utils import exploration_type, ExplorationType, set_exploration_type
 
 from torchrl.modules.models import QMixer
 
@@ -12391,6 +12392,22 @@ def __init__(self):
                 assert p.device == dest
 
 
+def test_loss_exploration():  # forward runs in MODE; caller's type is restored
+    class DummyLoss(LossModule):
+        def forward(self, td):
+            assert exploration_type() == ExplorationType.MODE  # set by LossModule
+            with set_exploration_type(ExplorationType.RANDOM):
+                assert exploration_type() == ExplorationType.RANDOM  # local override
+            assert exploration_type() == ExplorationType.MODE  # override undone
+            return td
+
+    loss_fn = DummyLoss()
+    with set_exploration_type(ExplorationType.RANDOM):
+        assert exploration_type() == ExplorationType.RANDOM
+        loss_fn(None)
+        assert exploration_type() == ExplorationType.RANDOM  # MODE did not leak
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
diff --git a/test/test_exploration.py b/test/test_exploration.py
@@ -54,6 +54,7 @@
 class TestEGreedy:
     @pytest.mark.parametrize("eps_init", [0.0, 0.5, 1])
     @pytest.mark.parametrize("module", [True, False])
+    @set_exploration_type(InteractionType.RANDOM)
     def test_egreedy(self, eps_init, module):
         torch.manual_seed(0)
         spec = BoundedTensorSpec(1, 1, torch.Size([4]))
diff --git a/torchrl/objectives/common.py b/torchrl/objectives/common.py
@@ -5,6 +5,7 @@
 
 from __future__ import annotations
 
+import abc
 import warnings
 from copy import deepcopy
 from dataclasses import dataclass
@@ -31,7 +32,13 @@ def _updater_check_forward_prehook(module, *args, **kwargs):
         )
 
 
-class LossModule(TensorDictModuleBase):
+class _LossMeta(abc.ABCMeta):  # metaclass applied to LossModule and its subclasses
+    def __init__(cls, name, bases, attr_dict):  # runs once per class creation
+        super().__init__(name, bases, attr_dict)  # then wrap forward in MODE:
+        cls.forward = set_exploration_type(ExplorationType.MODE)(cls.forward)
+
+
+class LossModule(TensorDictModuleBase, metaclass=_LossMeta):
     """A parent class for RL losses.
 
     LossModule inherits from nn.Module. It is designed to read an input
@@ -109,16 +116,6 @@ def __init__(self):
         self.value_type = self.default_value_estimator
         self._tensor_keys = self._AcceptedKeys()
         self.register_forward_pre_hook(_updater_check_forward_prehook)
-        expl_mode = set_exploration_type(ExplorationType.MODE)
-
-        def _pre_hook(*args, expl_mode=expl_mode, **kwargs):
-            expl_mode.__enter__()
-
-        def _post_hook(*args, expl_mode=expl_mode, **kwargs):
-            expl_mode.__exit__(exc_type=None, exc_value=None, traceback=None)
-
-        self.register_forward_pre_hook(_pre_hook)
-        self.register_forward_hook(_post_hook)
 
     def _set_deprecated_ctor_keys(self, **kwargs) -> None:
         for key, value in kwargs.items():