[BugFix] Fix slice sampler end computation at the cursor place (#2225)

Vincent Moens · web-flow · commit 47a1005d2724 · 2024-06-12T10:04:20.000+01:00
diff --git a/torchrl/data/replay_buffers/samplers.py b/torchrl/data/replay_buffers/samplers.py
@@ -926,7 +926,9 @@ def __repr__(self):
         )
 
     @classmethod
-    def _find_start_stop_traj(cls, *, trajectory=None, end=None, at_capacity: bool):
+    def _find_start_stop_traj(
+        cls, *, trajectory=None, end=None, at_capacity: bool, cursor=None
+    ):
         if trajectory is not None:
             # slower
             # _, stop_idx = torch.unique_consecutive(trajectory, return_counts=True)
@@ -954,12 +956,28 @@ def _find_start_stop_traj(cls, *, trajectory=None, end=None, at_capacity: bool):
                 dim=0,
                 value=1,
             )
-        elif not end.any(0).all():
-            # we must have at least one end by traj to delimitate trajectories
+        else:
+            # we must have at least one end per traj to delimit trajectories
             # so if no end can be found we set it manually
-            mask = ~end.any(0, True)
-            mask = torch.cat([torch.zeros_like(end[:-1]), mask])
-            end = torch.masked_fill(mask, end, 1)
+            if cursor is not None:
+                if isinstance(cursor, torch.Tensor):
+                    cursor = cursor[-1].item()
+                elif isinstance(cursor, range):
+                    cursor = cursor[-1]
+                if not _is_int(cursor):
+                    raise RuntimeError(
+                        "cursor should be an integer or a 1d tensor or a range."
+                    )
+                end = torch.index_fill(
+                    end,
+                    index=torch.tensor(cursor, device=end.device, dtype=torch.long),
+                    dim=0,
+                    value=1,
+                )
+            if not end.any(0).all():
+                mask = ~end.any(0, True)
+                mask = torch.cat([torch.zeros_like(end[:-1]), mask])
+                end = torch.masked_fill(mask, end, 1)
         ndim = end.ndim
         if ndim == 0:
             raise RuntimeError(
@@ -994,7 +1012,7 @@ def _end_to_start_stop(end, length):
             # In this case we have only one start and stop has already been set
             pass
         lengths = stop_idx[:, 0] - start_idx[:, 0] + 1
-        lengths[lengths < 0] = lengths[lengths < 0] + length
+        lengths[lengths <= 0] = lengths[lengths <= 0] + length
         return start_idx, stop_idx, lengths
 
     def _start_to_end(self, st: torch.Tensor, length: int):
@@ -1072,7 +1090,9 @@ def _get_stop_and_length(self, storage, fallback=True):
                         "Could not get a tensordict out of the storage, which is required for SliceSampler to compute the trajectories."
                     )
                 vals = self._find_start_stop_traj(
-                    end=done.squeeze()[: len(storage)], at_capacity=storage._is_full
+                    end=done.squeeze()[: len(storage)],
+                    at_capacity=storage._is_full,
+                    cursor=getattr(storage, "_last_cursor", None),
                 )
                 if self.cache_values:
                     self._cache["stop-and-length"] = vals
@@ -1270,7 +1290,6 @@ def _get_index(
             ],
             1,
         )
-
         index = self._tensor_slices_from_startend(seq_length, starts, storage_length)
         if self.truncated_key is not None:
             truncated_key = self.truncated_key
diff --git a/torchrl/envs/common.py b/torchrl/envs/common.py
@@ -2609,9 +2609,10 @@ def rollout(
             for key in self.done_keys:
                 if _ends_with(key, "truncated"):
                     val = out_td.get(("next", key))
+                    done = out_td.get(("next", _replace_last(key, "done")))
                     val[(slice(None),) * (out_td.ndim - 1) + (-1,)] = True
                     out_td.set(("next", key), val)
-                    out_td.set(("next", _replace_last(key, "done")), val)
+                    out_td.set(("next", _replace_last(key, "done")), val | done)
                     found_truncated = True
             if not found_truncated:
                 raise RuntimeError(
diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py
@@ -3048,7 +3048,7 @@ def unfold_done(done, N):
             reset_unfold_list = [torch.zeros_like(reset_unfold_slice)]
             for r in reversed(reset_unfold.unbind(-1)):
                 reset_unfold_list.append(r | reset_unfold_list[-1])
-                reset_unfold_slice = reset_unfold_list[-1]
+                # reset_unfold_slice = reset_unfold_list[-1]
             reset_unfold = torch.stack(list(reversed(reset_unfold_list))[1:], -1)
             reset = reset[prefix + (slice(self.N - 1, None),)]
             reset[prefix + (0,)] = 1