pytorch · vmoens · Jun 21, 2024 · Jun 21, 2024
diff --git a/test/test_rb.py b/test/test_rb.py
@@ -2629,12 +2629,21 @@ def test_prb_update_max_priority(self, max_priority_within_buffer):
         for data in torch.arange(20):
             idx = rb.add(data)
             rb.update_priority(idx, 21 - data)
-            if data <= 10 or not max_priority_within_buffer:
+            if data <= 10:
+                # The max is always going to be the first value
                 assert rb._sampler._max_priority[0] == 21
                 assert rb._sampler._max_priority[1] == 0
-            else:
-                assert rb._sampler._max_priority[0] == 10
+            elif not max_priority_within_buffer:
+                # The max is the historical max, which was at idx 0
+                assert rb._sampler._max_priority[0] == 21
                 assert rb._sampler._max_priority[1] == 0
+            else:
+                # The max is the current max: find it in the sum-tree and compare
+                sumtree = torch.as_tensor(
+                    [rb._sampler._sum_tree[i] for i in range(rb._sampler._max_capacity)]
+                )
+                assert rb._sampler._max_priority[0] == sumtree.max()
+                assert rb._sampler._max_priority[1] == sumtree.argmax()
         idx = rb.extend(torch.arange(10))
         rb.update_priority(idx, 12)
         if max_priority_within_buffer:

diff --git a/torchrl/data/replay_buffers/samplers.py b/torchrl/data/replay_buffers/samplers.py
@@ -575,12 +575,24 @@ def update_priority(
                     priority = priority[valid_index]
 
         max_p, max_p_idx = priority.max(dim=0)
-        max_priority = self._max_priority[0]
-        if max_priority is None or max_p > max_priority:
-            self._max_priority = (max_p, max_p_idx)
+        cur_max_priority, cur_max_priority_index = self._max_priority
+        if cur_max_priority is None or max_p > cur_max_priority:
+            cur_max_priority, cur_max_priority_index = self._max_priority = (
+                max_p,
+                index[max_p_idx] if index.ndim else index,
+            )
         priority = torch.pow(priority + self._eps, self._alpha)
         self._sum_tree[index] = priority
         self._min_tree[index] = priority
+        if (
+            self._max_priority_within_buffer
+            and cur_max_priority_index is not None
+            and (index == cur_max_priority_index).any()
+        ):
+            maxval, maxidx = torch.tensor(
+                [self._sum_tree[i] for i in range(self._max_capacity)]
+            ).max(0)
+            self._max_priority = (maxval, maxidx)
 
     def mark_update(
         self, index: Union[int, torch.Tensor], *, storage: Storage | None = None