From 04e50e945809a914563c6c391b86ae939b838a9a Mon Sep 17 00:00:00 2001
From: Joseph Viviano <joseph@viviano.ca>
Date: Tue, 2 Apr 2024 17:38:55 -0400
Subject: [PATCH] reworking of prioritized replay buffer logic

---
 src/gfn/containers/replay_buffer.py | 65 +++++++++++++++--------------
 1 file changed, 34 insertions(+), 31 deletions(-)

diff --git a/src/gfn/containers/replay_buffer.py b/src/gfn/containers/replay_buffer.py
index fe1b111..2cf9fcc 100644
--- a/src/gfn/containers/replay_buffer.py
+++ b/src/gfn/containers/replay_buffer.py
@@ -132,7 +132,9 @@ def __init__(
             capacity: the size of the buffer.
             objects_type: the type of buffer (transitions, trajectories, or states).
             cutoff_distance: threshold used to determine if new last_states are
-                different enough from those already contained in the buffer.
+                different enough from those already contained in the buffer. If the
+                cutoff is negative, all diversity calculations are skipped (since all
+                norms are >= 0).
             p_norm_distance: p-norm distance value to pass to torch.cdist, for the
                 determination of novel states.
     """
@@ -195,40 +197,41 @@ def add(self, training_objects: Transitions | Trajectories | tuple[States]):
             #         dim=-1,
             #     )
 
-            # Filter the batch for diverse final_states with high reward.
-            batch = training_objects.last_states.tensor.float()
-            batch_dim = training_objects.last_states.batch_shape[0]
-            batch_batch_dist = torch.cdist(
-                batch.view(batch_dim, -1).unsqueeze(0),
-                batch.view(batch_dim, -1).unsqueeze(0),
-                p=self.p_norm_distance,
-            ).squeeze(0)
-
-            # Finds the min distance at each row, and removes rows below the cutoff.
-            r, w = torch.triu_indices(*batch_batch_dist.shape)  # Remove upper diag.
-            batch_batch_dist[r, w] = torch.finfo(batch_batch_dist.dtype).max
-            batch_batch_dist = batch_batch_dist.min(-1)[0]
-            idx_batch_batch = batch_batch_dist > self.cutoff_distance
-            training_objects = training_objects[idx_batch_batch]
-
-            # Compute all pairwise distances between the remaining batch and the buffer.
-            batch = training_objects.last_states.tensor.float()
-            buffer = self.training_objects.last_states.tensor.float()
-            batch_dim = training_objects.last_states.batch_shape[0]
-            buffer_dim = self.training_objects.last_states.batch_shape[0]
-            batch_buffer_dist = (
-                torch.cdist(
+            if self.cutoff_distance >= 0:
+                # Filter the batch for diverse final_states with high reward.
+                batch = training_objects.last_states.tensor.float()
+                batch_dim = training_objects.last_states.batch_shape[0]
+                batch_batch_dist = torch.cdist(
+                    batch.view(batch_dim, -1).unsqueeze(0),
                     batch.view(batch_dim, -1).unsqueeze(0),
-                    buffer.view(buffer_dim, -1).unsqueeze(0),
                     p=self.p_norm_distance,
+                ).squeeze(0)
+
+                # Finds the min distance at each row, and removes rows below the cutoff.
+                r, w = torch.triu_indices(*batch_batch_dist.shape)  # Remove upper diag.
+                batch_batch_dist[r, w] = torch.finfo(batch_batch_dist.dtype).max
+                batch_batch_dist = batch_batch_dist.min(-1)[0]
+                idx_batch_batch = batch_batch_dist > self.cutoff_distance
+                training_objects = training_objects[idx_batch_batch]
+
+                # Compute all pairwise distances between the remaining batch & buffer.
+                batch = training_objects.last_states.tensor.float()
+                buffer = self.training_objects.last_states.tensor.float()
+                batch_dim = training_objects.last_states.batch_shape[0]
+                buffer_dim = self.training_objects.last_states.batch_shape[0]
+                batch_buffer_dist = (
+                    torch.cdist(
+                        batch.view(batch_dim, -1).unsqueeze(0),
+                        buffer.view(buffer_dim, -1).unsqueeze(0),
+                        p=self.p_norm_distance,
+                    )
+                    .squeeze(0)
+                    .min(-1)[0]  # Min calculated over rows - the batch elements.
                 )
-                .squeeze(0)
-                .min(-1)[0]  # Min calculated over rows, i.e., over the batch elements.
-            )
 
-            # Filter the batch for diverse final_states w.r.t the buffer.
-            idx_batch_buffer = batch_buffer_dist > self.cutoff_distance
-            training_objects = training_objects[idx_batch_buffer]
+                # Filter the batch for diverse final_states w.r.t the buffer.
+                idx_batch_buffer = batch_buffer_dist > self.cutoff_distance
+                training_objects = training_objects[idx_batch_buffer]
 
             # If any training object remain after filtering, add them.
             if len(training_objects):